3.4-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 4 Jun 2014 23:50:24 +0000 (16:50 -0700)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 4 Jun 2014 23:50:24 +0000 (16:50 -0700)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 4 Jun 2014 23:50:24 +0000 (16:50 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 4 Jun 2014 23:50:24 +0000 (16:50 -0700)
diff --git a/queue-3.4/dm-bufio-avoid-a-possible-__vmalloc-deadlock.patch b/queue-3.4/dm-bufio-avoid-a-possible-__vmalloc-deadlock.patch

new file mode 100644 (file)

index 0000000..c12f760
--- /dev/null
+++ b/queue-3.4/dm-bufio-avoid-a-possible-__vmalloc-deadlock.patch
@@ -0,0 +1,83 @@
+From 4725f1715429f75fa5f053dbe05575d08aeb5967 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Fri, 10 May 2013 14:37:15 +0100
+Subject: dm bufio: avoid a possible __vmalloc deadlock
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 502624bdad3dba45dfaacaf36b7d83e39e74b2d2 upstream.
+
+This patch uses memalloc_noio_save to avoid a possible deadlock in
+dm-bufio.  (It could happen only with large block size, at most
+PAGE_SIZE << MAX_ORDER (typically 8MiB).)
+
+__vmalloc doesn't fully respect gfp flags. The specified gfp flags are
+used for allocation of requested pages, structures vmap_area, vmap_block
+and vm_struct and the radix tree nodes.
+
+However, the kernel pagetables are allocated always with GFP_KERNEL.
+Thus the allocation of pagetables can recurse back to the I/O layer and
+cause a deadlock.
+
+This patch uses the function memalloc_noio_save to set per-process
+PF_MEMALLOC_NOIO flag and the function memalloc_noio_restore to restore
+it. When this flag is set, all allocations in the process are done with
+implied GFP_NOIO flag, thus the deadlock can't happen.
+
+This should be backported to stable kernels, but they don't have the
+PF_MEMALLOC_NOIO flag and memalloc_noio_save/memalloc_noio_restore
+functions. So, PF_MEMALLOC should be set and restored instead.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Alasdair G Kergon <agk@redhat.com>
+[bwh: Backported to 3.2 as recommended]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Cc: Rui Xiang <rui.xiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-bufio.c |   26 +++++++++++++++++++++++++-
+ 1 file changed, 25 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/dm-bufio.c
++++ b/drivers/md/dm-bufio.c
+@@ -321,6 +321,9 @@ static void __cache_size_refresh(void)
+ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
+                              enum data_mode *data_mode)
+ {
++      unsigned noio_flag;
++      void *ptr;
++
+       if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
+               *data_mode = DATA_MODE_SLAB;
+               return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
+@@ -334,7 +337,28 @@ static void *alloc_buffer_data(struct dm
+       }
+ 
+       *data_mode = DATA_MODE_VMALLOC;
+-      return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
++
++      /*
++       * __vmalloc allocates the data pages and auxiliary structures with
++       * gfp_flags that were specified, but pagetables are always allocated
++       * with GFP_KERNEL, no matter what was specified as gfp_mask.
++       *
+       * Consequently, we must set per-process flag PF_MEMALLOC (this kernel
+       * has no PF_MEMALLOC_NOIO) so that all allocations done by this process
+       * (including pagetables) do not recurse back to the I/O layer.
++       */
++
++      if (gfp_mask & __GFP_NORETRY) {
++              noio_flag = current->flags & PF_MEMALLOC;
++              current->flags |= PF_MEMALLOC;
++      }
++
++      ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
++
++      if (gfp_mask & __GFP_NORETRY)
++              current->flags = (current->flags & ~PF_MEMALLOC) | noio_flag;
++
++      return ptr;
+ }
+ 
+ /*
diff --git a/queue-3.4/dm-mpath-fix-race-condition-between-multipath_dtr-and-pg_init_done.patch b/queue-3.4/dm-mpath-fix-race-condition-between-multipath_dtr-and-pg_init_done.patch

new file mode 100644 (file)

index 0000000..aa42088
--- /dev/null
+++ b/queue-3.4/dm-mpath-fix-race-condition-between-multipath_dtr-and-pg_init_done.patch
@@ -0,0 +1,110 @@
+From 954a73d5d3073df2231820c718fdd2f18b0fe4c9 Mon Sep 17 00:00:00 2001
+From: Shiva Krishna Merla <shivakrishna.merla@netapp.com>
+Date: Wed, 30 Oct 2013 03:26:38 +0000
+Subject: dm mpath: fix race condition between multipath_dtr and pg_init_done
+
+From: Shiva Krishna Merla <shivakrishna.merla@netapp.com>
+
+commit 954a73d5d3073df2231820c718fdd2f18b0fe4c9 upstream.
+
+Whenever multipath_dtr() is happening we must prevent queueing any
+further path activation work.  Implement this by adding a new
+'pg_init_disabled' flag to the multipath structure that denotes future
+path activation work should be skipped if it is set.  By disabling
+pg_init and then re-enabling in flush_multipath_work() we also avoid the
+potential for pg_init to be initiated while suspending an mpath device.
+
+Without this patch a race condition exists that may result in a kernel
+panic:
+
+1) After pg_init_done() decrements pg_init_in_progress to 0, a call
+   to wait_for_pg_init_completion() assumes there are no more pending path
+   management commands.
+2) If pg_init_required is set by pg_init_done(), due to retryable
+   mode_select errors, then process_queued_ios() will again queue the
+   path activation work.
+3) If free_multipath() completes before activate_path() work is called a
+   NULL pointer dereference like the following can be seen when
+   accessing members of the recently destructed multipath:
+
+BUG: unable to handle kernel NULL pointer dereference at 0000000000000090
+RIP: 0010:[<ffffffffa003db1b>]  [<ffffffffa003db1b>] activate_path+0x1b/0x30 [dm_multipath]
+[<ffffffff81090ac0>] worker_thread+0x170/0x2a0
+[<ffffffff81096c80>] ? autoremove_wake_function+0x0/0x40
+
+[switch to disabling pg_init in flush_multipath_work & header edits by Mike Snitzer]
+Signed-off-by: Shiva Krishna Merla <shivakrishna.merla@netapp.com>
+Reviewed-by: Krishnasamy Somasundaram <somasundaram.krishnasamy@netapp.com>
+Tested-by: Speagle Andy <Andy.Speagle@netapp.com>
+Acked-by: Junichi Nomura <j-nomura@ce.jp.nec.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+[bwh: Backported to 3.2:
+ - Adjust context
+ - Bump version to 1.3.2 not 1.6.0]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+[xr: Backported to 3.4: Adjust context]
+Signed-off-by: Rui Xiang <rui.xiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-mpath.c |   18 +++++++++++++++---
+ 1 file changed, 15 insertions(+), 3 deletions(-)
+
+--- a/drivers/md/dm-mpath.c
++++ b/drivers/md/dm-mpath.c
+@@ -84,6 +84,7 @@ struct multipath {
+       unsigned queue_io;              /* Must we queue all I/O? */
+       unsigned queue_if_no_path;      /* Queue I/O if last path fails? */
+       unsigned saved_queue_if_no_path;/* Saved state during suspension */
++      unsigned pg_init_disabled:1;    /* pg_init is not currently allowed */
+       unsigned pg_init_retries;       /* Number of times to retry pg_init */
+       unsigned pg_init_count;         /* Number of times pg_init called */
+       unsigned pg_init_delay_msecs;   /* Number of msecs before pg_init retry */
+@@ -493,7 +494,8 @@ static void process_queued_ios(struct wo
+           (!pgpath && !m->queue_if_no_path))
+               must_queue = 0;
+ 
+-      if (m->pg_init_required && !m->pg_init_in_progress && pgpath)
++      if (m->pg_init_required && !m->pg_init_in_progress && pgpath &&
++          !m->pg_init_disabled)
+               __pg_init_all_paths(m);
+ 
+ out:
+@@ -907,10 +909,20 @@ static void multipath_wait_for_pg_init_c
+ 
+ static void flush_multipath_work(struct multipath *m)
+ {
++      unsigned long flags;
++
++      spin_lock_irqsave(&m->lock, flags);
++      m->pg_init_disabled = 1;
++      spin_unlock_irqrestore(&m->lock, flags);
++
+       flush_workqueue(kmpath_handlerd);
+       multipath_wait_for_pg_init_completion(m);
+       flush_workqueue(kmultipathd);
+       flush_work_sync(&m->trigger_event);
++
++      spin_lock_irqsave(&m->lock, flags);
++      m->pg_init_disabled = 0;
++      spin_unlock_irqrestore(&m->lock, flags);
+ }
+ 
+ static void multipath_dtr(struct dm_target *ti)
+@@ -1129,7 +1141,7 @@ static int pg_init_limit_reached(struct
+ 
+       spin_lock_irqsave(&m->lock, flags);
+ 
+-      if (m->pg_init_count <= m->pg_init_retries)
++      if (m->pg_init_count <= m->pg_init_retries && !m->pg_init_disabled)
+               m->pg_init_required = 1;
+       else
+               limit_reached = 1;
+@@ -1644,7 +1656,7 @@ out:
+  *---------------------------------------------------------------*/
+ static struct target_type multipath_target = {
+       .name = "multipath",
+-      .version = {1, 3, 0},
++      .version = {1, 3, 2},
+       .module = THIS_MODULE,
+       .ctr = multipath_ctr,
+       .dtr = multipath_dtr,
diff --git a/queue-3.4/dm-snapshot-add-missing-module-aliases.patch b/queue-3.4/dm-snapshot-add-missing-module-aliases.patch

new file mode 100644 (file)

index 0000000..bef7c72
--- /dev/null
+++ b/queue-3.4/dm-snapshot-add-missing-module-aliases.patch
@@ -0,0 +1,33 @@
+From e0179c31e0818d5efc87a475f2b83979ef2d3585 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Fri, 1 Mar 2013 22:45:47 +0000
+Subject: dm snapshot: add missing module aliases
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 23cb21092eb9dcec9d3604b68d95192b79915890 upstream.
+
+Add module aliases so that autoloading works correctly if the user
+tries to activate "snapshot-origin" or "snapshot-merge" targets.
+
+Reference: https://bugzilla.redhat.com/889973
+
+Reported-by: Chao Yang <chyang@redhat.com>
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Alasdair G Kergon <agk@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Cc: Rui Xiang <rui.xiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/dm-snap.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/md/dm-snap.c
++++ b/drivers/md/dm-snap.c
+@@ -2323,3 +2323,5 @@ module_exit(dm_snapshot_exit);
+ MODULE_DESCRIPTION(DM_NAME " snapshot target");
+ MODULE_AUTHOR("Joe Thornber");
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS("dm-snapshot-origin");
++MODULE_ALIAS("dm-snapshot-merge");
diff --git a/queue-3.4/dm-snapshot-avoid-snapshot-space-leak-on-crash.patch b/queue-3.4/dm-snapshot-avoid-snapshot-space-leak-on-crash.patch

new file mode 100644 (file)

index 0000000..1fd7fa0
--- /dev/null
+++ b/queue-3.4/dm-snapshot-avoid-snapshot-space-leak-on-crash.patch
@@ -0,0 +1,195 @@
+From 230c83afdd9cd384348475bea1e14b80b3b6b1b8 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Fri, 29 Nov 2013 18:13:37 -0500
+Subject: dm snapshot: avoid snapshot space leak on crash
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 230c83afdd9cd384348475bea1e14b80b3b6b1b8 upstream.
+
+There is a possible leak of snapshot space in case of crash.
+
+The reason for space leaking is that chunks in the snapshot device are
+allocated sequentially, but they are finished (and stored in the metadata)
+out of order, depending on the order in which copying finished.
+
+For example, suppose that the metadata contains the following records:
+SUPERBLOCK
+METADATA (blocks 0 ... 250)
+DATA 0
+DATA 1
+DATA 2
+...
+DATA 250
+
+Now suppose that you allocate 10 new data blocks 251-260. Suppose that
+copying of these blocks finishes out of order (block 260 finished first
+and block 251 finished last). Now, the snapshot device looks like
+this:
+SUPERBLOCK
+METADATA (blocks 0 ... 250, 260, 259, 258, 257, 256)
+DATA 0
+DATA 1
+DATA 2
+...
+DATA 250
+DATA 251
+DATA 252
+DATA 253
+DATA 254
+DATA 255
+METADATA (blocks 255, 254, 253, 252, 251)
+DATA 256
+DATA 257
+DATA 258
+DATA 259
+DATA 260
+
+Now, if the machine crashes after writing the first metadata block but
+before writing the second metadata block, the space for areas DATA 251-255
+is leaked: it contains no valid data and it will never be used in the
+future.
+
+This patch makes dm-snapshot complete exceptions in the same order they
+were allocated, thus fixing this bug.
+
+Note: when backporting this patch to the stable kernel, change the version
+field in the following way:
+* if version in the stable kernel is {1, 11, 1}, change it to {1, 12, 0}
+* if version in the stable kernel is {1, 10, 0} or {1, 10, 1}, change it
+  to {1, 10, 2}
+Userspace reads the version to determine if the bug was fixed, so the
+version change is needed.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+[xr: Backported to 3.4: adjust version]
+Signed-off-by: Rui Xiang <rui.xiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-snap.c |   71 +++++++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 64 insertions(+), 7 deletions(-)
+
+--- a/drivers/md/dm-snap.c
++++ b/drivers/md/dm-snap.c
+@@ -66,6 +66,18 @@ struct dm_snapshot {
+ 
+       atomic_t pending_exceptions_count;
+ 
++      /* Protected by "lock" */
++      sector_t exception_start_sequence;
++
++      /* Protected by kcopyd single-threaded callback */
++      sector_t exception_complete_sequence;
++
++      /*
++       * A list of pending exceptions that completed out of order.
++       * Protected by kcopyd single-threaded callback.
++       */
++      struct list_head out_of_order_list;
++
+       mempool_t *pending_pool;
+ 
+       struct dm_exception_table pending;
+@@ -171,6 +183,14 @@ struct dm_snap_pending_exception {
+        */
+       int started;
+ 
++      /* There was copying error. */
++      int copy_error;
++
++      /* A sequence number, it is used for in-order completion. */
++      sector_t exception_sequence;
++
++      struct list_head out_of_order_entry;
++
+       /*
+        * For writing a complete chunk, bypassing the copy.
+        */
+@@ -1090,6 +1110,9 @@ static int snapshot_ctr(struct dm_target
+       s->valid = 1;
+       s->active = 0;
+       atomic_set(&s->pending_exceptions_count, 0);
++      s->exception_start_sequence = 0;
++      s->exception_complete_sequence = 0;
++      INIT_LIST_HEAD(&s->out_of_order_list);
+       init_rwsem(&s->lock);
+       INIT_LIST_HEAD(&s->list);
+       spin_lock_init(&s->pe_lock);
+@@ -1448,6 +1471,19 @@ static void commit_callback(void *contex
+       pending_complete(pe, success);
+ }
+ 
++static void complete_exception(struct dm_snap_pending_exception *pe)
++{
++      struct dm_snapshot *s = pe->snap;
++
++      if (unlikely(pe->copy_error))
++              pending_complete(pe, 0);
++
++      else
++              /* Update the metadata if we are persistent */
++              s->store->type->commit_exception(s->store, &pe->e,
++                                               commit_callback, pe);
++}
++
+ /*
+  * Called when the copy I/O has finished.  kcopyd actually runs
+  * this code so don't block.
+@@ -1457,13 +1493,32 @@ static void copy_callback(int read_err,
+       struct dm_snap_pending_exception *pe = context;
+       struct dm_snapshot *s = pe->snap;
+ 
+-      if (read_err || write_err)
+-              pending_complete(pe, 0);
++      pe->copy_error = read_err || write_err;
+ 
+-      else
+-              /* Update the metadata if we are persistent */
+-              s->store->type->commit_exception(s->store, &pe->e,
+-                                               commit_callback, pe);
++      if (pe->exception_sequence == s->exception_complete_sequence) {
++              s->exception_complete_sequence++;
++              complete_exception(pe);
++
++              while (!list_empty(&s->out_of_order_list)) {
++                      pe = list_entry(s->out_of_order_list.next,
++                                      struct dm_snap_pending_exception, out_of_order_entry);
++                      if (pe->exception_sequence != s->exception_complete_sequence)
++                              break;
++                      s->exception_complete_sequence++;
++                      list_del(&pe->out_of_order_entry);
++                      complete_exception(pe);
++              }
++      } else {
++              struct list_head *lh;
++              struct dm_snap_pending_exception *pe2;
++
++              list_for_each_prev(lh, &s->out_of_order_list) {
++                      pe2 = list_entry(lh, struct dm_snap_pending_exception, out_of_order_entry);
++                      if (pe2->exception_sequence < pe->exception_sequence)
++                              break;
++              }
++              list_add(&pe->out_of_order_entry, lh);
++      }
+ }
+ 
+ /*
+@@ -1558,6 +1613,8 @@ __find_pending_exception(struct dm_snaps
+               return NULL;
+       }
+ 
++      pe->exception_sequence = s->exception_start_sequence++;
++
+       dm_insert_exception(&s->pending, &pe->e);
+ 
+       return pe;
+@@ -2200,7 +2257,7 @@ static struct target_type origin_target
+ 
+ static struct target_type snapshot_target = {
+       .name    = "snapshot",
+-      .version = {1, 10, 0},
++      .version = {1, 10, 2},
+       .module  = THIS_MODULE,
+       .ctr     = snapshot_ctr,
+       .dtr     = snapshot_dtr,
diff --git a/queue-3.4/dm-thin-fix-discard-corruption.patch b/queue-3.4/dm-thin-fix-discard-corruption.patch

new file mode 100644 (file)

index 0000000..7fe9ae4
--- /dev/null
+++ b/queue-3.4/dm-thin-fix-discard-corruption.patch
@@ -0,0 +1,195 @@
+From f046f89a99ccfd9408b94c653374ff3065c7edb3 Mon Sep 17 00:00:00 2001
+From: Joe Thornber <ejt@redhat.com>
+Date: Wed, 20 Mar 2013 17:21:24 +0000
+Subject: dm thin: fix discard corruption
+
+From: Joe Thornber <ejt@redhat.com>
+
+commit f046f89a99ccfd9408b94c653374ff3065c7edb3 upstream.
+
+Fix a bug in dm_btree_remove that could leave leaf values with incorrect
+reference counts.  The effect of this was that removal of a shared block
+could result in the space maps thinking the block was no longer used.
+More concretely, if you have a thin device and a snapshot of it, sending
+a discard to a shared region of the thin could corrupt the snapshot.
+
+Thinp uses a 2-level nested btree to store its mappings.  The first
+level is indexed by thin device, and the second level by logical
+block.
+
+Often when we're removing an entry in this mapping tree we need to
+rebalance nodes, which can involve shadowing them, possibly creating a
+copy if the block is shared.  If we do create a copy then children of
+that node need to have their reference counts incremented.  In this
+way reference counts percolate down the tree as shared trees diverge.
+
+The rebalance functions were incrementing the children at the
+appropriate time, but they were always assuming the children were
+internal nodes.  This meant the leaf values (in our case packed
+block/flags entries) were not being incremented.
+
+Signed-off-by: Joe Thornber <ejt@redhat.com>
+Signed-off-by: Alasdair G Kergon <agk@redhat.com>
+[bwh: Backported to 3.2: bump target version numbers from 1.0.1 to 1.0.2]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+[xr: Backported to 3.4: bump target version numbers to 1.1.1]
+Signed-off-by: Rui Xiang <rui.xiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-thin.c                         |    4 +-
+ drivers/md/persistent-data/dm-btree-remove.c |   46 ++++++++++++++-------------
+ 2 files changed, 26 insertions(+), 24 deletions(-)
+
+--- a/drivers/md/dm-thin.c
++++ b/drivers/md/dm-thin.c
+@@ -2472,7 +2472,7 @@ static struct target_type pool_target =
+       .name = "thin-pool",
+       .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
+                   DM_TARGET_IMMUTABLE,
+-      .version = {1, 1, 0},
++      .version = {1, 1, 1},
+       .module = THIS_MODULE,
+       .ctr = pool_ctr,
+       .dtr = pool_dtr,
+@@ -2752,7 +2752,7 @@ static void thin_io_hints(struct dm_targ
+ 
+ static struct target_type thin_target = {
+       .name = "thin",
+-      .version = {1, 1, 0},
++      .version = {1, 1, 1},
+       .module = THIS_MODULE,
+       .ctr = thin_ctr,
+       .dtr = thin_dtr,
+--- a/drivers/md/persistent-data/dm-btree-remove.c
++++ b/drivers/md/persistent-data/dm-btree-remove.c
+@@ -139,15 +139,8 @@ struct child {
+       struct btree_node *n;
+ };
+ 
+-static struct dm_btree_value_type le64_type = {
+-      .context = NULL,
+-      .size = sizeof(__le64),
+-      .inc = NULL,
+-      .dec = NULL,
+-      .equal = NULL
+-};
+-
+-static int init_child(struct dm_btree_info *info, struct btree_node *parent,
++static int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt,
++                    struct btree_node *parent,
+                     unsigned index, struct child *result)
+ {
+       int r, inc;
+@@ -164,7 +157,7 @@ static int init_child(struct dm_btree_in
+       result->n = dm_block_data(result->block);
+ 
+       if (inc)
+-              inc_children(info->tm, result->n, &le64_type);
++              inc_children(info->tm, result->n, vt);
+ 
+       *((__le64 *) value_ptr(parent, index)) =
+               cpu_to_le64(dm_block_location(result->block));
+@@ -236,7 +229,7 @@ static void __rebalance2(struct dm_btree
+ }
+ 
+ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
+-                    unsigned left_index)
++                    struct dm_btree_value_type *vt, unsigned left_index)
+ {
+       int r;
+       struct btree_node *parent;
+@@ -244,11 +237,11 @@ static int rebalance2(struct shadow_spin
+ 
+       parent = dm_block_data(shadow_current(s));
+ 
+-      r = init_child(info, parent, left_index, &left);
++      r = init_child(info, vt, parent, left_index, &left);
+       if (r)
+               return r;
+ 
+-      r = init_child(info, parent, left_index + 1, &right);
++      r = init_child(info, vt, parent, left_index + 1, &right);
+       if (r) {
+               exit_child(info, &left);
+               return r;
+@@ -368,7 +361,7 @@ static void __rebalance3(struct dm_btree
+ }
+ 
+ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
+-                    unsigned left_index)
++                    struct dm_btree_value_type *vt, unsigned left_index)
+ {
+       int r;
+       struct btree_node *parent = dm_block_data(shadow_current(s));
+@@ -377,17 +370,17 @@ static int rebalance3(struct shadow_spin
+       /*
+        * FIXME: fill out an array?
+        */
+-      r = init_child(info, parent, left_index, &left);
++      r = init_child(info, vt, parent, left_index, &left);
+       if (r)
+               return r;
+ 
+-      r = init_child(info, parent, left_index + 1, &center);
++      r = init_child(info, vt, parent, left_index + 1, &center);
+       if (r) {
+               exit_child(info, &left);
+               return r;
+       }
+ 
+-      r = init_child(info, parent, left_index + 2, &right);
++      r = init_child(info, vt, parent, left_index + 2, &right);
+       if (r) {
+               exit_child(info, &left);
+               exit_child(info, &center);
+@@ -434,7 +427,8 @@ static int get_nr_entries(struct dm_tran
+ }
+ 
+ static int rebalance_children(struct shadow_spine *s,
+-                            struct dm_btree_info *info, uint64_t key)
++                            struct dm_btree_info *info,
++                            struct dm_btree_value_type *vt, uint64_t key)
+ {
+       int i, r, has_left_sibling, has_right_sibling;
+       uint32_t child_entries;
+@@ -472,13 +466,13 @@ static int rebalance_children(struct sha
+       has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);
+ 
+       if (!has_left_sibling)
+-              r = rebalance2(s, info, i);
++              r = rebalance2(s, info, vt, i);
+ 
+       else if (!has_right_sibling)
+-              r = rebalance2(s, info, i - 1);
++              r = rebalance2(s, info, vt, i - 1);
+ 
+       else
+-              r = rebalance3(s, info, i - 1);
++              r = rebalance3(s, info, vt, i - 1);
+ 
+       return r;
+ }
+@@ -529,7 +523,7 @@ static int remove_raw(struct shadow_spin
+               if (le32_to_cpu(n->header.flags) & LEAF_NODE)
+                       return do_leaf(n, key, index);
+ 
+-              r = rebalance_children(s, info, key);
++              r = rebalance_children(s, info, vt, key);
+               if (r)
+                       break;
+ 
+@@ -550,6 +544,14 @@ static int remove_raw(struct shadow_spin
+       return r;
+ }
+ 
++static struct dm_btree_value_type le64_type = {
++      .context = NULL,
++      .size = sizeof(__le64),
++      .inc = NULL,
++      .dec = NULL,
++      .equal = NULL
++};
++
+ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
+                   uint64_t *keys, dm_block_t *new_root)
+ {
diff --git a/queue-3.4/md-raid10-fix-enough-function-for-detecting-if-array-is-failed.patch b/queue-3.4/md-raid10-fix-enough-function-for-detecting-if-array-is-failed.patch

new file mode 100644 (file)

index 0000000..e4abea7
--- /dev/null
+++ b/queue-3.4/md-raid10-fix-enough-function-for-detecting-if-array-is-failed.patch
@@ -0,0 +1,57 @@
+From 4e19de3be14c9390e63271effb5b95ab50f298f4 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Thu, 27 Sep 2012 12:35:21 +1000
+Subject: md/raid10: fix "enough" function for detecting if array is failed.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: NeilBrown <neilb@suse.de>
+
+commit 80b4812407c6b1f66a4f2430e69747a13f010839 upstream.
+
+The 'enough' function is written to work with 'near' arrays only
+in that it implicitly assumes that the offset from one 'group' of
+devices to the next is the same as the number of copies.
+In reality it is the number of 'near' copies.
+
+So change it to make this number explicit.
+
+This bug makes it possible to run arrays without enough drives
+present, which is dangerous.
+It is appropriate for an -stable kernel, but will almost certainly
+need to be modified for some of them.
+
+Reported-by: Jakub Husák <jakub@gooseman.cz>
+Signed-off-by: NeilBrown <neilb@suse.de>
+[bwh: Backported to 3.2: s/geo->/conf->/]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Cc: Rui Xiang <rui.xiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/raid10.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -1419,14 +1419,16 @@ static int enough(struct r10conf *conf,
+       do {
+               int n = conf->copies;
+               int cnt = 0;
++              int this = first;
+               while (n--) {
+-                      if (conf->mirrors[first].rdev &&
+-                          first != ignore)
++                      if (conf->mirrors[this].rdev &&
++                          this != ignore)
+                               cnt++;
+-                      first = (first+1) % conf->raid_disks;
++                      this = (this+1) % conf->raid_disks;
+               }
+               if (cnt == 0)
+                       return 0;
++              first = (first + conf->near_copies) % conf->raid_disks;
+       } while (first != 0);
+       return 1;
+ }
diff --git a/queue-3.4/nfs-nfs_getaclargs.acl_len-is-a-size_t.patch b/queue-3.4/nfs-nfs_getaclargs.acl_len-is-a-size_t.patch

new file mode 100644 (file)

index 0000000..387d4cc
--- /dev/null
+++ b/queue-3.4/nfs-nfs_getaclargs.acl_len-is-a-size_t.patch
@@ -0,0 +1,45 @@
+From 1bf2642f4cb2a773cd1d41b9558acf5af81738b7 Mon Sep 17 00:00:00 2001
+From: Chuck Lever <chuck.lever@oracle.com>
+Date: Wed, 11 Jul 2012 16:30:32 -0400
+Subject: NFS: nfs_getaclargs.acl_len is a size_t
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+commit 56d08fef2369d5ca9ad2e1fc697f5379fd8af751 upstream.
+
+Squelch compiler warnings:
+
+fs/nfs/nfs4proc.c: In function ‘__nfs4_get_acl_uncached’:
+fs/nfs/nfs4proc.c:3811:14: warning: comparison between signed and
+       unsigned integer expressions [-Wsign-compare]
+fs/nfs/nfs4proc.c:3818:15: warning: comparison between signed and
+       unsigned integer expressions [-Wsign-compare]
+
+Introduced by commit bf118a34 "NFSv4: include bitmap in nfsv4 get
+acl data", Dec 7, 2011.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Cc: Rui Xiang <rui.xiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/nfs4proc.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -3724,7 +3724,8 @@ static ssize_t __nfs4_get_acl_uncached(s
+               .rpc_argp = &args,
+               .rpc_resp = &res,
+       };
+-      int ret = -ENOMEM, npages, i, acl_len = 0;
++      int ret = -ENOMEM, npages, i;
++      size_t acl_len = 0;
+ 
+       npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       /* As long as we're doing a round trip to the server anyway,
diff --git a/queue-3.4/nfsd-don-t-run-get_file-if-nfs4_preprocess_stateid_op-return-error.patch b/queue-3.4/nfsd-don-t-run-get_file-if-nfs4_preprocess_stateid_op-return-error.patch

new file mode 100644 (file)

index 0000000..60f881f
--- /dev/null
+++ b/queue-3.4/nfsd-don-t-run-get_file-if-nfs4_preprocess_stateid_op-return-error.patch
@@ -0,0 +1,44 @@
+From 7e6c247f30c65913688850620e40b418223a5ff3 Mon Sep 17 00:00:00 2001
+From: fanchaoting <fanchaoting@cn.fujitsu.com>
+Date: Mon, 1 Apr 2013 21:07:22 +0800
+Subject: nfsd: don't run get_file if nfs4_preprocess_stateid_op return error
+
+From: fanchaoting <fanchaoting@cn.fujitsu.com>
+
+commit b022032e195ffca83d7002d6b84297d796ed443b upstream.
+
+We should return the error status directly when nfs4_preprocess_stateid_op
+returns an error.
+
+Signed-off-by: fanchaoting <fanchaoting@cn.fujitsu.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+[bwh: Backported to 3.2: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Cc: Rui Xiang <rui.xiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfsd/nfs4proc.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -904,14 +904,14 @@ nfsd4_write(struct svc_rqst *rqstp, stru
+ 
+       nfs4_lock_state();
+       status = nfs4_preprocess_stateid_op(cstate, stateid, WR_STATE, &filp);
+-      if (filp)
+-              get_file(filp);
+-      nfs4_unlock_state();
+-
+       if (status) {
++              nfs4_unlock_state();
+               dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
+               return status;
+       }
++      if (filp)
++              get_file(filp);
++      nfs4_unlock_state();
+ 
+       cnt = write->wr_buflen;
+       write->wr_how_written = write->wr_stable_how;
diff --git a/queue-3.4/nfsd-nfsd_open-when-dentry_open-returns-an-error-do-not-propagate-as-struct-file.patch b/queue-3.4/nfsd-nfsd_open-when-dentry_open-returns-an-error-do-not-propagate-as-struct-file.patch

new file mode 100644 (file)

index 0000000..f0e20e2
--- /dev/null
+++ b/queue-3.4/nfsd-nfsd_open-when-dentry_open-returns-an-error-do-not-propagate-as-struct-file.patch
@@ -0,0 +1,96 @@
+From e4daf1ffbe6cc3b12aab4d604e627829e93e9914 Mon Sep 17 00:00:00 2001
+From: Harshula Jayasuriya <harshula@redhat.com>
+Date: Tue, 23 Jul 2013 14:21:35 +1000
+Subject: nfsd: nfsd_open: when dentry_open returns an error do not propagate as struct file
+
+From: Harshula Jayasuriya <harshula@redhat.com>
+
+commit e4daf1ffbe6cc3b12aab4d604e627829e93e9914 upstream.
+
+The following call chain:
+------------------------------------------------------------
+nfs4_get_vfs_file
+- nfsd_open
+  - dentry_open
+    - do_dentry_open
+      - __get_file_write_access
+        - get_write_access
+          - return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY;
+------------------------------------------------------------
+
+can result in the following state:
+------------------------------------------------------------
+struct nfs4_file {
+...
+  fi_fds = {0xffff880c1fa65c80, 0xffffffffffffffe6, 0x0},
+  fi_access = {{
+      counter = 0x1
+    }, {
+      counter = 0x0
+    }},
+...
+------------------------------------------------------------
+
+1) First time around, in nfs4_get_vfs_file() fp->fi_fds[O_WRONLY] is
+NULL, hence nfsd_open() is called where we get status set to an error
+and fp->fi_fds[O_WRONLY] to -ETXTBSY. Thus we do not reach
+nfs4_file_get_access() and fi_access[O_WRONLY] is not incremented.
+
+2) Second time around, in nfs4_get_vfs_file() fp->fi_fds[O_WRONLY] is
+NOT NULL (-ETXTBSY), so nfsd_open() is NOT called, but
+nfs4_file_get_access() IS called and fi_access[O_WRONLY] is incremented.
+Thus we leave a landmine in the form of the nfs4_file data structure in
+an incorrect state.
+
+3) Eventually, when __nfs4_file_put_access() is called it finds
+fi_access[O_WRONLY] being non-zero, it decrements it and calls
+nfs4_file_put_fd() which tries to fput -ETXTBSY.
+------------------------------------------------------------
+...
+     [exception RIP: fput+0x9]
+     RIP: ffffffff81177fa9  RSP: ffff88062e365c90  RFLAGS: 00010282
+     RAX: ffff880c2b3d99cc  RBX: ffff880c2b3d9978  RCX: 0000000000000002
+     RDX: dead000000100101  RSI: 0000000000000001  RDI: ffffffffffffffe6
+     RBP: ffff88062e365c90   R8: ffff88041fe797d8   R9: ffff88062e365d58
+     R10: 0000000000000008  R11: 0000000000000000  R12: 0000000000000001
+     R13: 0000000000000007  R14: 0000000000000000  R15: 0000000000000000
+     ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
+  #9 [ffff88062e365c98] __nfs4_file_put_access at ffffffffa0562334 [nfsd]
+ #10 [ffff88062e365cc8] nfs4_file_put_access at ffffffffa05623ab [nfsd]
+ #11 [ffff88062e365ce8] free_generic_stateid at ffffffffa056634d [nfsd]
+ #12 [ffff88062e365d18] release_open_stateid at ffffffffa0566e4b [nfsd]
+ #13 [ffff88062e365d38] nfsd4_close at ffffffffa0567401 [nfsd]
+ #14 [ffff88062e365d88] nfsd4_proc_compound at ffffffffa0557f28 [nfsd]
+ #15 [ffff88062e365dd8] nfsd_dispatch at ffffffffa054543e [nfsd]
+ #16 [ffff88062e365e18] svc_process_common at ffffffffa04ba5a4 [sunrpc]
+ #17 [ffff88062e365e98] svc_process at ffffffffa04babe0 [sunrpc]
+ #18 [ffff88062e365eb8] nfsd at ffffffffa0545b62 [nfsd]
+ #19 [ffff88062e365ee8] kthread at ffffffff81090886
+ #20 [ffff88062e365f48] kernel_thread at ffffffff8100c14a
+------------------------------------------------------------
+
+
+Signed-off-by: Harshula Jayasuriya <harshula@redhat.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+[xr: Backported to 3.4: adjust context]
+Signed-off-by: Rui Xiang <rui.xiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/vfs.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -828,9 +828,10 @@ nfsd_open(struct svc_rqst *rqstp, struct
+       }
+       *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
+                           flags, current_cred());
+-      if (IS_ERR(*filp))
++      if (IS_ERR(*filp)) {
+               host_err = PTR_ERR(*filp);
+-      else {
++              *filp = NULL;
++      } else {
+               host_err = ima_file_check(*filp, may_flags);
+ 
+               if (may_flags & NFSD_MAY_64BIT_COOKIE)
diff --git a/queue-3.4/nfsd4-fix-xdr-decoding-of-large-non-write-compounds.patch b/queue-3.4/nfsd4-fix-xdr-decoding-of-large-non-write-compounds.patch

new file mode 100644 (file)

index 0000000..cd1558a
--- /dev/null
+++ b/queue-3.4/nfsd4-fix-xdr-decoding-of-large-non-write-compounds.patch
@@ -0,0 +1,43 @@
+From bca06620c941f2427f13710e330adcef1cf30007 Mon Sep 17 00:00:00 2001
+From: "J. Bruce Fields" <bfields@redhat.com>
+Date: Tue, 19 Nov 2013 17:32:43 -0500
+Subject: nfsd4: fix xdr decoding of large non-write compounds
+
+From: "J. Bruce Fields" <bfields@redhat.com>
+
+commit 365da4adebb1c012febf81019ad3dc5bb52e2a13 upstream.
+
+This fixes a regression from 247500820ebd02ad87525db5d9b199e5b66f6636
+"nfsd4: fix decoding of compounds across page boundaries".  The previous
+code was correct: argp->pagelist is initialized in
+nfs4svc_decode_compoundargs to rqstp->rq_arg.pages, and is therefore a
+pointer to the page *after* the page we are currently decoding.
+
+The reason that patch nevertheless fixed a problem with decoding
+compounds containing write was a bug in the write decoding introduced by
+5a80a54d21c96590d013378d8c5f65f879451ab4 "nfsd4: reorganize write
+decoding", after which write decoding no longer adhered to the rule that
+argp->pagelist point to the next page.
+
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+[bwh: Backported to 3.2: adjust context; there is only one instance to fix]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Cc: Rui Xiang <rui.xiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfsd/nfs4xdr.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -161,8 +161,8 @@ static __be32 *read_buf(struct nfsd4_com
+        */
+       memcpy(p, argp->p, avail);
+       /* step to next page */
+-      argp->pagelist++;
+       argp->p = page_address(argp->pagelist[0]);
++      argp->pagelist++;
+       if (argp->pagelen < PAGE_SIZE) {
+               argp->end = argp->p + (argp->pagelen>>2);
+               argp->pagelen = 0;
diff --git a/queue-3.4/nfsv4-wait-on-recovery-for-async-session-errors.patch b/queue-3.4/nfsv4-wait-on-recovery-for-async-session-errors.patch

new file mode 100644 (file)

index 0000000..3bad86b
--- /dev/null
+++ b/queue-3.4/nfsv4-wait-on-recovery-for-async-session-errors.patch
@@ -0,0 +1,78 @@
+From a687a58fd0fa69d405d777eba5f23a0c00cce2e7 Mon Sep 17 00:00:00 2001
+From: Andy Adamson <andros@netapp.com>
+Date: Fri, 15 Nov 2013 16:36:16 -0500
+Subject: NFSv4 wait on recovery for async session errors
+
+From: Andy Adamson <andros@netapp.com>
+
+commit 4a82fd7c4e78a1b7a224f9ae8bb7e1fd95f670e0 upstream.
+
+When the state manager is processing the NFS4CLNT_DELEGRETURN flag, session
+draining is off, but DELEGRETURN can still get a session error.
+The async handler calls nfs4_schedule_session_recovery, returns -EAGAIN, and
+the DELEGRETURN done then restarts the RPC task in the prepare state.
+With the state manager still processing the NFS4CLNT_DELEGRETURN flag with
+session draining off, these DELEGRETURNs will cycle with errors filling up the
+session slots.
+
+This prevents OPEN reclaims (from nfs_delegation_claim_opens) required by the
+NFS4CLNT_DELEGRETURN state manager processing from completing, hanging the
+state manager in the __rpc_wait_for_completion_task in nfs4_run_open_task
+as seen in this kernel thread dump:
+
+kernel: 4.12.32.53-ma D 0000000000000000     0  3393      2 0x00000000
+kernel: ffff88013995fb60 0000000000000046 ffff880138cc5400 ffff88013a9df140
+kernel: ffff8800000265c0 ffffffff8116eef0 ffff88013fc10080 0000000300000001
+kernel: ffff88013a4ad058 ffff88013995ffd8 000000000000fbc8 ffff88013a4ad058
+kernel: Call Trace:
+kernel: [<ffffffff8116eef0>] ? cache_alloc_refill+0x1c0/0x240
+kernel: [<ffffffffa0358110>] ? rpc_wait_bit_killable+0x0/0xa0 [sunrpc]
+kernel: [<ffffffffa0358152>] rpc_wait_bit_killable+0x42/0xa0 [sunrpc]
+kernel: [<ffffffff8152914f>] __wait_on_bit+0x5f/0x90
+kernel: [<ffffffffa0358110>] ? rpc_wait_bit_killable+0x0/0xa0 [sunrpc]
+kernel: [<ffffffff815291f8>] out_of_line_wait_on_bit+0x78/0x90
+kernel: [<ffffffff8109b520>] ? wake_bit_function+0x0/0x50
+kernel: [<ffffffffa035810d>] __rpc_wait_for_completion_task+0x2d/0x30 [sunrpc]
+kernel: [<ffffffffa040d44c>] nfs4_run_open_task+0x11c/0x160 [nfs]
+kernel: [<ffffffffa04114e7>] nfs4_open_recover_helper+0x87/0x120 [nfs]
+kernel: [<ffffffffa0411646>] nfs4_open_recover+0xc6/0x150 [nfs]
+kernel: [<ffffffffa040cc6f>] ? nfs4_open_recoverdata_alloc+0x2f/0x60 [nfs]
+kernel: [<ffffffffa0414e1a>] nfs4_open_delegation_recall+0x6a/0xa0 [nfs]
+kernel: [<ffffffffa0424020>] nfs_end_delegation_return+0x120/0x2e0 [nfs]
+kernel: [<ffffffff8109580f>] ? queue_work+0x1f/0x30
+kernel: [<ffffffffa0424347>] nfs_client_return_marked_delegations+0xd7/0x110 [nfs]
+kernel: [<ffffffffa04225d8>] nfs4_run_state_manager+0x548/0x620 [nfs]
+kernel: [<ffffffffa0422090>] ? nfs4_run_state_manager+0x0/0x620 [nfs]
+kernel: [<ffffffff8109b0f6>] kthread+0x96/0xa0
+kernel: [<ffffffff8100c20a>] child_rip+0xa/0x20
+kernel: [<ffffffff8109b060>] ? kthread+0x0/0xa0
+kernel: [<ffffffff8100c200>] ? child_rip+0x0/0x20
+
+The state manager can not therefore process the DELEGRETURN session errors.
+Change the async handler to wait for recovery on session errors.
+
+Signed-off-by: Andy Adamson <andros@netapp.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+[bwh: Backported to 3.2:
+ - Adjust context
+ - There's no restart_call label]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Cc: Rui Xiang <rui.xiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/nfs4proc.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -3910,8 +3910,7 @@ nfs4_async_handle_error(struct rpc_task
+                       dprintk("%s ERROR %d, Reset session\n", __func__,
+                               task->tk_status);
+                       nfs4_schedule_session_recovery(clp->cl_session);
+-                      task->tk_status = 0;
+-                      return -EAGAIN;
++                      goto wait_on_recovery;
+ #endif /* CONFIG_NFS_V4_1 */
+               case -NFS4ERR_DELAY:
+                       nfs_inc_server_stats(server, NFSIOS_DELAY);
diff --git a/queue-3.4/nfsv4.1-don-t-decode-skipped-layoutgets.patch b/queue-3.4/nfsv4.1-don-t-decode-skipped-layoutgets.patch

new file mode 100644 (file)

index 0000000..97b114c
--- /dev/null
+++ b/queue-3.4/nfsv4.1-don-t-decode-skipped-layoutgets.patch
@@ -0,0 +1,74 @@
+From 3c5add9c302ac2e86a1b99c738fc3d1c06dfc03a Mon Sep 17 00:00:00 2001
+From: Weston Andros Adamson <dros@netapp.com>
+Date: Fri, 15 Feb 2013 16:03:46 -0500
+Subject: NFSv4.1: Don't decode skipped layoutgets
+
+From: Weston Andros Adamson <dros@netapp.com>
+
+commit 085b7a45c63d3da5be155faab9249a5cab224561 upstream.
+
+layoutget's prepare hook can call rpc_exit with status = NFS4_OK (0).
+Because of this, nfs4_proc_layoutget can't depend on a 0 status to mean
+that the RPC was successfully sent, received and parsed.
+
+To fix this, use the result's len member to see if parsing took place.
+
+This fixes the following OOPS -- calling xdr_init_decode() with a buffer length
+0 doesn't set the stream's 'p' member and ends up using uninitialized memory
+in filelayout_decode_layout.
+
+BUG: unable to handle kernel paging request at 0000000000008050
+IP: [<ffffffff81282e78>] memcpy+0x18/0x120
+PGD 0
+Oops: 0000 [#1] SMP
+last sysfs file: /sys/devices/pci0000:00/0000:00:11.0/0000:02:01.0/irq
+CPU 1
+Modules linked in: nfs_layout_nfsv41_files nfs lockd fscache auth_rpcgss nfs_acl autofs4 sunrpc ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables ipv6 dm_mirror dm_region_hash dm_log dm_mod ppdev parport_pc parport snd_ens1371 snd_rawmidi snd_ac97_codec ac97_bus snd_seq snd_seq_device snd_pcm snd_timer snd soundcore snd_page_alloc e1000 microcode vmware_balloon i2c_piix4 i2c_core sg shpchp ext4 mbcache jbd2 sr_mod cdrom sd_mod crc_t10dif pata_acpi ata_generic ata_piix mptspi mptscsih mptbase scsi_transport_spi [last unloaded: speedstep_lib]
+
+Pid: 1665, comm: flush-0:22 Not tainted 2.6.32-356-test-2 #2 VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform
+RIP: 0010:[<ffffffff81282e78>]  [<ffffffff81282e78>] memcpy+0x18/0x120
+RSP: 0018:ffff88003dfab588  EFLAGS: 00010206
+RAX: ffff88003dc42000 RBX: ffff88003dfab610 RCX: 0000000000000009
+RDX: 000000003f807ff0 RSI: 0000000000008050 RDI: ffff88003dc42000
+RBP: ffff88003dfab5b0 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000080 R12: 0000000000000024
+R13: ffff88003dc42000 R14: ffff88003f808030 R15: ffff88003dfab6a0
+FS:  0000000000000000(0000) GS:ffff880003420000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
+CR2: 0000000000008050 CR3: 000000003bc92000 CR4: 00000000001407e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
+Process flush-0:22 (pid: 1665, threadinfo ffff88003dfaa000, task ffff880037f77540)
+Stack:
+ffffffffa0398ac1 ffff8800397c5940 ffff88003dfab610 ffff88003dfab6a0
+<d> ffff88003dfab5d0 ffff88003dfab680 ffffffffa01c150b ffffea0000d82e70
+<d> 000000508116713b 0000000000000000 0000000000000000 0000000000000000
+Call Trace:
+[<ffffffffa0398ac1>] ? xdr_inline_decode+0xb1/0x120 [sunrpc]
+[<ffffffffa01c150b>] filelayout_decode_layout+0xeb/0x350 [nfs_layout_nfsv41_files]
+[<ffffffffa01c17fc>] filelayout_alloc_lseg+0x8c/0x3c0 [nfs_layout_nfsv41_files]
+[<ffffffff8150e6ce>] ? __wait_on_bit+0x7e/0x90
+
+Signed-off-by: Weston Andros Adamson <dros@netapp.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+[bwh: Backported to 3.2: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Cc: Rui Xiang <rui.xiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/nfs4proc.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -6084,7 +6084,8 @@ int nfs4_proc_layoutget(struct nfs4_layo
+       status = nfs4_wait_for_completion_rpc_task(task);
+       if (status == 0)
+               status = task->tk_status;
+-      if (status == 0)
++      /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
++      if (status == 0 && lgp->res.layoutp->len)
+               status = pnfs_layout_process(lgp);
+       rpc_put_task(task);
+       dprintk("<-- %s status=%d\n", __func__, status);
diff --git a/queue-3.4/nfsv4.1-fix-a-race-in-pnfs-layoutcommit.patch b/queue-3.4/nfsv4.1-fix-a-race-in-pnfs-layoutcommit.patch

new file mode 100644 (file)

index 0000000..76a1f74
--- /dev/null
+++ b/queue-3.4/nfsv4.1-fix-a-race-in-pnfs-layoutcommit.patch
@@ -0,0 +1,95 @@
+From 9ca2044f9041283bff117bb7e9ba7e9cd644bae0 Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+Date: Wed, 20 Mar 2013 12:34:32 -0400
+Subject: NFSv4.1: Fix a race in pNFS layoutcommit
+
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+
+commit a073dbff359f4741013ae4b8395f5364c5e00b48 upstream.
+
+We need to clear the NFS_LSEG_LAYOUTCOMMIT bits atomically with the
+NFS_INO_LAYOUTCOMMIT bit, otherwise we may end up with situations
+where the two are out of sync.
+The first half of the problem is to ensure that pnfs_layoutcommit_inode
+clears the NFS_LSEG_LAYOUTCOMMIT bit through pnfs_list_write_lseg.
+We still need to keep the reference to those segments until the RPC call
+is finished, so in order to make it clear _where_ those references come
+from, we add a helper pnfs_list_write_lseg_done() that cleans up after
+pnfs_list_write_lseg.
+
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Acked-by: Benny Halevy <bhalevy@tonian.com>
+[bwh: Backported to 3.2: s/pnfs_put_lseg/put_lseg/]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Cc: Rui Xiang <rui.xiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/nfs4proc.c |   14 --------------
+ fs/nfs/pnfs.c     |   19 ++++++++++++++++++-
+ 2 files changed, 18 insertions(+), 15 deletions(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -6297,22 +6297,8 @@ nfs4_layoutcommit_done(struct rpc_task *
+ static void nfs4_layoutcommit_release(void *calldata)
+ {
+       struct nfs4_layoutcommit_data *data = calldata;
+-      struct pnfs_layout_segment *lseg, *tmp;
+-      unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
+ 
+       pnfs_cleanup_layoutcommit(data);
+-      /* Matched by references in pnfs_set_layoutcommit */
+-      list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) {
+-              list_del_init(&lseg->pls_lc_list);
+-              if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
+-                                     &lseg->pls_flags))
+-                      put_lseg(lseg);
+-      }
+-
+-      clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
+-      smp_mb__after_clear_bit();
+-      wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
+-
+       put_rpccred(data->cred);
+       kfree(data);
+ }
+--- a/fs/nfs/pnfs.c
++++ b/fs/nfs/pnfs.c
+@@ -1381,11 +1381,27 @@ static void pnfs_list_write_lseg(struct
+ 
+       list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
+               if (lseg->pls_range.iomode == IOMODE_RW &&
+-                  test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
++                  test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
+                       list_add(&lseg->pls_lc_list, listp);
+       }
+ }
+ 
++static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
++{
++      struct pnfs_layout_segment *lseg, *tmp;
++      unsigned long *bitlock = &NFS_I(inode)->flags;
++
++      /* Matched by references in pnfs_set_layoutcommit */
++      list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
++              list_del_init(&lseg->pls_lc_list);
++              put_lseg(lseg);
++      }
++
++      clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
++      smp_mb__after_clear_bit();
++      wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
++}
++
+ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
+ {
+       if (lseg->pls_range.iomode == IOMODE_RW) {
+@@ -1434,6 +1450,7 @@ void pnfs_cleanup_layoutcommit(struct nf
+ 
+       if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
+               nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
++      pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
+ }
+ 
+ /*
diff --git a/queue-3.4/nfsv4.1-handle-nfs4err_delay-when-resetting-the-nfsv4.1-session.patch b/queue-3.4/nfsv4.1-handle-nfs4err_delay-when-resetting-the-nfsv4.1-session.patch

new file mode 100644 (file)

index 0000000..8dd7b1e
--- /dev/null
+++ b/queue-3.4/nfsv4.1-handle-nfs4err_delay-when-resetting-the-nfsv4.1-session.patch
@@ -0,0 +1,49 @@
+From 78e3ae2d57c0df313b079a07f6ffc16e4041e56c Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+Date: Wed, 30 Jan 2013 13:04:10 -0500
+Subject: NFSv4.1: Handle NFS4ERR_DELAY when resetting the NFSv4.1 session
+
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+
+commit c489ee290bdbbace6bb63ebe6ebd4dd605819495 upstream.
+
+NFS4ERR_DELAY is a legal reply when we call DESTROY_SESSION. It
+usually means that the server is busy handling an unfinished RPC
+request. Just sleep for a second and then retry.
+We also need to be able to handle the NFS4ERR_BACK_CHAN_BUSY return
+value. If the NFS server has outstanding callbacks, we just want to
+similarly sleep & retry.
+
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+[bwh: Backported to 3.2: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Cc: Rui Xiang <rui.xiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/nfs4state.c |   14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/fs/nfs/nfs4state.c
++++ b/fs/nfs/nfs4state.c
+@@ -1651,8 +1651,18 @@ static int nfs4_reset_session(struct nfs
+ 
+       nfs4_begin_drain_session(clp);
+       status = nfs4_proc_destroy_session(clp->cl_session);
+-      if (status && status != -NFS4ERR_BADSESSION &&
+-          status != -NFS4ERR_DEADSESSION) {
++      switch (status) {
++      case 0:
++      case -NFS4ERR_BADSESSION:
++      case -NFS4ERR_DEADSESSION:
++              break;
++      case -NFS4ERR_BACK_CHAN_BUSY:
++      case -NFS4ERR_DELAY:
++              set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
++              status = 0;
++              ssleep(1);
++              goto out;
++      default:
+               status = nfs4_recovery_handle_error(clp, status);
+               goto out;
+       }
diff --git a/queue-3.4/nfsv4.1-integer-overflow-in-decode_cb_sequence_args.patch b/queue-3.4/nfsv4.1-integer-overflow-in-decode_cb_sequence_args.patch

new file mode 100644 (file)

index 0000000..9b3b8eb
--- /dev/null
+++ b/queue-3.4/nfsv4.1-integer-overflow-in-decode_cb_sequence_args.patch
@@ -0,0 +1,37 @@
+From 70bea7f2c038f04b5bc2e84f12615f79ed394d13 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Tue, 12 Jun 2012 10:37:08 +0300
+Subject: NFSv4.1: integer overflow in decode_cb_sequence_args()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit 0439f31c35d1da0b28988b308ea455e38e6a350d upstream.
+
+This seems like it could overflow on 32 bits.  Use kmalloc_array() which
+has overflow protection built in.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Cc: Rui Xiang <rui.xiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/callback_xdr.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/nfs/callback_xdr.c
++++ b/fs/nfs/callback_xdr.c
+@@ -455,9 +455,9 @@ static __be32 decode_cb_sequence_args(st
+       args->csa_nrclists = ntohl(*p++);
+       args->csa_rclists = NULL;
+       if (args->csa_nrclists) {
+-              args->csa_rclists = kmalloc(args->csa_nrclists *
+-                                          sizeof(*args->csa_rclists),
+-                                          GFP_KERNEL);
++              args->csa_rclists = kmalloc_array(args->csa_nrclists,
++                                                sizeof(*args->csa_rclists),
++                                                GFP_KERNEL);
+               if (unlikely(args->csa_rclists == NULL))
+                       goto out;
+ 
diff --git a/queue-3.4/series b/queue-3.4/series

index 898e79d3deca5c3c2b89cdd989142a2a150f4779..1db0b9672b5789df6d1e1f8e0fb90155231f613a 100644 (file)
--- a/queue-3.4/series
+++ b/queue-3.4/series
@@ -84,3 +84,18 @@ perf-fix-error-return-code.patch
  tracing-keep-overwrite-in-sync-between-regular-and-snapshot-buffers.patch
  vfs-make-vfs_fstat-use-f_light.patch
  cifs-delay-super-block-destruction-until-all-cifsfileinfo-objects-are-gone.patch
+nfsv4-wait-on-recovery-for-async-session-errors.patch
+nfsd4-fix-xdr-decoding-of-large-non-write-compounds.patch
+nfsv4.1-integer-overflow-in-decode_cb_sequence_args.patch
+nfsd-don-t-run-get_file-if-nfs4_preprocess_stateid_op-return-error.patch
+nfs-nfs_getaclargs.acl_len-is-a-size_t.patch
+nfsv4.1-fix-a-race-in-pnfs-layoutcommit.patch
+nfsv4.1-don-t-decode-skipped-layoutgets.patch
+nfsv4.1-handle-nfs4err_delay-when-resetting-the-nfsv4.1-session.patch
+dm-bufio-avoid-a-possible-__vmalloc-deadlock.patch
+dm-snapshot-add-missing-module-aliases.patch
+md-raid10-fix-enough-function-for-detecting-if-array-is-failed.patch
+nfsd-nfsd_open-when-dentry_open-returns-an-error-do-not-propagate-as-struct-file.patch
+dm-snapshot-avoid-snapshot-space-leak-on-crash.patch
+dm-mpath-fix-race-condition-between-multipath_dtr-and-pg_init_done.patch
+dm-thin-fix-discard-corruption.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 4 Jun 2014 23:50:24 +0000 (16:50 -0700)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 4 Jun 2014 23:50:24 +0000 (16:50 -0700)
queue-3.4/dm-bufio-avoid-a-possible-__vmalloc-deadlock.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/dm-mpath-fix-race-condition-between-multipath_dtr-and-pg_init_done.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/dm-snapshot-add-missing-module-aliases.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/dm-snapshot-avoid-snapshot-space-leak-on-crash.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/dm-thin-fix-discard-corruption.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/md-raid10-fix-enough-function-for-detecting-if-array-is-failed.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/nfs-nfs_getaclargs.acl_len-is-a-size_t.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/nfsd-don-t-run-get_file-if-nfs4_preprocess_stateid_op-return-error.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/nfsd-nfsd_open-when-dentry_open-returns-an-error-do-not-propagate-as-struct-file.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/nfsd4-fix-xdr-decoding-of-large-non-write-compounds.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/nfsv4-wait-on-recovery-for-async-session-errors.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/nfsv4.1-don-t-decode-skipped-layoutgets.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/nfsv4.1-fix-a-race-in-pnfs-layoutcommit.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/nfsv4.1-handle-nfs4err_delay-when-resetting-the-nfsv4.1-session.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/nfsv4.1-integer-overflow-in-decode_cb_sequence_args.patch	[new file with mode: 0644]	patch \| blob
queue-3.4/series		patch \| blob \| blame \| history