From: Greg Kroah-Hartman Date: Wed, 4 Jun 2014 23:50:24 +0000 (-0700) Subject: 3.4-stable patches X-Git-Tag: v3.14.6~28 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4267360f22d6e985ae3430e7d8de7bbe06658899;p=thirdparty%2Fkernel%2Fstable-queue.git 3.4-stable patches added patches: dm-bufio-avoid-a-possible-__vmalloc-deadlock.patch dm-mpath-fix-race-condition-between-multipath_dtr-and-pg_init_done.patch dm-snapshot-add-missing-module-aliases.patch dm-snapshot-avoid-snapshot-space-leak-on-crash.patch dm-thin-fix-discard-corruption.patch md-raid10-fix-enough-function-for-detecting-if-array-is-failed.patch nfs-nfs_getaclargs.acl_len-is-a-size_t.patch nfsd-don-t-run-get_file-if-nfs4_preprocess_stateid_op-return-error.patch nfsd-nfsd_open-when-dentry_open-returns-an-error-do-not-propagate-as-struct-file.patch nfsd4-fix-xdr-decoding-of-large-non-write-compounds.patch nfsv4-wait-on-recovery-for-async-session-errors.patch nfsv4.1-don-t-decode-skipped-layoutgets.patch nfsv4.1-fix-a-race-in-pnfs-layoutcommit.patch nfsv4.1-handle-nfs4err_delay-when-resetting-the-nfsv4.1-session.patch nfsv4.1-integer-overflow-in-decode_cb_sequence_args.patch --- diff --git a/queue-3.4/dm-bufio-avoid-a-possible-__vmalloc-deadlock.patch b/queue-3.4/dm-bufio-avoid-a-possible-__vmalloc-deadlock.patch new file mode 100644 index 00000000000..c12f760f514 --- /dev/null +++ b/queue-3.4/dm-bufio-avoid-a-possible-__vmalloc-deadlock.patch @@ -0,0 +1,83 @@ +From 4725f1715429f75fa5f053dbe05575d08aeb5967 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Fri, 10 May 2013 14:37:15 +0100 +Subject: dm bufio: avoid a possible __vmalloc deadlock + +From: Mikulas Patocka + +commit 502624bdad3dba45dfaacaf36b7d83e39e74b2d2 upstream. + +This patch uses memalloc_noio_save to avoid a possible deadlock in +dm-bufio. (it could happen only with large block size, at most +PAGE_SIZE << MAX_ORDER (typically 8MiB). + +__vmalloc doesn't fully respect gfp flags. 
The specified gfp flags are +used for allocation of requested pages, structures vmap_area, vmap_block +and vm_struct and the radix tree nodes. + +However, the kernel pagetables are allocated always with GFP_KERNEL. +Thus the allocation of pagetables can recurse back to the I/O layer and +cause a deadlock. + +This patch uses the function memalloc_noio_save to set per-process +PF_MEMALLOC_NOIO flag and the function memalloc_noio_restore to restore +it. When this flag is set, all allocations in the process are done with +implied GFP_NOIO flag, thus the deadlock can't happen. + +This should be backported to stable kernels, but they don't have the +PF_MEMALLOC_NOIO flag and memalloc_noio_save/memalloc_noio_restore +functions. So, PF_MEMALLOC should be set and restored instead. + +Signed-off-by: Mikulas Patocka +Signed-off-by: Alasdair G Kergon +[bwh: Backported to 3.2 as recommended] +Signed-off-by: Ben Hutchings +Cc: Rui Xiang +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-bufio.c | 26 +++++++++++++++++++++++++- + 1 file changed, 25 insertions(+), 1 deletion(-) + +--- a/drivers/md/dm-bufio.c ++++ b/drivers/md/dm-bufio.c +@@ -321,6 +321,9 @@ static void __cache_size_refresh(void) + static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, + enum data_mode *data_mode) + { ++ unsigned noio_flag; ++ void *ptr; ++ + if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) { + *data_mode = DATA_MODE_SLAB; + return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask); +@@ -334,7 +337,28 @@ static void *alloc_buffer_data(struct dm + } + + *data_mode = DATA_MODE_VMALLOC; +- return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); ++ ++ /* ++ * __vmalloc allocates the data pages and auxiliary structures with ++ * gfp_flags that were specified, but pagetables are always allocated ++ * with GFP_KERNEL, no matter what was specified as gfp_mask. 
++ * ++ * Consequently, we must set per-process flag PF_MEMALLOC_NOIO so that ++ * all allocations done by this process (including pagetables) are done ++ * as if GFP_NOIO was specified. ++ */ ++ ++ if (gfp_mask & __GFP_NORETRY) { ++ noio_flag = current->flags & PF_MEMALLOC; ++ current->flags |= PF_MEMALLOC; ++ } ++ ++ ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); ++ ++ if (gfp_mask & __GFP_NORETRY) ++ current->flags = (current->flags & ~PF_MEMALLOC) | noio_flag; ++ ++ return ptr; + } + + /* diff --git a/queue-3.4/dm-mpath-fix-race-condition-between-multipath_dtr-and-pg_init_done.patch b/queue-3.4/dm-mpath-fix-race-condition-between-multipath_dtr-and-pg_init_done.patch new file mode 100644 index 00000000000..aa42088bffe --- /dev/null +++ b/queue-3.4/dm-mpath-fix-race-condition-between-multipath_dtr-and-pg_init_done.patch @@ -0,0 +1,110 @@ +From 954a73d5d3073df2231820c718fdd2f18b0fe4c9 Mon Sep 17 00:00:00 2001 +From: Shiva Krishna Merla +Date: Wed, 30 Oct 2013 03:26:38 +0000 +Subject: dm mpath: fix race condition between multipath_dtr and pg_init_done + +From: Shiva Krishna Merla + +commit 954a73d5d3073df2231820c718fdd2f18b0fe4c9 upstream. + +Whenever multipath_dtr() is happening we must prevent queueing any +further path activation work. Implement this by adding a new +'pg_init_disabled' flag to the multipath structure that denotes future +path activation work should be skipped if it is set. By disabling +pg_init and then re-enabling in flush_multipath_work() we also avoid the +potential for pg_init to be initiated while suspending an mpath device. + +Without this patch a race condition exists that may result in a kernel +panic: + +1) If after pg_init_done() decrements pg_init_in_progress to 0, a call + to wait_for_pg_init_completion() assumes there are no more pending path + management commands. +2) If pg_init_required is set by pg_init_done(), due to retryable + mode_select errors, then process_queued_ios() will again queue the + path activation work. 
+3) If free_multipath() completes before activate_path() work is called a + NULL pointer dereference like the following can be seen when + accessing members of the recently destructed multipath: + +BUG: unable to handle kernel NULL pointer dereference at 0000000000000090 +RIP: 0010:[] [] activate_path+0x1b/0x30 [dm_multipath] +[] worker_thread+0x170/0x2a0 +[] ? autoremove_wake_function+0x0/0x40 + +[switch to disabling pg_init in flush_multipath_work & header edits by Mike Snitzer] +Signed-off-by: Shiva Krishna Merla +Reviewed-by: Krishnasamy Somasundaram +Tested-by: Speagle Andy +Acked-by: Junichi Nomura +Signed-off-by: Mike Snitzer +[bwh: Backported to 3.2: + - Adjust context + - Bump version to 1.3.2 not 1.6.0] +Signed-off-by: Ben Hutchings +[xr: Backported to 3.4: Adjust context] +Signed-off-by: Rui Xiang +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-mpath.c | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +--- a/drivers/md/dm-mpath.c ++++ b/drivers/md/dm-mpath.c +@@ -84,6 +84,7 @@ struct multipath { + unsigned queue_io; /* Must we queue all I/O? */ + unsigned queue_if_no_path; /* Queue I/O if last path fails? 
*/ + unsigned saved_queue_if_no_path;/* Saved state during suspension */ ++ unsigned pg_init_disabled:1; /* pg_init is not currently allowed */ + unsigned pg_init_retries; /* Number of times to retry pg_init */ + unsigned pg_init_count; /* Number of times pg_init called */ + unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */ +@@ -493,7 +494,8 @@ static void process_queued_ios(struct wo + (!pgpath && !m->queue_if_no_path)) + must_queue = 0; + +- if (m->pg_init_required && !m->pg_init_in_progress && pgpath) ++ if (m->pg_init_required && !m->pg_init_in_progress && pgpath && ++ !m->pg_init_disabled) + __pg_init_all_paths(m); + + out: +@@ -907,10 +909,20 @@ static void multipath_wait_for_pg_init_c + + static void flush_multipath_work(struct multipath *m) + { ++ unsigned long flags; ++ ++ spin_lock_irqsave(&m->lock, flags); ++ m->pg_init_disabled = 1; ++ spin_unlock_irqrestore(&m->lock, flags); ++ + flush_workqueue(kmpath_handlerd); + multipath_wait_for_pg_init_completion(m); + flush_workqueue(kmultipathd); + flush_work_sync(&m->trigger_event); ++ ++ spin_lock_irqsave(&m->lock, flags); ++ m->pg_init_disabled = 0; ++ spin_unlock_irqrestore(&m->lock, flags); + } + + static void multipath_dtr(struct dm_target *ti) +@@ -1129,7 +1141,7 @@ static int pg_init_limit_reached(struct + + spin_lock_irqsave(&m->lock, flags); + +- if (m->pg_init_count <= m->pg_init_retries) ++ if (m->pg_init_count <= m->pg_init_retries && !m->pg_init_disabled) + m->pg_init_required = 1; + else + limit_reached = 1; +@@ -1644,7 +1656,7 @@ out: + *---------------------------------------------------------------*/ + static struct target_type multipath_target = { + .name = "multipath", +- .version = {1, 3, 0}, ++ .version = {1, 3, 2}, + .module = THIS_MODULE, + .ctr = multipath_ctr, + .dtr = multipath_dtr, diff --git a/queue-3.4/dm-snapshot-add-missing-module-aliases.patch b/queue-3.4/dm-snapshot-add-missing-module-aliases.patch new file mode 100644 index 00000000000..bef7c72fe3f --- 
/dev/null +++ b/queue-3.4/dm-snapshot-add-missing-module-aliases.patch @@ -0,0 +1,33 @@ +From e0179c31e0818d5efc87a475f2b83979ef2d3585 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Fri, 1 Mar 2013 22:45:47 +0000 +Subject: dm snapshot: add missing module aliases + +From: Mikulas Patocka + +commit 23cb21092eb9dcec9d3604b68d95192b79915890 upstream. + +Add module aliases so that autoloading works correctly if the user +tries to activate "snapshot-origin" or "snapshot-merge" targets. + +Reference: https://bugzilla.redhat.com/889973 + +Reported-by: Chao Yang +Signed-off-by: Mikulas Patocka +Signed-off-by: Alasdair G Kergon +Signed-off-by: Ben Hutchings +Cc: Rui Xiang +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-snap.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/md/dm-snap.c ++++ b/drivers/md/dm-snap.c +@@ -2323,3 +2323,5 @@ module_exit(dm_snapshot_exit); + MODULE_DESCRIPTION(DM_NAME " snapshot target"); + MODULE_AUTHOR("Joe Thornber"); + MODULE_LICENSE("GPL"); ++MODULE_ALIAS("dm-snapshot-origin"); ++MODULE_ALIAS("dm-snapshot-merge"); diff --git a/queue-3.4/dm-snapshot-avoid-snapshot-space-leak-on-crash.patch b/queue-3.4/dm-snapshot-avoid-snapshot-space-leak-on-crash.patch new file mode 100644 index 00000000000..1fd7fa0ab08 --- /dev/null +++ b/queue-3.4/dm-snapshot-avoid-snapshot-space-leak-on-crash.patch @@ -0,0 +1,195 @@ +From 230c83afdd9cd384348475bea1e14b80b3b6b1b8 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Fri, 29 Nov 2013 18:13:37 -0500 +Subject: dm snapshot: avoid snapshot space leak on crash + +From: Mikulas Patocka + +commit 230c83afdd9cd384348475bea1e14b80b3b6b1b8 upstream. + +There is a possible leak of snapshot space in case of crash. + +The reason for space leaking is that chunks in the snapshot device are +allocated sequentially, but they are finished (and stored in the metadata) +out of order, depending on the order in which copying finished. 
+ +For example, suppose that the metadata contains the following records +SUPERBLOCK +METADATA (blocks 0 ... 250) +DATA 0 +DATA 1 +DATA 2 +... +DATA 250 + +Now suppose that you allocate 10 new data blocks 251-260. Suppose that +copying of these blocks finishes out of order (block 260 finished first +and the block 251 finished last). Now, the snapshot device looks like +this: +SUPERBLOCK +METADATA (blocks 0 ... 250, 260, 259, 258, 257, 256) +DATA 0 +DATA 1 +DATA 2 +... +DATA 250 +DATA 251 +DATA 252 +DATA 253 +DATA 254 +DATA 255 +METADATA (blocks 255, 254, 253, 252, 251) +DATA 256 +DATA 257 +DATA 258 +DATA 259 +DATA 260 + +Now, if the machine crashes after writing the first metadata block but +before writing the second metadata block, the space for areas DATA 251-255 +is leaked, it contains no valid data and it will never be used in the +future. + +This patch makes dm-snapshot complete exceptions in the same order they +were allocated, thus fixing this bug. + +Note: when backporting this patch to the stable kernel, change the version +field in the following way: +* if version in the stable kernel is {1, 11, 1}, change it to {1, 12, 0} +* if version in the stable kernel is {1, 10, 0} or {1, 10, 1}, change it + to {1, 10, 2} +Userspace reads the version to determine if the bug was fixed, so the +version change is needed.
+ +Signed-off-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +[xr: Backported to 3.4: adjust version] +Signed-off-by: Rui Xiang +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-snap.c | 71 +++++++++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 64 insertions(+), 7 deletions(-) + +--- a/drivers/md/dm-snap.c ++++ b/drivers/md/dm-snap.c +@@ -66,6 +66,18 @@ struct dm_snapshot { + + atomic_t pending_exceptions_count; + ++ /* Protected by "lock" */ ++ sector_t exception_start_sequence; ++ ++ /* Protected by kcopyd single-threaded callback */ ++ sector_t exception_complete_sequence; ++ ++ /* ++ * A list of pending exceptions that completed out of order. ++ * Protected by kcopyd single-threaded callback. ++ */ ++ struct list_head out_of_order_list; ++ + mempool_t *pending_pool; + + struct dm_exception_table pending; +@@ -171,6 +183,14 @@ struct dm_snap_pending_exception { + */ + int started; + ++ /* There was copying error. */ ++ int copy_error; ++ ++ /* A sequence number, it is used for in-order completion. */ ++ sector_t exception_sequence; ++ ++ struct list_head out_of_order_entry; ++ + /* + * For writing a complete chunk, bypassing the copy. 
+ */ +@@ -1090,6 +1110,9 @@ static int snapshot_ctr(struct dm_target + s->valid = 1; + s->active = 0; + atomic_set(&s->pending_exceptions_count, 0); ++ s->exception_start_sequence = 0; ++ s->exception_complete_sequence = 0; ++ INIT_LIST_HEAD(&s->out_of_order_list); + init_rwsem(&s->lock); + INIT_LIST_HEAD(&s->list); + spin_lock_init(&s->pe_lock); +@@ -1448,6 +1471,19 @@ static void commit_callback(void *contex + pending_complete(pe, success); + } + ++static void complete_exception(struct dm_snap_pending_exception *pe) ++{ ++ struct dm_snapshot *s = pe->snap; ++ ++ if (unlikely(pe->copy_error)) ++ pending_complete(pe, 0); ++ ++ else ++ /* Update the metadata if we are persistent */ ++ s->store->type->commit_exception(s->store, &pe->e, ++ commit_callback, pe); ++} ++ + /* + * Called when the copy I/O has finished. kcopyd actually runs + * this code so don't block. +@@ -1457,13 +1493,32 @@ static void copy_callback(int read_err, + struct dm_snap_pending_exception *pe = context; + struct dm_snapshot *s = pe->snap; + +- if (read_err || write_err) +- pending_complete(pe, 0); ++ pe->copy_error = read_err || write_err; + +- else +- /* Update the metadata if we are persistent */ +- s->store->type->commit_exception(s->store, &pe->e, +- commit_callback, pe); ++ if (pe->exception_sequence == s->exception_complete_sequence) { ++ s->exception_complete_sequence++; ++ complete_exception(pe); ++ ++ while (!list_empty(&s->out_of_order_list)) { ++ pe = list_entry(s->out_of_order_list.next, ++ struct dm_snap_pending_exception, out_of_order_entry); ++ if (pe->exception_sequence != s->exception_complete_sequence) ++ break; ++ s->exception_complete_sequence++; ++ list_del(&pe->out_of_order_entry); ++ complete_exception(pe); ++ } ++ } else { ++ struct list_head *lh; ++ struct dm_snap_pending_exception *pe2; ++ ++ list_for_each_prev(lh, &s->out_of_order_list) { ++ pe2 = list_entry(lh, struct dm_snap_pending_exception, out_of_order_entry); ++ if (pe2->exception_sequence < 
pe->exception_sequence) ++ break; ++ } ++ list_add(&pe->out_of_order_entry, lh); ++ } + } + + /* +@@ -1558,6 +1613,8 @@ __find_pending_exception(struct dm_snaps + return NULL; + } + ++ pe->exception_sequence = s->exception_start_sequence++; ++ + dm_insert_exception(&s->pending, &pe->e); + + return pe; +@@ -2200,7 +2257,7 @@ static struct target_type origin_target + + static struct target_type snapshot_target = { + .name = "snapshot", +- .version = {1, 10, 0}, ++ .version = {1, 10, 2}, + .module = THIS_MODULE, + .ctr = snapshot_ctr, + .dtr = snapshot_dtr, diff --git a/queue-3.4/dm-thin-fix-discard-corruption.patch b/queue-3.4/dm-thin-fix-discard-corruption.patch new file mode 100644 index 00000000000..7fe9ae41b9f --- /dev/null +++ b/queue-3.4/dm-thin-fix-discard-corruption.patch @@ -0,0 +1,195 @@ +From f046f89a99ccfd9408b94c653374ff3065c7edb3 Mon Sep 17 00:00:00 2001 +From: Joe Thornber +Date: Wed, 20 Mar 2013 17:21:24 +0000 +Subject: dm thin: fix discard corruption + +From: Joe Thornber + +commit f046f89a99ccfd9408b94c653374ff3065c7edb3 upstream. + +Fix a bug in dm_btree_remove that could leave leaf values with incorrect +reference counts. The effect of this was that removal of a shared block +could result in the space maps thinking the block was no longer used. +More concretely, if you have a thin device and a snapshot of it, sending +a discard to a shared region of the thin could corrupt the snapshot. + +Thinp uses a 2-level nested btree to store its mappings. This first +level is indexed by thin device, and the second level by logical +block. + +Often when we're removing an entry in this mapping tree we need to +rebalance nodes, which can involve shadowing them, possibly creating a +copy if the block is shared. If we do create a copy then children of +that node need to have their reference counts incremented. In this +way reference counts percolate down the tree as shared trees diverge.
+ +The rebalance functions were incrementing the children at the +appropriate time, but they were always assuming the children were +internal nodes. This meant the leaf values (in our case packed +block/flags entries) were not being incremented. + +Signed-off-by: Joe Thornber +Signed-off-by: Alasdair G Kergon +[bwh: Backported to 3.2: bump target version numbers from 1.0.1 to 1.0.2] +Signed-off-by: Ben Hutchings +[xr: Backported to 3.4: bump target version numbers to 1.1.1] +Signed-off-by: Rui Xiang +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-thin.c | 4 +- + drivers/md/persistent-data/dm-btree-remove.c | 46 ++++++++++++++------------- + 2 files changed, 26 insertions(+), 24 deletions(-) + +--- a/drivers/md/dm-thin.c ++++ b/drivers/md/dm-thin.c +@@ -2472,7 +2472,7 @@ static struct target_type pool_target = + .name = "thin-pool", + .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | + DM_TARGET_IMMUTABLE, +- .version = {1, 1, 0}, ++ .version = {1, 1, 1}, + .module = THIS_MODULE, + .ctr = pool_ctr, + .dtr = pool_dtr, +@@ -2752,7 +2752,7 @@ static void thin_io_hints(struct dm_targ + + static struct target_type thin_target = { + .name = "thin", +- .version = {1, 1, 0}, ++ .version = {1, 1, 1}, + .module = THIS_MODULE, + .ctr = thin_ctr, + .dtr = thin_dtr, +--- a/drivers/md/persistent-data/dm-btree-remove.c ++++ b/drivers/md/persistent-data/dm-btree-remove.c +@@ -139,15 +139,8 @@ struct child { + struct btree_node *n; + }; + +-static struct dm_btree_value_type le64_type = { +- .context = NULL, +- .size = sizeof(__le64), +- .inc = NULL, +- .dec = NULL, +- .equal = NULL +-}; +- +-static int init_child(struct dm_btree_info *info, struct btree_node *parent, ++static int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt, ++ struct btree_node *parent, + unsigned index, struct child *result) + { + int r, inc; +@@ -164,7 +157,7 @@ static int init_child(struct dm_btree_in + result->n = dm_block_data(result->block); + + if (inc) +- 
inc_children(info->tm, result->n, &le64_type); ++ inc_children(info->tm, result->n, vt); + + *((__le64 *) value_ptr(parent, index)) = + cpu_to_le64(dm_block_location(result->block)); +@@ -236,7 +229,7 @@ static void __rebalance2(struct dm_btree + } + + static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, +- unsigned left_index) ++ struct dm_btree_value_type *vt, unsigned left_index) + { + int r; + struct btree_node *parent; +@@ -244,11 +237,11 @@ static int rebalance2(struct shadow_spin + + parent = dm_block_data(shadow_current(s)); + +- r = init_child(info, parent, left_index, &left); ++ r = init_child(info, vt, parent, left_index, &left); + if (r) + return r; + +- r = init_child(info, parent, left_index + 1, &right); ++ r = init_child(info, vt, parent, left_index + 1, &right); + if (r) { + exit_child(info, &left); + return r; +@@ -368,7 +361,7 @@ static void __rebalance3(struct dm_btree + } + + static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, +- unsigned left_index) ++ struct dm_btree_value_type *vt, unsigned left_index) + { + int r; + struct btree_node *parent = dm_block_data(shadow_current(s)); +@@ -377,17 +370,17 @@ static int rebalance3(struct shadow_spin + /* + * FIXME: fill out an array? 
+ */ +- r = init_child(info, parent, left_index, &left); ++ r = init_child(info, vt, parent, left_index, &left); + if (r) + return r; + +- r = init_child(info, parent, left_index + 1, &center); ++ r = init_child(info, vt, parent, left_index + 1, &center); + if (r) { + exit_child(info, &left); + return r; + } + +- r = init_child(info, parent, left_index + 2, &right); ++ r = init_child(info, vt, parent, left_index + 2, &right); + if (r) { + exit_child(info, &left); + exit_child(info, &center); +@@ -434,7 +427,8 @@ static int get_nr_entries(struct dm_tran + } + + static int rebalance_children(struct shadow_spine *s, +- struct dm_btree_info *info, uint64_t key) ++ struct dm_btree_info *info, ++ struct dm_btree_value_type *vt, uint64_t key) + { + int i, r, has_left_sibling, has_right_sibling; + uint32_t child_entries; +@@ -472,13 +466,13 @@ static int rebalance_children(struct sha + has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1); + + if (!has_left_sibling) +- r = rebalance2(s, info, i); ++ r = rebalance2(s, info, vt, i); + + else if (!has_right_sibling) +- r = rebalance2(s, info, i - 1); ++ r = rebalance2(s, info, vt, i - 1); + + else +- r = rebalance3(s, info, i - 1); ++ r = rebalance3(s, info, vt, i - 1); + + return r; + } +@@ -529,7 +523,7 @@ static int remove_raw(struct shadow_spin + if (le32_to_cpu(n->header.flags) & LEAF_NODE) + return do_leaf(n, key, index); + +- r = rebalance_children(s, info, key); ++ r = rebalance_children(s, info, vt, key); + if (r) + break; + +@@ -550,6 +544,14 @@ static int remove_raw(struct shadow_spin + return r; + } + ++static struct dm_btree_value_type le64_type = { ++ .context = NULL, ++ .size = sizeof(__le64), ++ .inc = NULL, ++ .dec = NULL, ++ .equal = NULL ++}; ++ + int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, + uint64_t *keys, dm_block_t *new_root) + { diff --git a/queue-3.4/md-raid10-fix-enough-function-for-detecting-if-array-is-failed.patch
b/queue-3.4/md-raid10-fix-enough-function-for-detecting-if-array-is-failed.patch new file mode 100644 index 00000000000..e4abea73faf --- /dev/null +++ b/queue-3.4/md-raid10-fix-enough-function-for-detecting-if-array-is-failed.patch @@ -0,0 +1,57 @@ +From 4e19de3be14c9390e63271effb5b95ab50f298f4 Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Thu, 27 Sep 2012 12:35:21 +1000 +Subject: md/raid10: fix "enough" function for detecting if array is failed. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: NeilBrown + +commit 80b4812407c6b1f66a4f2430e69747a13f010839 upstream. + +The 'enough' function is written to work with 'near' arrays only +in that it implicitly assumes that the offset from one 'group' of +devices to the next is the same as the number of copies. +In reality it is the number of 'near' copies. + +So change it to make this number explicit. + +This bug makes it possible to run arrays without enough drives +present, which is dangerous. +It is appropriate for an -stable kernel, but will almost certainly +need to be modified for some of them.
+ +Reported-by: Jakub Husák +Signed-off-by: NeilBrown +[bwh: Backported to 3.2: s/geo->/conf->/] +Signed-off-by: Ben Hutchings +Cc: Rui Xiang +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/raid10.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -1419,14 +1419,16 @@ static int enough(struct r10conf *conf, + do { + int n = conf->copies; + int cnt = 0; ++ int this = first; + while (n--) { +- if (conf->mirrors[first].rdev && +- first != ignore) ++ if (conf->mirrors[this].rdev && ++ this != ignore) + cnt++; +- first = (first+1) % conf->raid_disks; ++ this = (this+1) % conf->raid_disks; + } + if (cnt == 0) + return 0; ++ first = (first + conf->near_copies) % conf->raid_disks; + } while (first != 0); + return 1; + } diff --git a/queue-3.4/nfs-nfs_getaclargs.acl_len-is-a-size_t.patch b/queue-3.4/nfs-nfs_getaclargs.acl_len-is-a-size_t.patch new file mode 100644 index 00000000000..387d4cc50cb --- /dev/null +++ b/queue-3.4/nfs-nfs_getaclargs.acl_len-is-a-size_t.patch @@ -0,0 +1,45 @@ +From 1bf2642f4cb2a773cd1d41b9558acf5af81738b7 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 11 Jul 2012 16:30:32 -0400 +Subject: NFS: nfs_getaclargs.acl_len is a size_t +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Chuck Lever + +commit 56d08fef2369d5ca9ad2e1fc697f5379fd8af751 upstream. + +Squelch compiler warnings: + +fs/nfs/nfs4proc.c: In function ‘__nfs4_get_acl_uncached’: +fs/nfs/nfs4proc.c:3811:14: warning: comparison between signed and + unsigned integer expressions [-Wsign-compare] +fs/nfs/nfs4proc.c:3818:15: warning: comparison between signed and + unsigned integer expressions [-Wsign-compare] + +Introduced by commit bf118a34 "NFSv4: include bitmap in nfsv4 get +acl data", Dec 7, 2011. 
+ +Signed-off-by: Chuck Lever +Signed-off-by: Trond Myklebust +Signed-off-by: Ben Hutchings +Cc: Rui Xiang +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfs/nfs4proc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -3724,7 +3724,8 @@ static ssize_t __nfs4_get_acl_uncached(s + .rpc_argp = &args, + .rpc_resp = &res, + }; +- int ret = -ENOMEM, npages, i, acl_len = 0; ++ int ret = -ENOMEM, npages, i; ++ size_t acl_len = 0; + + npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; + /* As long as we're doing a round trip to the server anyway, diff --git a/queue-3.4/nfsd-don-t-run-get_file-if-nfs4_preprocess_stateid_op-return-error.patch b/queue-3.4/nfsd-don-t-run-get_file-if-nfs4_preprocess_stateid_op-return-error.patch new file mode 100644 index 00000000000..60f881ff67b --- /dev/null +++ b/queue-3.4/nfsd-don-t-run-get_file-if-nfs4_preprocess_stateid_op-return-error.patch @@ -0,0 +1,44 @@ +From 7e6c247f30c65913688850620e40b418223a5ff3 Mon Sep 17 00:00:00 2001 +From: fanchaoting +Date: Mon, 1 Apr 2013 21:07:22 +0800 +Subject: nfsd: don't run get_file if nfs4_preprocess_stateid_op return error + +From: fanchaoting + +commit b022032e195ffca83d7002d6b84297d796ed443b upstream. + +we should return error status directly when nfs4_preprocess_stateid_op +return error. + +Signed-off-by: fanchaoting +Signed-off-by: J. 
Bruce Fields +[bwh: Backported to 3.2: adjust context] +Signed-off-by: Ben Hutchings +Cc: Rui Xiang +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfsd/nfs4proc.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -904,14 +904,14 @@ nfsd4_write(struct svc_rqst *rqstp, stru + + nfs4_lock_state(); + status = nfs4_preprocess_stateid_op(cstate, stateid, WR_STATE, &filp); +- if (filp) +- get_file(filp); +- nfs4_unlock_state(); +- + if (status) { ++ nfs4_unlock_state(); + dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); + return status; + } ++ if (filp) ++ get_file(filp); ++ nfs4_unlock_state(); + + cnt = write->wr_buflen; + write->wr_how_written = write->wr_stable_how; diff --git a/queue-3.4/nfsd-nfsd_open-when-dentry_open-returns-an-error-do-not-propagate-as-struct-file.patch b/queue-3.4/nfsd-nfsd_open-when-dentry_open-returns-an-error-do-not-propagate-as-struct-file.patch new file mode 100644 index 00000000000..f0e20e25733 --- /dev/null +++ b/queue-3.4/nfsd-nfsd_open-when-dentry_open-returns-an-error-do-not-propagate-as-struct-file.patch @@ -0,0 +1,96 @@ +From e4daf1ffbe6cc3b12aab4d604e627829e93e9914 Mon Sep 17 00:00:00 2001 +From: Harshula Jayasuriya +Date: Tue, 23 Jul 2013 14:21:35 +1000 +Subject: nfsd: nfsd_open: when dentry_open returns an error do not propagate as struct file + +From: Harshula Jayasuriya + +commit e4daf1ffbe6cc3b12aab4d604e627829e93e9914 upstream. + +The following call chain: +------------------------------------------------------------ +nfs4_get_vfs_file +- nfsd_open + - dentry_open + - do_dentry_open + - __get_file_write_access + - get_write_access + - return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY; +------------------------------------------------------------ + +can result in the following state: +------------------------------------------------------------ +struct nfs4_file { +... 
+ fi_fds = {0xffff880c1fa65c80, 0xffffffffffffffe6, 0x0}, + fi_access = {{ + counter = 0x1 + }, { + counter = 0x0 + }}, +... +------------------------------------------------------------ + +1) First time around, in nfs4_get_vfs_file() fp->fi_fds[O_WRONLY] is +NULL, hence nfsd_open() is called where we get status set to an error +and fp->fi_fds[O_WRONLY] to -ETXTBSY. Thus we do not reach +nfs4_file_get_access() and fi_access[O_WRONLY] is not incremented. + +2) Second time around, in nfs4_get_vfs_file() fp->fi_fds[O_WRONLY] is +NOT NULL (-ETXTBSY), so nfsd_open() is NOT called, but +nfs4_file_get_access() IS called and fi_access[O_WRONLY] is incremented. +Thus we leave a landmine in the form of the nfs4_file data structure in +an incorrect state. + +3) Eventually, when __nfs4_file_put_access() is called it finds +fi_access[O_WRONLY] being non-zero, it decrements it and calls +nfs4_file_put_fd() which tries to fput -ETXTBSY. +------------------------------------------------------------ +... 
+ [exception RIP: fput+0x9] + RIP: ffffffff81177fa9 RSP: ffff88062e365c90 RFLAGS: 00010282 + RAX: ffff880c2b3d99cc RBX: ffff880c2b3d9978 RCX: 0000000000000002 + RDX: dead000000100101 RSI: 0000000000000001 RDI: ffffffffffffffe6 + RBP: ffff88062e365c90 R8: ffff88041fe797d8 R9: ffff88062e365d58 + R10: 0000000000000008 R11: 0000000000000000 R12: 0000000000000001 + R13: 0000000000000007 R14: 0000000000000000 R15: 0000000000000000 + ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 + #9 [ffff88062e365c98] __nfs4_file_put_access at ffffffffa0562334 [nfsd] + #10 [ffff88062e365cc8] nfs4_file_put_access at ffffffffa05623ab [nfsd] + #11 [ffff88062e365ce8] free_generic_stateid at ffffffffa056634d [nfsd] + #12 [ffff88062e365d18] release_open_stateid at ffffffffa0566e4b [nfsd] + #13 [ffff88062e365d38] nfsd4_close at ffffffffa0567401 [nfsd] + #14 [ffff88062e365d88] nfsd4_proc_compound at ffffffffa0557f28 [nfsd] + #15 [ffff88062e365dd8] nfsd_dispatch at ffffffffa054543e [nfsd] + #16 [ffff88062e365e18] svc_process_common at ffffffffa04ba5a4 [sunrpc] + #17 [ffff88062e365e98] svc_process at ffffffffa04babe0 [sunrpc] + #18 [ffff88062e365eb8] nfsd at ffffffffa0545b62 [nfsd] + #19 [ffff88062e365ee8] kthread at ffffffff81090886 + #20 [ffff88062e365f48] kernel_thread at ffffffff8100c14a +------------------------------------------------------------ + + +Signed-off-by: Harshula Jayasuriya +Signed-off-by: J. 
Bruce Fields +[xr: Backported to 3.4: adjust context] +Signed-off-by: Rui Xiang +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/vfs.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -828,9 +828,10 @@ nfsd_open(struct svc_rqst *rqstp, struct + } + *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), + flags, current_cred()); +- if (IS_ERR(*filp)) ++ if (IS_ERR(*filp)) { + host_err = PTR_ERR(*filp); +- else { ++ *filp = NULL; ++ } else { + host_err = ima_file_check(*filp, may_flags); + + if (may_flags & NFSD_MAY_64BIT_COOKIE) diff --git a/queue-3.4/nfsd4-fix-xdr-decoding-of-large-non-write-compounds.patch b/queue-3.4/nfsd4-fix-xdr-decoding-of-large-non-write-compounds.patch new file mode 100644 index 00000000000..cd1558a4686 --- /dev/null +++ b/queue-3.4/nfsd4-fix-xdr-decoding-of-large-non-write-compounds.patch @@ -0,0 +1,43 @@ +From bca06620c941f2427f13710e330adcef1cf30007 Mon Sep 17 00:00:00 2001 +From: "J. Bruce Fields" +Date: Tue, 19 Nov 2013 17:32:43 -0500 +Subject: nfsd4: fix xdr decoding of large non-write compounds + +From: "J. Bruce Fields" + +commit 365da4adebb1c012febf81019ad3dc5bb52e2a13 upstream. + +This fixes a regression from 247500820ebd02ad87525db5d9b199e5b66f6636 +"nfsd4: fix decoding of compounds across page boundaries". The previous +code was correct: argp->pagelist is initialized in +nfs4svc_decode_compoundargs to rqstp->rq_arg.pages, and is therefore a +pointer to the page *after* the page we are currently decoding. + +The reason that patch nevertheless fixed a problem with decoding +compounds containing write was a bug in the write decoding introduced by +5a80a54d21c96590d013378d8c5f65f879451ab4 "nfsd4: reorganize write +decoding", after which write decoding no longer adhered to the rule that +argp->pagelist point to the next page. + +Signed-off-by: J.
Bruce Fields +[bwh: Backported to 3.2: adjust context; there is only one instance to fix] +Signed-off-by: Ben Hutchings +Cc: Rui Xiang +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfsd/nfs4xdr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -161,8 +161,8 @@ static __be32 *read_buf(struct nfsd4_com + */ + memcpy(p, argp->p, avail); + /* step to next page */ +- argp->pagelist++; + argp->p = page_address(argp->pagelist[0]); ++ argp->pagelist++; + if (argp->pagelen < PAGE_SIZE) { + argp->end = argp->p + (argp->pagelen>>2); + argp->pagelen = 0; diff --git a/queue-3.4/nfsv4-wait-on-recovery-for-async-session-errors.patch b/queue-3.4/nfsv4-wait-on-recovery-for-async-session-errors.patch new file mode 100644 index 00000000000..3bad86b6012 --- /dev/null +++ b/queue-3.4/nfsv4-wait-on-recovery-for-async-session-errors.patch @@ -0,0 +1,78 @@ +From a687a58fd0fa69d405d777eba5f23a0c00cce2e7 Mon Sep 17 00:00:00 2001 +From: Andy Adamson +Date: Fri, 15 Nov 2013 16:36:16 -0500 +Subject: NFSv4 wait on recovery for async session errors + +From: Andy Adamson + +commit 4a82fd7c4e78a1b7a224f9ae8bb7e1fd95f670e0 upstream. + +When the state manager is processing the NFS4CLNT_DELEGRETURN flag, session +draining is off, but DELEGRETURN can still get a session error. +The async handler calls nfs4_schedule_session_recovery, returns -EAGAIN, and +the DELEGRETURN done then restarts the RPC task in the prepare state. +With the state manager still processing the NFS4CLNT_DELEGRETURN flag with +session draining off, these DELEGRETURNs will cycle with errors filling up the +session slots. 
+ +This prevents OPEN reclaims (from nfs_delegation_claim_opens) required by the +NFS4CLNT_DELEGRETURN state manager processing from completing, hanging the +state manager in the __rpc_wait_for_completion_task in nfs4_run_open_task +as seen in this kernel thread dump: + +kernel: 4.12.32.53-ma D 0000000000000000 0 3393 2 0x00000000 +kernel: ffff88013995fb60 0000000000000046 ffff880138cc5400 ffff88013a9df140 +kernel: ffff8800000265c0 ffffffff8116eef0 ffff88013fc10080 0000000300000001 +kernel: ffff88013a4ad058 ffff88013995ffd8 000000000000fbc8 ffff88013a4ad058 +kernel: Call Trace: +kernel: [] ? cache_alloc_refill+0x1c0/0x240 +kernel: [] ? rpc_wait_bit_killable+0x0/0xa0 [sunrpc] +kernel: [] rpc_wait_bit_killable+0x42/0xa0 [sunrpc] +kernel: [] __wait_on_bit+0x5f/0x90 +kernel: [] ? rpc_wait_bit_killable+0x0/0xa0 [sunrpc] +kernel: [] out_of_line_wait_on_bit+0x78/0x90 +kernel: [] ? wake_bit_function+0x0/0x50 +kernel: [] __rpc_wait_for_completion_task+0x2d/0x30 [sunrpc] +kernel: [] nfs4_run_open_task+0x11c/0x160 [nfs] +kernel: [] nfs4_open_recover_helper+0x87/0x120 [nfs] +kernel: [] nfs4_open_recover+0xc6/0x150 [nfs] +kernel: [] ? nfs4_open_recoverdata_alloc+0x2f/0x60 [nfs] +kernel: [] nfs4_open_delegation_recall+0x6a/0xa0 [nfs] +kernel: [] nfs_end_delegation_return+0x120/0x2e0 [nfs] +kernel: [] ? queue_work+0x1f/0x30 +kernel: [] nfs_client_return_marked_delegations+0xd7/0x110 [nfs] +kernel: [] nfs4_run_state_manager+0x548/0x620 [nfs] +kernel: [] ? nfs4_run_state_manager+0x0/0x620 [nfs] +kernel: [] kthread+0x96/0xa0 +kernel: [] child_rip+0xa/0x20 +kernel: [] ? kthread+0x0/0xa0 +kernel: [] ? child_rip+0x0/0x20 + +The state manager can not therefore process the DELEGRETURN session errors. +Change the async handler to wait for recovery on session errors. 
+ +Signed-off-by: Andy Adamson +Signed-off-by: Trond Myklebust +[bwh: Backported to 3.2: + - Adjust context + - There's no restart_call label] +Signed-off-by: Ben Hutchings +Cc: Rui Xiang +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfs/nfs4proc.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -3910,8 +3910,7 @@ nfs4_async_handle_error(struct rpc_task + dprintk("%s ERROR %d, Reset session\n", __func__, + task->tk_status); + nfs4_schedule_session_recovery(clp->cl_session); +- task->tk_status = 0; +- return -EAGAIN; ++ goto wait_on_recovery; + #endif /* CONFIG_NFS_V4_1 */ + case -NFS4ERR_DELAY: + nfs_inc_server_stats(server, NFSIOS_DELAY); diff --git a/queue-3.4/nfsv4.1-don-t-decode-skipped-layoutgets.patch b/queue-3.4/nfsv4.1-don-t-decode-skipped-layoutgets.patch new file mode 100644 index 00000000000..97b114cb78c --- /dev/null +++ b/queue-3.4/nfsv4.1-don-t-decode-skipped-layoutgets.patch @@ -0,0 +1,74 @@ +From 3c5add9c302ac2e86a1b99c738fc3d1c06dfc03a Mon Sep 17 00:00:00 2001 +From: Weston Andros Adamson +Date: Fri, 15 Feb 2013 16:03:46 -0500 +Subject: NFSv4.1: Don't decode skipped layoutgets + +From: Weston Andros Adamson + +commit 085b7a45c63d3da5be155faab9249a5cab224561 upstream. + +layoutget's prepare hook can call rpc_exit with status = NFS4_OK (0). +Because of this, nfs4_proc_layoutget can't depend on a 0 status to mean +that the RPC was successfully sent, received and parsed. + +To fix this, use the result's len member to see if parsing took place. + +This fixes the following OOPS -- calling xdr_init_decode() with a buffer length +0 doesn't set the stream's 'p' member and ends up using uninitialized memory +in filelayout_decode_layout. 
+ +BUG: unable to handle kernel paging request at 0000000000008050 +IP: [] memcpy+0x18/0x120 +PGD 0 +Oops: 0000 [#1] SMP +last sysfs file: /sys/devices/pci0000:00/0000:00:11.0/0000:02:01.0/irq +CPU 1 +Modules linked in: nfs_layout_nfsv41_files nfs lockd fscache auth_rpcgss nfs_acl autofs4 sunrpc ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables ipv6 dm_mirror dm_region_hash dm_log dm_mod ppdev parport_pc parport snd_ens1371 snd_rawmidi snd_ac97_codec ac97_bus snd_seq snd_seq_device snd_pcm snd_timer snd soundcore snd_page_alloc e1000 microcode vmware_balloon i2c_piix4 i2c_core sg shpchp ext4 mbcache jbd2 sr_mod cdrom sd_mod crc_t10dif pata_acpi ata_generic ata_piix mptspi mptscsih mptbase scsi_transport_spi [last unloaded: speedstep_lib] + +Pid: 1665, comm: flush-0:22 Not tainted 2.6.32-356-test-2 #2 VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform +RIP: 0010:[] [] memcpy+0x18/0x120 +RSP: 0018:ffff88003dfab588 EFLAGS: 00010206 +RAX: ffff88003dc42000 RBX: ffff88003dfab610 RCX: 0000000000000009 +RDX: 000000003f807ff0 RSI: 0000000000008050 RDI: ffff88003dc42000 +RBP: ffff88003dfab5b0 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000080 R12: 0000000000000024 +R13: ffff88003dc42000 R14: ffff88003f808030 R15: ffff88003dfab6a0 +FS: 0000000000000000(0000) GS:ffff880003420000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b +CR2: 0000000000008050 CR3: 000000003bc92000 CR4: 00000000001407e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 +Process flush-0:22 (pid: 1665, threadinfo ffff88003dfaa000, task ffff880037f77540) +Stack: +ffffffffa0398ac1 ffff8800397c5940 ffff88003dfab610 ffff88003dfab6a0 + ffff88003dfab5d0 ffff88003dfab680 ffffffffa01c150b ffffea0000d82e70 + 000000508116713b 
0000000000000000 0000000000000000 0000000000000000 +Call Trace: +[] ? xdr_inline_decode+0xb1/0x120 [sunrpc] +[] filelayout_decode_layout+0xeb/0x350 [nfs_layout_nfsv41_files] +[] filelayout_alloc_lseg+0x8c/0x3c0 [nfs_layout_nfsv41_files] +[] ? __wait_on_bit+0x7e/0x90 + +Signed-off-by: Weston Andros Adamson +Signed-off-by: Trond Myklebust +[bwh: Backported to 3.2: adjust context] +Signed-off-by: Ben Hutchings +Cc: Rui Xiang +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfs/nfs4proc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -6084,7 +6084,8 @@ int nfs4_proc_layoutget(struct nfs4_layo + status = nfs4_wait_for_completion_rpc_task(task); + if (status == 0) + status = task->tk_status; +- if (status == 0) ++ /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ ++ if (status == 0 && lgp->res.layoutp->len) + status = pnfs_layout_process(lgp); + rpc_put_task(task); + dprintk("<-- %s status=%d\n", __func__, status); diff --git a/queue-3.4/nfsv4.1-fix-a-race-in-pnfs-layoutcommit.patch b/queue-3.4/nfsv4.1-fix-a-race-in-pnfs-layoutcommit.patch new file mode 100644 index 00000000000..76a1f74893a --- /dev/null +++ b/queue-3.4/nfsv4.1-fix-a-race-in-pnfs-layoutcommit.patch @@ -0,0 +1,95 @@ +From 9ca2044f9041283bff117bb7e9ba7e9cd644bae0 Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Wed, 20 Mar 2013 12:34:32 -0400 +Subject: NFSv4.1: Fix a race in pNFS layoutcommit + +From: Trond Myklebust + +commit a073dbff359f4741013ae4b8395f5364c5e00b48 upstream. + +We need to clear the NFS_LSEG_LAYOUTCOMMIT bits atomically with the +NFS_INO_LAYOUTCOMMIT bit, otherwise we may end up with situations +where the two are out of sync. +The first half of the problem is to ensure that pnfs_layoutcommit_inode +clears the NFS_LSEG_LAYOUTCOMMIT bit through pnfs_list_write_lseg. 
+We still need to keep the reference to those segments until the RPC call +is finished, so in order to make it clear _where_ those references come +from, we add a helper pnfs_list_write_lseg_done() that cleans up after +pnfs_list_write_lseg. + +Signed-off-by: Trond Myklebust +Acked-by: Benny Halevy +[bwh: Backported to 3.2: s/pnfs_put_lseg/put_lseg/] +Signed-off-by: Ben Hutchings +Cc: Rui Xiang +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfs/nfs4proc.c | 14 -------------- + fs/nfs/pnfs.c | 19 ++++++++++++++++++- + 2 files changed, 18 insertions(+), 15 deletions(-) + +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -6297,22 +6297,8 @@ nfs4_layoutcommit_done(struct rpc_task * + static void nfs4_layoutcommit_release(void *calldata) + { + struct nfs4_layoutcommit_data *data = calldata; +- struct pnfs_layout_segment *lseg, *tmp; +- unsigned long *bitlock = &NFS_I(data->args.inode)->flags; + + pnfs_cleanup_layoutcommit(data); +- /* Matched by references in pnfs_set_layoutcommit */ +- list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) { +- list_del_init(&lseg->pls_lc_list); +- if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, +- &lseg->pls_flags)) +- put_lseg(lseg); +- } +- +- clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); +- smp_mb__after_clear_bit(); +- wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); +- + put_rpccred(data->cred); + kfree(data); + } +--- a/fs/nfs/pnfs.c ++++ b/fs/nfs/pnfs.c +@@ -1381,11 +1381,27 @@ static void pnfs_list_write_lseg(struct + + list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { + if (lseg->pls_range.iomode == IOMODE_RW && +- test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) ++ test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) + list_add(&lseg->pls_lc_list, listp); + } + } + ++static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp) ++{ ++ struct pnfs_layout_segment *lseg, *tmp; ++ unsigned long *bitlock = &NFS_I(inode)->flags; ++ ++ /* Matched by references 
in pnfs_set_layoutcommit */ ++ list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) { ++ list_del_init(&lseg->pls_lc_list); ++ put_lseg(lseg); ++ } ++ ++ clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); ++ smp_mb__after_clear_bit(); ++ wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); ++} ++ + void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) + { + if (lseg->pls_range.iomode == IOMODE_RW) { +@@ -1434,6 +1450,7 @@ void pnfs_cleanup_layoutcommit(struct nf + + if (nfss->pnfs_curr_ld->cleanup_layoutcommit) + nfss->pnfs_curr_ld->cleanup_layoutcommit(data); ++ pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list); + } + + /* diff --git a/queue-3.4/nfsv4.1-handle-nfs4err_delay-when-resetting-the-nfsv4.1-session.patch b/queue-3.4/nfsv4.1-handle-nfs4err_delay-when-resetting-the-nfsv4.1-session.patch new file mode 100644 index 00000000000..8dd7b1e04c7 --- /dev/null +++ b/queue-3.4/nfsv4.1-handle-nfs4err_delay-when-resetting-the-nfsv4.1-session.patch @@ -0,0 +1,49 @@ +From 78e3ae2d57c0df313b079a07f6ffc16e4041e56c Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Wed, 30 Jan 2013 13:04:10 -0500 +Subject: NFSv4.1: Handle NFS4ERR_DELAY when resetting the NFSv4.1 session + +From: Trond Myklebust + +commit c489ee290bdbbace6bb63ebe6ebd4dd605819495 upstream. + +NFS4ERR_DELAY is a legal reply when we call DESTROY_SESSION. It +usually means that the server is busy handling an unfinished RPC +request. Just sleep for a second and then retry. +We also need to be able to handle the NFS4ERR_BACK_CHAN_BUSY return +value. If the NFS server has outstanding callbacks, we just want to +similarly sleep & retry. 
+ +Signed-off-by: Trond Myklebust +[bwh: Backported to 3.2: adjust context] +Signed-off-by: Ben Hutchings +Cc: Rui Xiang +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfs/nfs4state.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +--- a/fs/nfs/nfs4state.c ++++ b/fs/nfs/nfs4state.c +@@ -1651,8 +1651,18 @@ static int nfs4_reset_session(struct nfs + + nfs4_begin_drain_session(clp); + status = nfs4_proc_destroy_session(clp->cl_session); +- if (status && status != -NFS4ERR_BADSESSION && +- status != -NFS4ERR_DEADSESSION) { ++ switch (status) { ++ case 0: ++ case -NFS4ERR_BADSESSION: ++ case -NFS4ERR_DEADSESSION: ++ break; ++ case -NFS4ERR_BACK_CHAN_BUSY: ++ case -NFS4ERR_DELAY: ++ set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); ++ status = 0; ++ ssleep(1); ++ goto out; ++ default: + status = nfs4_recovery_handle_error(clp, status); + goto out; + } diff --git a/queue-3.4/nfsv4.1-integer-overflow-in-decode_cb_sequence_args.patch b/queue-3.4/nfsv4.1-integer-overflow-in-decode_cb_sequence_args.patch new file mode 100644 index 00000000000..9b3b8ebd71f --- /dev/null +++ b/queue-3.4/nfsv4.1-integer-overflow-in-decode_cb_sequence_args.patch @@ -0,0 +1,37 @@ +From 70bea7f2c038f04b5bc2e84f12615f79ed394d13 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Tue, 12 Jun 2012 10:37:08 +0300 +Subject: NFSv4.1: integer overflow in decode_cb_sequence_args() + +From: Dan Carpenter + +commit 0439f31c35d1da0b28988b308ea455e38e6a350d upstream. + +This seems like it could overflow on 32 bits. Use kmalloc_array() which +has overflow protection built in. 
+ +Signed-off-by: Dan Carpenter +Signed-off-by: Trond Myklebust +Signed-off-by: Ben Hutchings +Cc: Rui Xiang +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfs/callback_xdr.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/fs/nfs/callback_xdr.c ++++ b/fs/nfs/callback_xdr.c +@@ -455,9 +455,9 @@ static __be32 decode_cb_sequence_args(st + args->csa_nrclists = ntohl(*p++); + args->csa_rclists = NULL; + if (args->csa_nrclists) { +- args->csa_rclists = kmalloc(args->csa_nrclists * +- sizeof(*args->csa_rclists), +- GFP_KERNEL); ++ args->csa_rclists = kmalloc_array(args->csa_nrclists, ++ sizeof(*args->csa_rclists), ++ GFP_KERNEL); + if (unlikely(args->csa_rclists == NULL)) + goto out; + diff --git a/queue-3.4/series b/queue-3.4/series index 898e79d3dec..1db0b9672b5 100644 --- a/queue-3.4/series +++ b/queue-3.4/series @@ -84,3 +84,18 @@ perf-fix-error-return-code.patch tracing-keep-overwrite-in-sync-between-regular-and-snapshot-buffers.patch vfs-make-vfs_fstat-use-f_light.patch cifs-delay-super-block-destruction-until-all-cifsfileinfo-objects-are-gone.patch +nfsv4-wait-on-recovery-for-async-session-errors.patch +nfsd4-fix-xdr-decoding-of-large-non-write-compounds.patch +nfsv4.1-integer-overflow-in-decode_cb_sequence_args.patch +nfsd-don-t-run-get_file-if-nfs4_preprocess_stateid_op-return-error.patch +nfs-nfs_getaclargs.acl_len-is-a-size_t.patch +nfsv4.1-fix-a-race-in-pnfs-layoutcommit.patch +nfsv4.1-don-t-decode-skipped-layoutgets.patch +nfsv4.1-handle-nfs4err_delay-when-resetting-the-nfsv4.1-session.patch +dm-bufio-avoid-a-possible-__vmalloc-deadlock.patch +dm-snapshot-add-missing-module-aliases.patch +md-raid10-fix-enough-function-for-detecting-if-array-is-failed.patch +nfsd-nfsd_open-when-dentry_open-returns-an-error-do-not-propagate-as-struct-file.patch +dm-snapshot-avoid-snapshot-space-leak-on-crash.patch +dm-mpath-fix-race-condition-between-multipath_dtr-and-pg_init_done.patch +dm-thin-fix-discard-corruption.patch