From: Greg Kroah-Hartman Date: Sun, 19 Oct 2014 23:08:43 +0000 (+0800) Subject: 3.17-stable patches X-Git-Tag: v3.10.59~43 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2792dbfd1e02a70a8eef7e0cc3f44cb77d6c100f;p=thirdparty%2Fkernel%2Fstable-queue.git 3.17-stable patches added patches: btrfs-add-missing-compression-property-remove-in-btrfs_ioctl_setflags.patch btrfs-cleanup-error-handling-in-build_backref_tree.patch btrfs-don-t-do-async-reclaim-during-log-replay.patch btrfs-don-t-go-readonly-on-existing-qgroup-items.patch btrfs-fix-a-deadlock-in-btrfs_dev_replace_finishing.patch btrfs-fix-and-enhance-merge_extent_mapping-to-insert-best-fitted-extent-map.patch btrfs-fix-build_backref_tree-issue-with-multiple-shared-blocks.patch btrfs-fix-race-in-wait_sync-ioctl.patch btrfs-fix-the-wrong-condition-judgment-about-subset-extent-map.patch btrfs-fix-up-bounds-checking-in-lseek.patch btrfs-try-not-to-enospc-on-log-replay.patch btrfs-wake-up-transaction-thread-from-sync_fs-ioctl.patch revert-btrfs-race-free-update-of-commit-root-for-ro-snapshots.patch --- diff --git a/queue-3.17/btrfs-add-missing-compression-property-remove-in-btrfs_ioctl_setflags.patch b/queue-3.17/btrfs-add-missing-compression-property-remove-in-btrfs_ioctl_setflags.patch new file mode 100644 index 00000000000..bad832e5950 --- /dev/null +++ b/queue-3.17/btrfs-add-missing-compression-property-remove-in-btrfs_ioctl_setflags.patch @@ -0,0 +1,53 @@ +From 78a017a2c92df9b571db0a55a016280f9019c65e Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Thu, 11 Sep 2014 11:44:49 +0100 +Subject: Btrfs: add missing compression property remove in btrfs_ioctl_setflags + +From: Filipe Manana + +commit 78a017a2c92df9b571db0a55a016280f9019c65e upstream. + +The behaviour of a 'chattr -c' consists of getting the current flags, +clearing the FS_COMPR_FL bit and then sending the result to the set +flags ioctl - this means the bit FS_NOCOMP_FL isn't set in the flags +passed to the ioctl. 
This results in the compression property not being +cleared from the inode - it was cleared only if the bit FS_NOCOMP_FL +was set in the received flags. + +Reproducer: + + $ mkfs.btrfs -f /dev/sdd + $ mount /dev/sdd /mnt && cd /mnt + $ mkdir a + $ chattr +c a + $ touch a/file + $ lsattr a/file + --------c------- a/file + $ chattr -c a + $ touch a/file2 + $ lsattr a/file2 + --------c------- a/file2 + $ lsattr -d a + ---------------- a + +Reported-by: Andreas Schneider +Signed-off-by: Filipe Manana +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ioctl.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -332,6 +332,9 @@ static int btrfs_ioctl_setflags(struct f + goto out_drop; + + } else { ++ ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0); ++ if (ret && ret != -ENODATA) ++ goto out_drop; + ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); + } + diff --git a/queue-3.17/btrfs-cleanup-error-handling-in-build_backref_tree.patch b/queue-3.17/btrfs-cleanup-error-handling-in-build_backref_tree.patch new file mode 100644 index 00000000000..66eb50003f8 --- /dev/null +++ b/queue-3.17/btrfs-cleanup-error-handling-in-build_backref_tree.patch @@ -0,0 +1,215 @@ +From 75bfb9aff45e44625260f52a5fd581b92ace3e62 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Fri, 19 Sep 2014 10:40:00 -0400 +Subject: Btrfs: cleanup error handling in build_backref_tree + +From: Josef Bacik + +commit 75bfb9aff45e44625260f52a5fd581b92ace3e62 upstream. + +When balance panics it tends to panic in the + +BUG_ON(!upper->checked); + +test, because it means it couldn't build the backref tree properly. This is +annoying to users and frankly a recoverable error, nothing in this function is +actually fatal since it is just an in-memory building of the backrefs for a +given bytenr. So go through and change all the BUG_ON()'s to ASSERT()'s, and +fix the BUG_ON(!upper->checked) thing to just return an error. 
+ +This patch also fixes the error handling so it tears down the work we've done +properly. This code was horribly broken since we always just panic'ed instead +of actually erroring out, so it needed to be completely re-worked. With this +patch my broken image no longer panics when I mount it. Thanks, + +Signed-off-by: Josef Bacik +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/relocation.c | 88 +++++++++++++++++++++++++++++++++----------------- + 1 file changed, 59 insertions(+), 29 deletions(-) + +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -736,7 +736,8 @@ again: + err = ret; + goto out; + } +- BUG_ON(!ret || !path1->slots[0]); ++ ASSERT(ret); ++ ASSERT(path1->slots[0]); + + path1->slots[0]--; + +@@ -746,10 +747,10 @@ again: + * the backref was added previously when processing + * backref of type BTRFS_TREE_BLOCK_REF_KEY + */ +- BUG_ON(!list_is_singular(&cur->upper)); ++ ASSERT(list_is_singular(&cur->upper)); + edge = list_entry(cur->upper.next, struct backref_edge, + list[LOWER]); +- BUG_ON(!list_empty(&edge->list[UPPER])); ++ ASSERT(list_empty(&edge->list[UPPER])); + exist = edge->node[UPPER]; + /* + * add the upper level block to pending list if we need +@@ -831,7 +832,7 @@ again: + cur->cowonly = 1; + } + #else +- BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); ++ ASSERT(key.type != BTRFS_EXTENT_REF_V0_KEY); + if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { + #endif + if (key.objectid == key.offset) { +@@ -840,7 +841,7 @@ again: + * backref of this type. 
+ */ + root = find_reloc_root(rc, cur->bytenr); +- BUG_ON(!root); ++ ASSERT(root); + cur->root = root; + break; + } +@@ -868,7 +869,7 @@ again: + } else { + upper = rb_entry(rb_node, struct backref_node, + rb_node); +- BUG_ON(!upper->checked); ++ ASSERT(upper->checked); + INIT_LIST_HEAD(&edge->list[UPPER]); + } + list_add_tail(&edge->list[LOWER], &cur->upper); +@@ -892,7 +893,7 @@ again: + + if (btrfs_root_level(&root->root_item) == cur->level) { + /* tree root */ +- BUG_ON(btrfs_root_bytenr(&root->root_item) != ++ ASSERT(btrfs_root_bytenr(&root->root_item) == + cur->bytenr); + if (should_ignore_root(root)) + list_add(&cur->list, &useless); +@@ -927,7 +928,7 @@ again: + need_check = true; + for (; level < BTRFS_MAX_LEVEL; level++) { + if (!path2->nodes[level]) { +- BUG_ON(btrfs_root_bytenr(&root->root_item) != ++ ASSERT(btrfs_root_bytenr(&root->root_item) == + lower->bytenr); + if (should_ignore_root(root)) + list_add(&lower->list, &useless); +@@ -982,7 +983,7 @@ again: + } else { + upper = rb_entry(rb_node, struct backref_node, + rb_node); +- BUG_ON(!upper->checked); ++ ASSERT(upper->checked); + INIT_LIST_HEAD(&edge->list[UPPER]); + if (!upper->owner) + upper->owner = btrfs_header_owner(eb); +@@ -1026,7 +1027,7 @@ next: + * everything goes well, connect backref nodes and insert backref nodes + * into the cache. + */ +- BUG_ON(!node->checked); ++ ASSERT(node->checked); + cowonly = node->cowonly; + if (!cowonly) { + rb_node = tree_insert(&cache->rb_root, node->bytenr, +@@ -1062,8 +1063,21 @@ next: + continue; + } + +- BUG_ON(!upper->checked); +- BUG_ON(cowonly != upper->cowonly); ++ if (!upper->checked) { ++ /* ++ * Still want to blow up for developers since this is a ++ * logic bug. 
++ */ ++ ASSERT(0); ++ err = -EINVAL; ++ goto out; ++ } ++ if (cowonly != upper->cowonly) { ++ ASSERT(0); ++ err = -EINVAL; ++ goto out; ++ } ++ + if (!cowonly) { + rb_node = tree_insert(&cache->rb_root, upper->bytenr, + &upper->rb_node); +@@ -1086,7 +1100,7 @@ next: + while (!list_empty(&useless)) { + upper = list_entry(useless.next, struct backref_node, list); + list_del_init(&upper->list); +- BUG_ON(!list_empty(&upper->upper)); ++ ASSERT(list_empty(&upper->upper)); + if (upper == node) + node = NULL; + if (upper->lowest) { +@@ -1119,29 +1133,45 @@ out: + if (err) { + while (!list_empty(&useless)) { + lower = list_entry(useless.next, +- struct backref_node, upper); +- list_del_init(&lower->upper); ++ struct backref_node, list); ++ list_del_init(&lower->list); + } +- upper = node; +- INIT_LIST_HEAD(&list); +- while (upper) { +- if (RB_EMPTY_NODE(&upper->rb_node)) { +- list_splice_tail(&upper->upper, &list); +- free_backref_node(cache, upper); +- } +- +- if (list_empty(&list)) +- break; +- +- edge = list_entry(list.next, struct backref_edge, +- list[LOWER]); ++ while (!list_empty(&list)) { ++ edge = list_first_entry(&list, struct backref_edge, ++ list[UPPER]); ++ list_del(&edge->list[UPPER]); + list_del(&edge->list[LOWER]); ++ lower = edge->node[LOWER]; + upper = edge->node[UPPER]; + free_backref_edge(cache, edge); ++ ++ /* ++ * Lower is no longer linked to any upper backref nodes ++ * and isn't in the cache, we can free it ourselves. 
++ */ ++ if (list_empty(&lower->upper) && ++ RB_EMPTY_NODE(&lower->rb_node)) ++ list_add(&lower->list, &useless); ++ ++ if (!RB_EMPTY_NODE(&upper->rb_node)) ++ continue; ++ ++ /* Add this guy's upper edges to the list to proces */ ++ list_for_each_entry(edge, &upper->upper, list[LOWER]) ++ list_add_tail(&edge->list[UPPER], &list); ++ if (list_empty(&upper->upper)) ++ list_add(&upper->list, &useless); ++ } ++ ++ while (!list_empty(&useless)) { ++ lower = list_entry(useless.next, ++ struct backref_node, list); ++ list_del_init(&lower->list); ++ free_backref_node(cache, lower); + } + return ERR_PTR(err); + } +- BUG_ON(node && node->detached); ++ ASSERT(!node || !node->detached); + return node; + } + diff --git a/queue-3.17/btrfs-don-t-do-async-reclaim-during-log-replay.patch b/queue-3.17/btrfs-don-t-do-async-reclaim-during-log-replay.patch new file mode 100644 index 00000000000..75b5f6d39c1 --- /dev/null +++ b/queue-3.17/btrfs-don-t-do-async-reclaim-during-log-replay.patch @@ -0,0 +1,39 @@ +From f6acfd50110b335c7af636cf1fc8e55319cae5fc Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Thu, 18 Sep 2014 11:27:17 -0400 +Subject: Btrfs: don't do async reclaim during log replay + +From: Josef Bacik + +commit f6acfd50110b335c7af636cf1fc8e55319cae5fc upstream. + +Trying to reproduce a log enospc bug I hit a panic in the async reclaim code +during log replay. This is because we use fs_info->fs_root as our root for +shrinking and such. Technically we can use whatever root we want, but let's +just not allow async reclaim while we're doing log replay. 
Thanks, + +Signed-off-by: Josef Bacik +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/extent-tree.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -4502,7 +4502,13 @@ again: + space_info->flush = 1; + } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { + used += orig_bytes; +- if (need_do_async_reclaim(space_info, root->fs_info, used) && ++ /* ++ * We will do the space reservation dance during log replay, ++ * which means we won't have fs_info->fs_root set, so don't do ++ * the async reclaim as we will panic. ++ */ ++ if (!root->fs_info->log_root_recovering && ++ need_do_async_reclaim(space_info, root->fs_info, used) && + !work_busy(&root->fs_info->async_reclaim_work)) + queue_work(system_unbound_wq, + &root->fs_info->async_reclaim_work); diff --git a/queue-3.17/btrfs-don-t-go-readonly-on-existing-qgroup-items.patch b/queue-3.17/btrfs-don-t-go-readonly-on-existing-qgroup-items.patch new file mode 100644 index 00000000000..0c83e3d998c --- /dev/null +++ b/queue-3.17/btrfs-don-t-go-readonly-on-existing-qgroup-items.patch @@ -0,0 +1,66 @@ +From 0b4699dcb65c2cff793210b07f40b98c2d423a43 Mon Sep 17 00:00:00 2001 +From: Mark Fasheh +Date: Mon, 18 Aug 2014 14:01:17 -0700 +Subject: btrfs: don't go readonly on existing qgroup items + +From: Mark Fasheh + +commit 0b4699dcb65c2cff793210b07f40b98c2d423a43 upstream. + +btrfs_drop_snapshot() leaves subvolume qgroup items on disk after +completion. This can cause problems with snapshot creation. If a new +snapshot tries to claim the deleted subvolumes id, btrfs will get -EEXIST +from add_qgroup_item() and go read-only. 
The following commands will +reproduce this problem (assume btrfs is on /dev/sda and is mounted at +/btrfs) + +mkfs.btrfs -f /dev/sda +mount -t btrfs /dev/sda /btrfs/ +btrfs quota enable /btrfs/ +btrfs su sna /btrfs/ /btrfs/snap +btrfs su de /btrfs/snap +sleep 45 +umount /btrfs/ +mount -t btrfs /dev/sda /btrfs/ + +We can fix this by catching -EEXIST in add_qgroup_item() and +initializing the existing items. We have the problem of orphaned +relation items being on disk from an old snapshot but that is outside +the scope of this patch. + +Signed-off-by: Mark Fasheh +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/qgroup.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -551,9 +551,15 @@ static int add_qgroup_item(struct btrfs_ + key.type = BTRFS_QGROUP_INFO_KEY; + key.offset = qgroupid; + ++ /* ++ * Avoid a transaction abort by catching -EEXIST here. In that ++ * case, we proceed by re-initializing the existing structure ++ * on disk. 
++ */ ++ + ret = btrfs_insert_empty_item(trans, quota_root, path, &key, + sizeof(*qgroup_info)); +- if (ret) ++ if (ret && ret != -EEXIST) + goto out; + + leaf = path->nodes[0]; +@@ -572,7 +578,7 @@ static int add_qgroup_item(struct btrfs_ + key.type = BTRFS_QGROUP_LIMIT_KEY; + ret = btrfs_insert_empty_item(trans, quota_root, path, &key, + sizeof(*qgroup_limit)); +- if (ret) ++ if (ret && ret != -EEXIST) + goto out; + + leaf = path->nodes[0]; diff --git a/queue-3.17/btrfs-fix-a-deadlock-in-btrfs_dev_replace_finishing.patch b/queue-3.17/btrfs-fix-a-deadlock-in-btrfs_dev_replace_finishing.patch new file mode 100644 index 00000000000..c1315402907 --- /dev/null +++ b/queue-3.17/btrfs-fix-a-deadlock-in-btrfs_dev_replace_finishing.patch @@ -0,0 +1,67 @@ +From 12b894cb288d57292b01cf158177b6d5c89a6272 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Wed, 20 Aug 2014 16:10:15 +0800 +Subject: btrfs: Fix a deadlock in btrfs_dev_replace_finishing() + +From: Qu Wenruo + +commit 12b894cb288d57292b01cf158177b6d5c89a6272 upstream. + +btrfs-transacion:5657 +[stack snip] +btrfs_bio_map() + btrfs_bio_counter_inc_blocked() + percpu_counter_inc(&fs_info->bio_counter) ###bio_counter > 0(A) + __btrfs_bio_map() + btrfs_dev_replace_lock() + mutex_lock(dev_replace->lock) ###wait mutex(B) + +btrfs:32612 +[stack snip] +btrfs_dev_replace_start() + btrfs_dev_replace_lock() + mutex_lock(dev_replace->lock) ###hold mutex(B) + btrfs_dev_replace_finishing() + btrfs_rm_dev_replace_blocked() + wait until percpu_counter_sum == 0 ###wait on bio_counter(A) + +This bug can be triggered quite easily by the following test script: +http://pastebin.com/MQmb37Cy + +This patch will fix the ABBA problem by calling +btrfs_dev_replace_unlock() before btrfs_rm_dev_replace_blocked(). + +The consistency of btrfs devices list and their superblocks is protected +by device_list_mutex, not btrfs_dev_replace_lock/unlock(). +So it is safe the move btrfs_dev_replace_unlock() before +btrfs_rm_dev_replace_blocked(). 
+ +Reported-by: Zhao Lei +Signed-off-by: Qu Wenruo +Cc: Stefan Behrens +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/dev-replace.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/dev-replace.c ++++ b/fs/btrfs/dev-replace.c +@@ -567,6 +567,8 @@ static int btrfs_dev_replace_finishing(s + btrfs_kobj_rm_device(fs_info, src_device); + btrfs_kobj_add_device(fs_info, tgt_device); + ++ btrfs_dev_replace_unlock(dev_replace); ++ + btrfs_rm_dev_replace_blocked(fs_info); + + btrfs_rm_dev_replace_srcdev(fs_info, src_device); +@@ -580,7 +582,6 @@ static int btrfs_dev_replace_finishing(s + * superblock is scratched out so that it is no longer marked to + * belong to this filesystem. + */ +- btrfs_dev_replace_unlock(dev_replace); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); + mutex_unlock(&root->fs_info->chunk_mutex); + diff --git a/queue-3.17/btrfs-fix-and-enhance-merge_extent_mapping-to-insert-best-fitted-extent-map.patch b/queue-3.17/btrfs-fix-and-enhance-merge_extent_mapping-to-insert-best-fitted-extent-map.patch new file mode 100644 index 00000000000..a060978b7d1 --- /dev/null +++ b/queue-3.17/btrfs-fix-and-enhance-merge_extent_mapping-to-insert-best-fitted-extent-map.patch @@ -0,0 +1,167 @@ +From e6c4efd87ab04e5ead363f24e6ac35ed3506d401 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Wed, 17 Sep 2014 11:53:35 +0800 +Subject: btrfs: Fix and enhance merge_extent_mapping() to insert best fitted extent map + +From: Qu Wenruo + +commit e6c4efd87ab04e5ead363f24e6ac35ed3506d401 upstream. + +The following commit enhanced the merge_extent_mapping() to reduce +fragment in extent map tree, but it can't handle case which existing +lies before map_start: +51f39 btrfs: Use right extent length when inserting overlap extent map. + +[BUG] +When existing extent map's start is before map_start, +the em->len will be minus, which will corrupt the extent map and fail to +insert the new extent map. 
+This will happen when someone gets a large extent map, but when it is +going to insert it into extent map tree, someone has already committed +some writes and split the huge extent into small parts. + +[REPRODUCER] +It is very easy to trigger using filebench with randomrw personality. +It reproduces almost 100% of the time with an 8G preallocated file in a 60s +randomrw test. + +[FIX] +This patch can now handle any existing extent position. +Since it does not directly use existing->start, now it will find the +previous and next extent around map_start. +So the old existing->start < map_start bug will never happen again. + +[ENHANCE] +This patch will insert the best fitted extent map into extent map tree, +rather than the oldest [map_start, map_start + sectorsize) or the +relatively newer but not perfect [map_start, existing->start). + +The patch will first search existing extent that does not intersect with +the desired map range [map_start, map_start + len). +The existing extent will be either before or behind map_start, and based +on the existing extent, we can find out the previous and next extent +around map_start. + +So the best fitted extent would be [prev->end, next->start). +If prev or next is not found, the corresponding bound falls back to +em->start or extent_map_end(em), respectively. + +With this patch, fragmentation in the extent map tree should be reduced much +more than with the 51f39 commit, and an unneeded extent map tree search is avoided.
+ +Reported-by: Tsutomu Itoh +Signed-off-by: Qu Wenruo +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 79 +++++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 57 insertions(+), 22 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -6191,21 +6191,60 @@ out_fail_inode: + goto out_fail; + } + ++/* Find next extent map of a given extent map, caller needs to ensure locks */ ++static struct extent_map *next_extent_map(struct extent_map *em) ++{ ++ struct rb_node *next; ++ ++ next = rb_next(&em->rb_node); ++ if (!next) ++ return NULL; ++ return container_of(next, struct extent_map, rb_node); ++} ++ ++static struct extent_map *prev_extent_map(struct extent_map *em) ++{ ++ struct rb_node *prev; ++ ++ prev = rb_prev(&em->rb_node); ++ if (!prev) ++ return NULL; ++ return container_of(prev, struct extent_map, rb_node); ++} ++ + /* helper for btfs_get_extent. Given an existing extent in the tree, ++ * the existing extent is the nearest extent to map_start, + * and an extent that you want to insert, deal with overlap and insert +- * the new extent into the tree. ++ * the best fitted new extent into the tree. + */ + static int merge_extent_mapping(struct extent_map_tree *em_tree, + struct extent_map *existing, + struct extent_map *em, + u64 map_start) + { ++ struct extent_map *prev; ++ struct extent_map *next; ++ u64 start; ++ u64 end; + u64 start_diff; + + BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); +- start_diff = map_start - em->start; +- em->start = map_start; +- em->len = existing->start - em->start; ++ ++ if (existing->start > map_start) { ++ next = existing; ++ prev = prev_extent_map(next); ++ } else { ++ prev = existing; ++ next = next_extent_map(prev); ++ } ++ ++ start = prev ? extent_map_end(prev) : em->start; ++ start = max_t(u64, start, em->start); ++ end = next ? 
next->start : extent_map_end(em); ++ end = min_t(u64, end, extent_map_end(em)); ++ start_diff = start - em->start; ++ em->start = start; ++ em->len = end - start; + if (em->block_start < EXTENT_MAP_LAST_BYTE && + !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { + em->block_start += start_diff; +@@ -6482,25 +6521,21 @@ insert: + + ret = 0; + +- existing = lookup_extent_mapping(em_tree, start, len); +- if (existing && (existing->start > start || +- existing->start + existing->len <= start)) { ++ existing = search_extent_mapping(em_tree, start, len); ++ /* ++ * existing will always be non-NULL, since there must be ++ * extent causing the -EEXIST. ++ */ ++ if (start >= extent_map_end(existing) || ++ start + len <= existing->start) { ++ /* ++ * The existing extent map is the one nearest to ++ * the [start, start + len) range which overlaps ++ */ ++ err = merge_extent_mapping(em_tree, existing, ++ em, start); + free_extent_map(existing); +- existing = NULL; +- } +- if (!existing) { +- existing = lookup_extent_mapping(em_tree, em->start, +- em->len); +- if (existing) { +- err = merge_extent_mapping(em_tree, existing, +- em, start); +- free_extent_map(existing); +- if (err) { +- free_extent_map(em); +- em = NULL; +- } +- } else { +- err = -EIO; ++ if (err) { + free_extent_map(em); + em = NULL; + } diff --git a/queue-3.17/btrfs-fix-build_backref_tree-issue-with-multiple-shared-blocks.patch b/queue-3.17/btrfs-fix-build_backref_tree-issue-with-multiple-shared-blocks.patch new file mode 100644 index 00000000000..3f5d12ebc95 --- /dev/null +++ b/queue-3.17/btrfs-fix-build_backref_tree-issue-with-multiple-shared-blocks.patch @@ -0,0 +1,63 @@ +From bbe9051441effce51c9a533d2c56440df64db2d7 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Fri, 19 Sep 2014 15:43:34 -0400 +Subject: Btrfs: fix build_backref_tree issue with multiple shared blocks + +From: Josef Bacik + +commit bbe9051441effce51c9a533d2c56440df64db2d7 upstream. 
+ +Marc Merlin sent me a broken fs image months ago where it would blow up in the +upper->checked BUG_ON() in build_backref_tree. This is because we had a +scenario like this + +block a -- level 4 (not shared) + | +block b -- level 3 (reloc block, shared) + | +block c -- level 2 (not shared) + | +block d -- level 1 (shared) + | +block e -- level 0 (shared) + +We go to build a backref tree for block e, we notice block d is shared and add +it to the list of blocks to look up its backrefs for. Now when we loop around +we will check edges for the block, so we will see we looked up block c last +time. So we look up block d and then see that the block that points to it is +block c and we can just skip that edge since we've already been up this path. +The problem is that because we clear need_check when we see block d (as it is shared) +we never add block b as needing to be checked. And because block c is in our +path already we bail out before we walk up to block b and add it to the backref +check list. + +To fix this we need to reset need_check if we trip over a block that doesn't +need to be checked. This will make sure that any subsequent blocks in the path +as we're walking up afterwards are added to the list to be processed. With this +patch I can now mount Marc's fs image and it'll complete the balance without +panicking.
Thanks, + +Reported-by: Marc MERLIN +Signed-off-by: Josef Bacik +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/relocation.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -978,8 +978,11 @@ again: + need_check = false; + list_add_tail(&edge->list[UPPER], + &list); +- } else ++ } else { ++ if (upper->checked) ++ need_check = true; + INIT_LIST_HEAD(&edge->list[UPPER]); ++ } + } else { + upper = rb_entry(rb_node, struct backref_node, + rb_node); diff --git a/queue-3.17/btrfs-fix-race-in-wait_sync-ioctl.patch b/queue-3.17/btrfs-fix-race-in-wait_sync-ioctl.patch new file mode 100644 index 00000000000..dc00af6c422 --- /dev/null +++ b/queue-3.17/btrfs-fix-race-in-wait_sync-ioctl.patch @@ -0,0 +1,56 @@ +From 42383020beb1cfb05f5d330cc311931bc4917a97 Mon Sep 17 00:00:00 2001 +From: Sage Weil +Date: Fri, 26 Sep 2014 08:30:06 -0700 +Subject: Btrfs: fix race in WAIT_SYNC ioctl + +From: Sage Weil + +commit 42383020beb1cfb05f5d330cc311931bc4917a97 upstream. + +We check whether transid is already committed via last_trans_committed and +then search through trans_list for pending transactions. If +last_trans_committed is updated by btrfs_commit_transaction after we check +it (there is no locking), we will fail to find the committed transaction +and return EINVAL to the caller. This has been observed occasionally by +ceph-osd (which uses this ioctl heavily). + +Fix by rechecking whether the provided transid <= last_trans_committed +after the search fails, and if so return 0. 
+ +Signed-off-by: Sage Weil +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/transaction.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -609,7 +609,6 @@ int btrfs_wait_for_commit(struct btrfs_r + if (transid <= root->fs_info->last_trans_committed) + goto out; + +- ret = -EINVAL; + /* find specified transaction */ + spin_lock(&root->fs_info->trans_lock); + list_for_each_entry(t, &root->fs_info->trans_list, list) { +@@ -625,9 +624,16 @@ int btrfs_wait_for_commit(struct btrfs_r + } + } + spin_unlock(&root->fs_info->trans_lock); +- /* The specified transaction doesn't exist */ +- if (!cur_trans) ++ ++ /* ++ * The specified transaction doesn't exist, or we ++ * raced with btrfs_commit_transaction ++ */ ++ if (!cur_trans) { ++ if (transid > root->fs_info->last_trans_committed) ++ ret = -EINVAL; + goto out; ++ } + } else { + /* find newest transaction that is committing | committed */ + spin_lock(&root->fs_info->trans_lock); diff --git a/queue-3.17/btrfs-fix-the-wrong-condition-judgment-about-subset-extent-map.patch b/queue-3.17/btrfs-fix-the-wrong-condition-judgment-about-subset-extent-map.patch new file mode 100644 index 00000000000..257832c1608 --- /dev/null +++ b/queue-3.17/btrfs-fix-the-wrong-condition-judgment-about-subset-extent-map.patch @@ -0,0 +1,37 @@ +From 32be3a1ac6d09576c57063c6c350ca36eaebdbd3 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Mon, 22 Sep 2014 09:13:03 +0800 +Subject: btrfs: Fix the wrong condition judgment about subset extent map + +From: Qu Wenruo + +commit 32be3a1ac6d09576c57063c6c350ca36eaebdbd3 upstream. + +Previous commit: btrfs: Fix and enhance merge_extent_mapping() to insert +best fitted extent map +is using wrong condition to judgement whether the range is a subset of a +existing extent map. + +This may cause bug in btrfs no-holes mode. + +This patch will correct the judgment and fix the bug. 
+ +Signed-off-by: Qu Wenruo +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -6528,7 +6528,7 @@ insert: + * extent causing the -EEXIST. + */ + if (start >= extent_map_end(existing) || +- start + len <= existing->start) { ++ start <= existing->start) { + /* + * The existing extent map is the one nearest to + * the [start, start + len) range which overlaps diff --git a/queue-3.17/btrfs-fix-up-bounds-checking-in-lseek.patch b/queue-3.17/btrfs-fix-up-bounds-checking-in-lseek.patch new file mode 100644 index 00000000000..9674f542051 --- /dev/null +++ b/queue-3.17/btrfs-fix-up-bounds-checking-in-lseek.patch @@ -0,0 +1,88 @@ +From 4d1a40c66bed0b3fa43b9da5fbd5cbe332e4eccf Mon Sep 17 00:00:00 2001 +From: Liu Bo +Date: Tue, 16 Sep 2014 17:49:30 +0800 +Subject: Btrfs: fix up bounds checking in lseek +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Liu Bo + +commit 4d1a40c66bed0b3fa43b9da5fbd5cbe332e4eccf upstream. + +An user reported this, it is because that lseek's SEEK_SET/SEEK_CUR/SEEK_END +allow a negative value for @offset, but btrfs's SEEK_DATA/SEEK_HOLE don't +prepare for that and convert the negative @offset into unsigned type, +so we get (end < start) warning. 
+ +[ 1269.835374] ------------[ cut here ]------------ +[ 1269.836809] WARNING: CPU: 0 PID: 1241 at fs/btrfs/extent_io.c:430 insert_state+0x11d/0x140() +[ 1269.838816] BTRFS: end < start 4094 18446744073709551615 +[ 1269.840334] CPU: 0 PID: 1241 Comm: a.out Tainted: G W 3.16.0+ #306 +[ 1269.858229] Call Trace: +[ 1269.858612] [] dump_stack+0x4e/0x68 +[ 1269.858952] [] warn_slowpath_common+0x8c/0xc0 +[ 1269.859416] [] warn_slowpath_fmt+0x46/0x50 +[ 1269.859929] [] insert_state+0x11d/0x140 +[ 1269.860409] [] __set_extent_bit+0x3b6/0x4e0 +[ 1269.860805] [] lock_extent_bits+0x87/0x200 +[ 1269.861697] [] btrfs_file_llseek+0x148/0x2a0 +[ 1269.862168] [] SyS_lseek+0xae/0xc0 +[ 1269.862620] [] system_call_fastpath+0x16/0x1b +[ 1269.862970] ---[ end trace 4d33ea885832054b ]--- + +This assumes that btrfs starts finding DATA/HOLE from the beginning of file +if the assigned @offset is negative. + +Also we add alignment for lock_extent_bits 's range. + +Reported-by: Toralf Förster +Signed-off-by: Liu Bo +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/file.c | 25 +++++++++++++++---------- + 1 file changed, 15 insertions(+), 10 deletions(-) + +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -2621,23 +2621,28 @@ static int find_desired_extent(struct in + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_map *em = NULL; + struct extent_state *cached_state = NULL; +- u64 lockstart = *offset; +- u64 lockend = i_size_read(inode); +- u64 start = *offset; +- u64 len = i_size_read(inode); ++ u64 lockstart; ++ u64 lockend; ++ u64 start; ++ u64 len; + int ret = 0; + +- lockend = max_t(u64, root->sectorsize, lockend); ++ if (inode->i_size == 0) ++ return -ENXIO; ++ ++ /* ++ * *offset can be negative, in this case we start finding DATA/HOLE from ++ * the very start of the file. 
++ */ ++ start = max_t(loff_t, 0, *offset); ++ ++ lockstart = round_down(start, root->sectorsize); ++ lockend = round_up(i_size_read(inode), root->sectorsize); + if (lockend <= lockstart) + lockend = lockstart + root->sectorsize; +- + lockend--; + len = lockend - lockstart + 1; + +- len = max_t(u64, len, root->sectorsize); +- if (inode->i_size == 0) +- return -ENXIO; +- + lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, + &cached_state); + diff --git a/queue-3.17/btrfs-try-not-to-enospc-on-log-replay.patch b/queue-3.17/btrfs-try-not-to-enospc-on-log-replay.patch new file mode 100644 index 00000000000..539b40ea6fe --- /dev/null +++ b/queue-3.17/btrfs-try-not-to-enospc-on-log-replay.patch @@ -0,0 +1,40 @@ +From 1d52c78afbbf80b58299e076a159617d6b42fe3c Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Thu, 18 Sep 2014 11:30:44 -0400 +Subject: Btrfs: try not to ENOSPC on log replay + +From: Josef Bacik + +commit 1d52c78afbbf80b58299e076a159617d6b42fe3c upstream. + +When doing log replay we may have to update inodes, which traditionally goes +through our delayed inode stuff. This will try to move space over from the +trans handle, but we don't reserve space in our trans handle on replay since we +don't know how much we will need, so instead we try to flush. But because we +have a trans handle open we won't flush anything, so if we are out of reserve +space we will simply return ENOSPC. Since we know that if an operation made it +into the log then we definitely had space before the box bought the farm then we +don't need to worry about doing this space reservation. Use the +fs_info->log_root_recovering flag to skip the delayed inode stuff and update the +item directly. 
Thanks, + +Signed-off-by: Josef Bacik +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -3662,7 +3662,8 @@ noinline int btrfs_update_inode(struct b + * without delay + */ + if (!btrfs_is_free_space_inode(inode) +- && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { ++ && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID ++ && !root->fs_info->log_root_recovering) { + btrfs_update_root_times(trans, root); + + ret = btrfs_delayed_update_inode(trans, root, inode); diff --git a/queue-3.17/btrfs-wake-up-transaction-thread-from-sync_fs-ioctl.patch b/queue-3.17/btrfs-wake-up-transaction-thread-from-sync_fs-ioctl.patch new file mode 100644 index 00000000000..d2a7d4645f4 --- /dev/null +++ b/queue-3.17/btrfs-wake-up-transaction-thread-from-sync_fs-ioctl.patch @@ -0,0 +1,39 @@ +From 2fad4e83e12591eb3bd213875b9edc2d18e93383 Mon Sep 17 00:00:00 2001 +From: David Sterba +Date: Wed, 23 Jul 2014 14:39:35 +0200 +Subject: btrfs: wake up transaction thread from SYNC_FS ioctl + +From: David Sterba + +commit 2fad4e83e12591eb3bd213875b9edc2d18e93383 upstream. + +The transaction thread may want to do more work, namely it pokes the +cleaner kthread that will start processing uncleaned subvols. + +This can be triggered by user via the 'btrfs fi sync' command, otherwise +there was a delay up to 30 seconds before the cleaner started to clean +old snapshots. 
+ +Signed-off-by: David Sterba +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ioctl.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -5283,6 +5283,12 @@ long btrfs_ioctl(struct file *file, unsi + if (ret) + return ret; + ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); ++ /* ++ * The transaction thread may want to do more work, ++ * namely it pokes the cleaner ktread that will start ++ * processing uncleaned subvols. ++ */ ++ wake_up_process(root->fs_info->transaction_kthread); + return ret; + } + case BTRFS_IOC_START_SYNC: diff --git a/queue-3.17/revert-btrfs-race-free-update-of-commit-root-for-ro-snapshots.patch b/queue-3.17/revert-btrfs-race-free-update-of-commit-root-for-ro-snapshots.patch new file mode 100644 index 00000000000..6ae774d5284 --- /dev/null +++ b/queue-3.17/revert-btrfs-race-free-update-of-commit-root-for-ro-snapshots.patch @@ -0,0 +1,120 @@ +From d37973082b453ba6b89ec07eb7b84305895d35e1 Mon Sep 17 00:00:00 2001 +From: Chris Mason +Date: Wed, 15 Oct 2014 13:50:56 -0700 +Subject: Revert "Btrfs: race free update of commit root for ro snapshots" + +From: Chris Mason + +commit d37973082b453ba6b89ec07eb7b84305895d35e1 upstream. + +This reverts commit 9c3b306e1c9e6be4be09e99a8fe2227d1005effc. + +Switching only one commit root during a transaction is wrong because it +leads the fs into an inconsistent state. All commit roots should be +switched at once, at transaction commit time, otherwise backref walking +can often miss important references that were only accessible through +the old commit root. Plus, the root item for the snapshot's root wasn't +getting updated and preventing the next transaction commit to do it. + +This made several users get into random corruption issues after creation +of readonly snapshots. + +A regression test for xfstests will follow soon. 
+ +Cc: stable@vger.kernel.org # 3.17 +Signed-off-by: Filipe Manana +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 36 ------------------------------------ + fs/btrfs/ioctl.c | 33 +++++++++++++++++++++++++++++++++ + 2 files changed, 33 insertions(+), 36 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -5203,42 +5203,6 @@ struct inode *btrfs_lookup_dentry(struct + iput(inode); + inode = ERR_PTR(ret); + } +- /* +- * If orphan cleanup did remove any orphans, it means the tree +- * was modified and therefore the commit root is not the same as +- * the current root anymore. This is a problem, because send +- * uses the commit root and therefore can see inode items that +- * don't exist in the current root anymore, and for example make +- * calls to btrfs_iget, which will do tree lookups based on the +- * current root and not on the commit root. Those lookups will +- * fail, returning a -ESTALE error, and making send fail with +- * that error. So make sure a send does not see any orphans we +- * have just removed, and that it will see the same inodes +- * regardless of whether a transaction commit happened before +- * it started (meaning that the commit root will be the same as +- * the current root) or not. +- */ +- if (sub_root->node != sub_root->commit_root) { +- u64 sub_flags = btrfs_root_flags(&sub_root->root_item); +- +- if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) { +- struct extent_buffer *eb; +- +- /* +- * Assert we can't have races between dentry +- * lookup called through the snapshot creation +- * ioctl and the VFS. 
+- */ +- ASSERT(mutex_is_locked(&dir->i_mutex)); +- +- down_write(&root->fs_info->commit_root_sem); +- eb = sub_root->commit_root; +- sub_root->commit_root = +- btrfs_root_node(sub_root); +- up_write(&root->fs_info->commit_root_sem); +- free_extent_buffer(eb); +- } +- } + } + + return inode; +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -714,6 +714,39 @@ static int create_snapshot(struct btrfs_ + if (ret) + goto fail; + ++ ret = btrfs_orphan_cleanup(pending_snapshot->snap); ++ if (ret) ++ goto fail; ++ ++ /* ++ * If orphan cleanup did remove any orphans, it means the tree was ++ * modified and therefore the commit root is not the same as the ++ * current root anymore. This is a problem, because send uses the ++ * commit root and therefore can see inode items that don't exist ++ * in the current root anymore, and for example make calls to ++ * btrfs_iget, which will do tree lookups based on the current root ++ * and not on the commit root. Those lookups will fail, returning a ++ * -ESTALE error, and making send fail with that error. So make sure ++ * a send does not see any orphans we have just removed, and that it ++ * will see the same inodes regardless of whether a transaction ++ * commit happened before it started (meaning that the commit root ++ * will be the same as the current root) or not. 
++ */ ++ if (readonly && pending_snapshot->snap->node != ++ pending_snapshot->snap->commit_root) { ++ trans = btrfs_join_transaction(pending_snapshot->snap); ++ if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) { ++ ret = PTR_ERR(trans); ++ goto fail; ++ } ++ if (!IS_ERR(trans)) { ++ ret = btrfs_commit_transaction(trans, ++ pending_snapshot->snap); ++ if (ret) ++ goto fail; ++ } ++ } ++ + inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); diff --git a/queue-3.17/series b/queue-3.17/series index 19607c0b9ad..629a06e7e47 100644 --- a/queue-3.17/series +++ b/queue-3.17/series @@ -1 +1,13 @@ btrfs-wake-up-transaction-thread-from-sync_fs-ioctl.patch +btrfs-don-t-go-readonly-on-existing-qgroup-items.patch +btrfs-fix-a-deadlock-in-btrfs_dev_replace_finishing.patch +btrfs-add-missing-compression-property-remove-in-btrfs_ioctl_setflags.patch +btrfs-fix-up-bounds-checking-in-lseek.patch +btrfs-fix-and-enhance-merge_extent_mapping-to-insert-best-fitted-extent-map.patch +btrfs-don-t-do-async-reclaim-during-log-replay.patch +btrfs-try-not-to-enospc-on-log-replay.patch +btrfs-cleanup-error-handling-in-build_backref_tree.patch +btrfs-fix-build_backref_tree-issue-with-multiple-shared-blocks.patch +btrfs-fix-the-wrong-condition-judgment-about-subset-extent-map.patch +btrfs-fix-race-in-wait_sync-ioctl.patch +revert-btrfs-race-free-update-of-commit-root-for-ro-snapshots.patch