--- /dev/null
+From 78a017a2c92df9b571db0a55a016280f9019c65e Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Thu, 11 Sep 2014 11:44:49 +0100
+Subject: Btrfs: add missing compression property remove in btrfs_ioctl_setflags
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 78a017a2c92df9b571db0a55a016280f9019c65e upstream.
+
+The behaviour of a 'chattr -c' consists of getting the current flags,
+clearing the FS_COMPR_FL bit and then sending the result to the set
+flags ioctl - this means the bit FS_NOCOMP_FL isn't set in the flags
+passed to the ioctl. This results in the compression property not being
+cleared from the inode - it was cleared only if the bit FS_NOCOMP_FL
+was set in the received flags.
+
+Reproducer:
+
+ $ mkfs.btrfs -f /dev/sdd
+ $ mount /dev/sdd /mnt && cd /mnt
+ $ mkdir a
+ $ chattr +c a
+ $ touch a/file
+ $ lsattr a/file
+ --------c------- a/file
+ $ chattr -c a
+ $ touch a/file2
+ $ lsattr a/file2
+ --------c------- a/file2
+ $ lsattr -d a
+ ---------------- a
+
+Reported-by: Andreas Schneider <asn@cryptomilk.org>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ioctl.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -332,6 +332,9 @@ static int btrfs_ioctl_setflags(struct f
+ goto out_drop;
+
+ } else {
++ ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
++ if (ret && ret != -ENODATA)
++ goto out_drop;
+ ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
+ }
+
--- /dev/null
+From 75bfb9aff45e44625260f52a5fd581b92ace3e62 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <jbacik@fb.com>
+Date: Fri, 19 Sep 2014 10:40:00 -0400
+Subject: Btrfs: cleanup error handling in build_backref_tree
+
+From: Josef Bacik <jbacik@fb.com>
+
+commit 75bfb9aff45e44625260f52a5fd581b92ace3e62 upstream.
+
+When balance panics it tends to panic in the
+
+BUG_ON(!upper->checked);
+
+test, because it means it couldn't build the backref tree properly. This is
+annoying to users and frankly a recoverable error, nothing in this function is
+actually fatal since it is just an in-memory building of the backrefs for a
+given bytenr. So go through and change all the BUG_ON()'s to ASSERT()'s, and
+fix the BUG_ON(!upper->checked) thing to just return an error.
+
+This patch also fixes the error handling so it tears down the work we've done
+properly. This code was horribly broken since we always just panic'ed instead
+of actually erroring out, so it needed to be completely re-worked. With this
+patch my broken image no longer panics when I mount it. Thanks,
+
+Signed-off-by: Josef Bacik <jbacik@fb.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/relocation.c | 88 +++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 59 insertions(+), 29 deletions(-)
+
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -736,7 +736,8 @@ again:
+ err = ret;
+ goto out;
+ }
+- BUG_ON(!ret || !path1->slots[0]);
++ ASSERT(ret);
++ ASSERT(path1->slots[0]);
+
+ path1->slots[0]--;
+
+@@ -746,10 +747,10 @@ again:
+ * the backref was added previously when processing
+ * backref of type BTRFS_TREE_BLOCK_REF_KEY
+ */
+- BUG_ON(!list_is_singular(&cur->upper));
++ ASSERT(list_is_singular(&cur->upper));
+ edge = list_entry(cur->upper.next, struct backref_edge,
+ list[LOWER]);
+- BUG_ON(!list_empty(&edge->list[UPPER]));
++ ASSERT(list_empty(&edge->list[UPPER]));
+ exist = edge->node[UPPER];
+ /*
+ * add the upper level block to pending list if we need
+@@ -831,7 +832,7 @@ again:
+ cur->cowonly = 1;
+ }
+ #else
+- BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
++ ASSERT(key.type != BTRFS_EXTENT_REF_V0_KEY);
+ if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
+ #endif
+ if (key.objectid == key.offset) {
+@@ -840,7 +841,7 @@ again:
+ * backref of this type.
+ */
+ root = find_reloc_root(rc, cur->bytenr);
+- BUG_ON(!root);
++ ASSERT(root);
+ cur->root = root;
+ break;
+ }
+@@ -868,7 +869,7 @@ again:
+ } else {
+ upper = rb_entry(rb_node, struct backref_node,
+ rb_node);
+- BUG_ON(!upper->checked);
++ ASSERT(upper->checked);
+ INIT_LIST_HEAD(&edge->list[UPPER]);
+ }
+ list_add_tail(&edge->list[LOWER], &cur->upper);
+@@ -892,7 +893,7 @@ again:
+
+ if (btrfs_root_level(&root->root_item) == cur->level) {
+ /* tree root */
+- BUG_ON(btrfs_root_bytenr(&root->root_item) !=
++ ASSERT(btrfs_root_bytenr(&root->root_item) ==
+ cur->bytenr);
+ if (should_ignore_root(root))
+ list_add(&cur->list, &useless);
+@@ -927,7 +928,7 @@ again:
+ need_check = true;
+ for (; level < BTRFS_MAX_LEVEL; level++) {
+ if (!path2->nodes[level]) {
+- BUG_ON(btrfs_root_bytenr(&root->root_item) !=
++ ASSERT(btrfs_root_bytenr(&root->root_item) ==
+ lower->bytenr);
+ if (should_ignore_root(root))
+ list_add(&lower->list, &useless);
+@@ -982,7 +983,7 @@ again:
+ } else {
+ upper = rb_entry(rb_node, struct backref_node,
+ rb_node);
+- BUG_ON(!upper->checked);
++ ASSERT(upper->checked);
+ INIT_LIST_HEAD(&edge->list[UPPER]);
+ if (!upper->owner)
+ upper->owner = btrfs_header_owner(eb);
+@@ -1026,7 +1027,7 @@ next:
+ * everything goes well, connect backref nodes and insert backref nodes
+ * into the cache.
+ */
+- BUG_ON(!node->checked);
++ ASSERT(node->checked);
+ cowonly = node->cowonly;
+ if (!cowonly) {
+ rb_node = tree_insert(&cache->rb_root, node->bytenr,
+@@ -1062,8 +1063,21 @@ next:
+ continue;
+ }
+
+- BUG_ON(!upper->checked);
+- BUG_ON(cowonly != upper->cowonly);
++ if (!upper->checked) {
++ /*
++ * Still want to blow up for developers since this is a
++ * logic bug.
++ */
++ ASSERT(0);
++ err = -EINVAL;
++ goto out;
++ }
++ if (cowonly != upper->cowonly) {
++ ASSERT(0);
++ err = -EINVAL;
++ goto out;
++ }
++
+ if (!cowonly) {
+ rb_node = tree_insert(&cache->rb_root, upper->bytenr,
+ &upper->rb_node);
+@@ -1086,7 +1100,7 @@ next:
+ while (!list_empty(&useless)) {
+ upper = list_entry(useless.next, struct backref_node, list);
+ list_del_init(&upper->list);
+- BUG_ON(!list_empty(&upper->upper));
++ ASSERT(list_empty(&upper->upper));
+ if (upper == node)
+ node = NULL;
+ if (upper->lowest) {
+@@ -1119,29 +1133,45 @@ out:
+ if (err) {
+ while (!list_empty(&useless)) {
+ lower = list_entry(useless.next,
+- struct backref_node, upper);
+- list_del_init(&lower->upper);
++ struct backref_node, list);
++ list_del_init(&lower->list);
+ }
+- upper = node;
+- INIT_LIST_HEAD(&list);
+- while (upper) {
+- if (RB_EMPTY_NODE(&upper->rb_node)) {
+- list_splice_tail(&upper->upper, &list);
+- free_backref_node(cache, upper);
+- }
+-
+- if (list_empty(&list))
+- break;
+-
+- edge = list_entry(list.next, struct backref_edge,
+- list[LOWER]);
++ while (!list_empty(&list)) {
++ edge = list_first_entry(&list, struct backref_edge,
++ list[UPPER]);
++ list_del(&edge->list[UPPER]);
+ list_del(&edge->list[LOWER]);
++ lower = edge->node[LOWER];
+ upper = edge->node[UPPER];
+ free_backref_edge(cache, edge);
++
++ /*
++ * Lower is no longer linked to any upper backref nodes
++ * and isn't in the cache, we can free it ourselves.
++ */
++ if (list_empty(&lower->upper) &&
++ RB_EMPTY_NODE(&lower->rb_node))
++ list_add(&lower->list, &useless);
++
++ if (!RB_EMPTY_NODE(&upper->rb_node))
++ continue;
++
++ /* Add this guy's upper edges to the list to proces */
++ list_for_each_entry(edge, &upper->upper, list[LOWER])
++ list_add_tail(&edge->list[UPPER], &list);
++ if (list_empty(&upper->upper))
++ list_add(&upper->list, &useless);
++ }
++
++ while (!list_empty(&useless)) {
++ lower = list_entry(useless.next,
++ struct backref_node, list);
++ list_del_init(&lower->list);
++ free_backref_node(cache, lower);
+ }
+ return ERR_PTR(err);
+ }
+- BUG_ON(node && node->detached);
++ ASSERT(!node || !node->detached);
+ return node;
+ }
+
--- /dev/null
+From f6acfd50110b335c7af636cf1fc8e55319cae5fc Mon Sep 17 00:00:00 2001
+From: Josef Bacik <jbacik@fb.com>
+Date: Thu, 18 Sep 2014 11:27:17 -0400
+Subject: Btrfs: don't do async reclaim during log replay
+
+From: Josef Bacik <jbacik@fb.com>
+
+commit f6acfd50110b335c7af636cf1fc8e55319cae5fc upstream.
+
+Trying to reproduce a log enospc bug I hit a panic in the async reclaim code
+during log replay. This is because we use fs_info->fs_root as our root for
+shrinking and such. Technically we can use whatever root we want, but let's
+just not allow async reclaim while we're doing log replay. Thanks,
+
+Signed-off-by: Josef Bacik <jbacik@fb.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -4502,7 +4502,13 @@ again:
+ space_info->flush = 1;
+ } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
+ used += orig_bytes;
+- if (need_do_async_reclaim(space_info, root->fs_info, used) &&
++ /*
++ * We will do the space reservation dance during log replay,
++ * which means we won't have fs_info->fs_root set, so don't do
++ * the async reclaim as we will panic.
++ */
++ if (!root->fs_info->log_root_recovering &&
++ need_do_async_reclaim(space_info, root->fs_info, used) &&
+ !work_busy(&root->fs_info->async_reclaim_work))
+ queue_work(system_unbound_wq,
+ &root->fs_info->async_reclaim_work);
--- /dev/null
+From 0b4699dcb65c2cff793210b07f40b98c2d423a43 Mon Sep 17 00:00:00 2001
+From: Mark Fasheh <mfasheh@suse.de>
+Date: Mon, 18 Aug 2014 14:01:17 -0700
+Subject: btrfs: don't go readonly on existing qgroup items
+
+From: Mark Fasheh <mfasheh@suse.de>
+
+commit 0b4699dcb65c2cff793210b07f40b98c2d423a43 upstream.
+
+btrfs_drop_snapshot() leaves subvolume qgroup items on disk after
+completion. This can cause problems with snapshot creation. If a new
+snapshot tries to claim the deleted subvolumes id, btrfs will get -EEXIST
+from add_qgroup_item() and go read-only. The following commands will
+reproduce this problem (assume btrfs is on /dev/sda and is mounted at
+/btrfs)
+
+mkfs.btrfs -f /dev/sda
+mount -t btrfs /dev/sda /btrfs/
+btrfs quota enable /btrfs/
+btrfs su sna /btrfs/ /btrfs/snap
+btrfs su de /btrfs/snap
+sleep 45
+umount /btrfs/
+mount -t btrfs /dev/sda /btrfs/
+
+We can fix this by catching -EEXIST in add_qgroup_item() and
+initializing the existing items. We have the problem of orphaned
+relation items being on disk from an old snapshot but that is outside
+the scope of this patch.
+
+Signed-off-by: Mark Fasheh <mfasheh@suse.de>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/qgroup.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -551,9 +551,15 @@ static int add_qgroup_item(struct btrfs_
+ key.type = BTRFS_QGROUP_INFO_KEY;
+ key.offset = qgroupid;
+
++ /*
++ * Avoid a transaction abort by catching -EEXIST here. In that
++ * case, we proceed by re-initializing the existing structure
++ * on disk.
++ */
++
+ ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
+ sizeof(*qgroup_info));
+- if (ret)
++ if (ret && ret != -EEXIST)
+ goto out;
+
+ leaf = path->nodes[0];
+@@ -572,7 +578,7 @@ static int add_qgroup_item(struct btrfs_
+ key.type = BTRFS_QGROUP_LIMIT_KEY;
+ ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
+ sizeof(*qgroup_limit));
+- if (ret)
++ if (ret && ret != -EEXIST)
+ goto out;
+
+ leaf = path->nodes[0];
--- /dev/null
+From 12b894cb288d57292b01cf158177b6d5c89a6272 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <quwenruo@cn.fujitsu.com>
+Date: Wed, 20 Aug 2014 16:10:15 +0800
+Subject: btrfs: Fix a deadlock in btrfs_dev_replace_finishing()
+
+From: Qu Wenruo <quwenruo@cn.fujitsu.com>
+
+commit 12b894cb288d57292b01cf158177b6d5c89a6272 upstream.
+
+btrfs-transacion:5657
+[stack snip]
+btrfs_bio_map()
+ btrfs_bio_counter_inc_blocked()
+ percpu_counter_inc(&fs_info->bio_counter) ###bio_counter > 0(A)
+ __btrfs_bio_map()
+ btrfs_dev_replace_lock()
+ mutex_lock(dev_replace->lock) ###wait mutex(B)
+
+btrfs:32612
+[stack snip]
+btrfs_dev_replace_start()
+ btrfs_dev_replace_lock()
+ mutex_lock(dev_replace->lock) ###hold mutex(B)
+ btrfs_dev_replace_finishing()
+ btrfs_rm_dev_replace_blocked()
+ wait until percpu_counter_sum == 0 ###wait on bio_counter(A)
+
+This bug can be triggered quite easily by the following test script:
+http://pastebin.com/MQmb37Cy
+
+This patch will fix the ABBA problem by calling
+btrfs_dev_replace_unlock() before btrfs_rm_dev_replace_blocked().
+
+The consistency of btrfs devices list and their superblocks is protected
+by device_list_mutex, not btrfs_dev_replace_lock/unlock().
+So it is safe to move btrfs_dev_replace_unlock() before
+btrfs_rm_dev_replace_blocked().
+
+Reported-by: Zhao Lei <zhaolei@cn.fujitsu.com>
+Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
+Cc: Stefan Behrens <sbehrens@giantdisaster.de>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/dev-replace.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/dev-replace.c
++++ b/fs/btrfs/dev-replace.c
+@@ -567,6 +567,8 @@ static int btrfs_dev_replace_finishing(s
+ btrfs_kobj_rm_device(fs_info, src_device);
+ btrfs_kobj_add_device(fs_info, tgt_device);
+
++ btrfs_dev_replace_unlock(dev_replace);
++
+ btrfs_rm_dev_replace_blocked(fs_info);
+
+ btrfs_rm_dev_replace_srcdev(fs_info, src_device);
+@@ -580,7 +582,6 @@ static int btrfs_dev_replace_finishing(s
+ * superblock is scratched out so that it is no longer marked to
+ * belong to this filesystem.
+ */
+- btrfs_dev_replace_unlock(dev_replace);
+ mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+ mutex_unlock(&root->fs_info->chunk_mutex);
+
--- /dev/null
+From e6c4efd87ab04e5ead363f24e6ac35ed3506d401 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <quwenruo@cn.fujitsu.com>
+Date: Wed, 17 Sep 2014 11:53:35 +0800
+Subject: btrfs: Fix and enhance merge_extent_mapping() to insert best fitted extent map
+
+From: Qu Wenruo <quwenruo@cn.fujitsu.com>
+
+commit e6c4efd87ab04e5ead363f24e6ac35ed3506d401 upstream.
+
+The following commit enhanced the merge_extent_mapping() to reduce
+fragment in extent map tree, but it can't handle case which existing
+lies before map_start:
+51f39 btrfs: Use right extent length when inserting overlap extent map.
+
+[BUG]
+When the existing extent map's start is before map_start,
+em->len will be negative, which will corrupt the extent map and fail to
+insert the new extent map.
+This will happen when someone gets a large extent map, but by the time it
+is going to be inserted into the extent map tree, someone else has already
+committed some writes and split the huge extent into small parts.
+
+[REPRODUCER]
+It is very easy to trigger using filebench with the randomrw personality.
+It reproduces nearly 100% of the time when using an 8G preallocated file
+in a 60s randomrw test.
+
+[FIX]
+This patch can now handle any existing extent position.
+Since it does not directly use existing->start, now it will find the
+previous and next extent around map_start.
+So the old existing->start < map_start bug will never happen again.
+
+[ENHANCE]
+This patch will insert the best fitted extent map into extent map tree,
+other than the oldest [map_start, map_start + sectorsize) or the
+relatively newer but not perfect [map_start, existing->start).
+
+The patch will first search existing extent that does not intersects with
+the desired map range [map_start, map_start + len).
+The existing extent will be either before or behind map_start, and based
+on the existing extent, we can find out the previous and next extent
+around map_start.
+
+So the best fitted extent would be [prev->end, next->start).
+If prev is not found, em->start is used in place of prev->end, and if next
+is not found, the end of em is used in place of next->start.
+
+With this patch, the fragment in extent map tree should be reduced much
+more than the 51f39 commit and reduce an unneeded extent map tree search.
+
+Reported-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
+Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c | 79 +++++++++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 57 insertions(+), 22 deletions(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -6191,21 +6191,60 @@ out_fail_inode:
+ goto out_fail;
+ }
+
++/* Find next extent map of a given extent map, caller needs to ensure locks */
++static struct extent_map *next_extent_map(struct extent_map *em)
++{
++ struct rb_node *next;
++
++ next = rb_next(&em->rb_node);
++ if (!next)
++ return NULL;
++ return container_of(next, struct extent_map, rb_node);
++}
++
++static struct extent_map *prev_extent_map(struct extent_map *em)
++{
++ struct rb_node *prev;
++
++ prev = rb_prev(&em->rb_node);
++ if (!prev)
++ return NULL;
++ return container_of(prev, struct extent_map, rb_node);
++}
++
+ /* helper for btfs_get_extent. Given an existing extent in the tree,
++ * the existing extent is the nearest extent to map_start,
+ * and an extent that you want to insert, deal with overlap and insert
+- * the new extent into the tree.
++ * the best fitted new extent into the tree.
+ */
+ static int merge_extent_mapping(struct extent_map_tree *em_tree,
+ struct extent_map *existing,
+ struct extent_map *em,
+ u64 map_start)
+ {
++ struct extent_map *prev;
++ struct extent_map *next;
++ u64 start;
++ u64 end;
+ u64 start_diff;
+
+ BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
+- start_diff = map_start - em->start;
+- em->start = map_start;
+- em->len = existing->start - em->start;
++
++ if (existing->start > map_start) {
++ next = existing;
++ prev = prev_extent_map(next);
++ } else {
++ prev = existing;
++ next = next_extent_map(prev);
++ }
++
++ start = prev ? extent_map_end(prev) : em->start;
++ start = max_t(u64, start, em->start);
++ end = next ? next->start : extent_map_end(em);
++ end = min_t(u64, end, extent_map_end(em));
++ start_diff = start - em->start;
++ em->start = start;
++ em->len = end - start;
+ if (em->block_start < EXTENT_MAP_LAST_BYTE &&
+ !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
+ em->block_start += start_diff;
+@@ -6482,25 +6521,21 @@ insert:
+
+ ret = 0;
+
+- existing = lookup_extent_mapping(em_tree, start, len);
+- if (existing && (existing->start > start ||
+- existing->start + existing->len <= start)) {
++ existing = search_extent_mapping(em_tree, start, len);
++ /*
++ * existing will always be non-NULL, since there must be
++ * extent causing the -EEXIST.
++ */
++ if (start >= extent_map_end(existing) ||
++ start + len <= existing->start) {
++ /*
++ * The existing extent map is the one nearest to
++ * the [start, start + len) range which overlaps
++ */
++ err = merge_extent_mapping(em_tree, existing,
++ em, start);
+ free_extent_map(existing);
+- existing = NULL;
+- }
+- if (!existing) {
+- existing = lookup_extent_mapping(em_tree, em->start,
+- em->len);
+- if (existing) {
+- err = merge_extent_mapping(em_tree, existing,
+- em, start);
+- free_extent_map(existing);
+- if (err) {
+- free_extent_map(em);
+- em = NULL;
+- }
+- } else {
+- err = -EIO;
++ if (err) {
+ free_extent_map(em);
+ em = NULL;
+ }
--- /dev/null
+From bbe9051441effce51c9a533d2c56440df64db2d7 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <jbacik@fb.com>
+Date: Fri, 19 Sep 2014 15:43:34 -0400
+Subject: Btrfs: fix build_backref_tree issue with multiple shared blocks
+
+From: Josef Bacik <jbacik@fb.com>
+
+commit bbe9051441effce51c9a533d2c56440df64db2d7 upstream.
+
+Marc Merlin sent me a broken fs image months ago where it would blow up in the
+upper->checked BUG_ON() in build_backref_tree. This is because we had a
+scenario like this
+
+block a -- level 4 (not shared)
+ |
+block b -- level 3 (reloc block, shared)
+ |
+block c -- level 2 (not shared)
+ |
+block d -- level 1 (shared)
+ |
+block e -- level 0 (shared)
+
+We go to build a backref tree for block e, we notice block d is shared and add
+it to the list of blocks to look up its backrefs for. Now when we loop around
+we will check edges for the block, so we will see we looked up block c last
+time. So we lookup block d and then see that the block that points to it is
+block c and we can just skip that edge since we've already been up this path.
+The problem is because we clear need_check when we see block d (as it is shared)
+we never add block b as needing to be checked. And because block c is in our
+path already we bail out before we walk up to block b and add it to the backref
+check list.
+
+To fix this we need to reset need_check if we trip over a block that doesn't
+need to be checked. This will make sure that any subsequent blocks in the path
+as we're walking up afterwards are added to the list to be processed. With this
+patch I can now mount Marc's fs image and it'll complete the balance without
+panicking. Thanks,
+
+Reported-by: Marc MERLIN <marc@merlins.org>
+Signed-off-by: Josef Bacik <jbacik@fb.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/relocation.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -978,8 +978,11 @@ again:
+ need_check = false;
+ list_add_tail(&edge->list[UPPER],
+ &list);
+- } else
++ } else {
++ if (upper->checked)
++ need_check = true;
+ INIT_LIST_HEAD(&edge->list[UPPER]);
++ }
+ } else {
+ upper = rb_entry(rb_node, struct backref_node,
+ rb_node);
--- /dev/null
+From 42383020beb1cfb05f5d330cc311931bc4917a97 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage@redhat.com>
+Date: Fri, 26 Sep 2014 08:30:06 -0700
+Subject: Btrfs: fix race in WAIT_SYNC ioctl
+
+From: Sage Weil <sage@redhat.com>
+
+commit 42383020beb1cfb05f5d330cc311931bc4917a97 upstream.
+
+We check whether transid is already committed via last_trans_committed and
+then search through trans_list for pending transactions. If
+last_trans_committed is updated by btrfs_commit_transaction after we check
+it (there is no locking), we will fail to find the committed transaction
+and return EINVAL to the caller. This has been observed occasionally by
+ceph-osd (which uses this ioctl heavily).
+
+Fix by rechecking whether the provided transid <= last_trans_committed
+after the search fails, and if so return 0.
+
+Signed-off-by: Sage Weil <sage@redhat.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/transaction.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -609,7 +609,6 @@ int btrfs_wait_for_commit(struct btrfs_r
+ if (transid <= root->fs_info->last_trans_committed)
+ goto out;
+
+- ret = -EINVAL;
+ /* find specified transaction */
+ spin_lock(&root->fs_info->trans_lock);
+ list_for_each_entry(t, &root->fs_info->trans_list, list) {
+@@ -625,9 +624,16 @@ int btrfs_wait_for_commit(struct btrfs_r
+ }
+ }
+ spin_unlock(&root->fs_info->trans_lock);
+- /* The specified transaction doesn't exist */
+- if (!cur_trans)
++
++ /*
++ * The specified transaction doesn't exist, or we
++ * raced with btrfs_commit_transaction
++ */
++ if (!cur_trans) {
++ if (transid > root->fs_info->last_trans_committed)
++ ret = -EINVAL;
+ goto out;
++ }
+ } else {
+ /* find newest transaction that is committing | committed */
+ spin_lock(&root->fs_info->trans_lock);
--- /dev/null
+From 32be3a1ac6d09576c57063c6c350ca36eaebdbd3 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <quwenruo@cn.fujitsu.com>
+Date: Mon, 22 Sep 2014 09:13:03 +0800
+Subject: btrfs: Fix the wrong condition judgment about subset extent map
+
+From: Qu Wenruo <quwenruo@cn.fujitsu.com>
+
+commit 32be3a1ac6d09576c57063c6c350ca36eaebdbd3 upstream.
+
+The previous commit, "btrfs: Fix and enhance merge_extent_mapping() to
+insert best fitted extent map", uses the wrong condition to judge whether
+the range is a subset of an existing extent map.
+
+This may cause bug in btrfs no-holes mode.
+
+This patch will correct the judgment and fix the bug.
+
+Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -6528,7 +6528,7 @@ insert:
+ * extent causing the -EEXIST.
+ */
+ if (start >= extent_map_end(existing) ||
+- start + len <= existing->start) {
++ start <= existing->start) {
+ /*
+ * The existing extent map is the one nearest to
+ * the [start, start + len) range which overlaps
--- /dev/null
+From 4d1a40c66bed0b3fa43b9da5fbd5cbe332e4eccf Mon Sep 17 00:00:00 2001
+From: Liu Bo <bo.li.liu@oracle.com>
+Date: Tue, 16 Sep 2014 17:49:30 +0800
+Subject: Btrfs: fix up bounds checking in lseek
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Liu Bo <bo.li.liu@oracle.com>
+
+commit 4d1a40c66bed0b3fa43b9da5fbd5cbe332e4eccf upstream.
+
+A user reported the warning below. It happens because lseek's
+SEEK_SET/SEEK_CUR/SEEK_END allow a negative value for @offset, but btrfs's
+SEEK_DATA/SEEK_HOLE are not prepared for that and convert the negative
+@offset into an unsigned type, so we get the (end < start) warning.
+
+[ 1269.835374] ------------[ cut here ]------------
+[ 1269.836809] WARNING: CPU: 0 PID: 1241 at fs/btrfs/extent_io.c:430 insert_state+0x11d/0x140()
+[ 1269.838816] BTRFS: end < start 4094 18446744073709551615
+[ 1269.840334] CPU: 0 PID: 1241 Comm: a.out Tainted: G W 3.16.0+ #306
+[ 1269.858229] Call Trace:
+[ 1269.858612] [<ffffffff81801a69>] dump_stack+0x4e/0x68
+[ 1269.858952] [<ffffffff8107894c>] warn_slowpath_common+0x8c/0xc0
+[ 1269.859416] [<ffffffff81078a36>] warn_slowpath_fmt+0x46/0x50
+[ 1269.859929] [<ffffffff813b0fbd>] insert_state+0x11d/0x140
+[ 1269.860409] [<ffffffff813b1396>] __set_extent_bit+0x3b6/0x4e0
+[ 1269.860805] [<ffffffff813b21c7>] lock_extent_bits+0x87/0x200
+[ 1269.861697] [<ffffffff813a5b28>] btrfs_file_llseek+0x148/0x2a0
+[ 1269.862168] [<ffffffff811f201e>] SyS_lseek+0xae/0xc0
+[ 1269.862620] [<ffffffff8180b212>] system_call_fastpath+0x16/0x1b
+[ 1269.862970] ---[ end trace 4d33ea885832054b ]---
+
+With this patch, btrfs starts searching for DATA/HOLE from the beginning of
+the file if the given @offset is negative.
+
+Also we add alignment for lock_extent_bits 's range.
+
+Reported-by: Toralf Förster <toralf.foerster@gmx.de>
+Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/file.c | 25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -2621,23 +2621,28 @@ static int find_desired_extent(struct in
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct extent_map *em = NULL;
+ struct extent_state *cached_state = NULL;
+- u64 lockstart = *offset;
+- u64 lockend = i_size_read(inode);
+- u64 start = *offset;
+- u64 len = i_size_read(inode);
++ u64 lockstart;
++ u64 lockend;
++ u64 start;
++ u64 len;
+ int ret = 0;
+
+- lockend = max_t(u64, root->sectorsize, lockend);
++ if (inode->i_size == 0)
++ return -ENXIO;
++
++ /*
++ * *offset can be negative, in this case we start finding DATA/HOLE from
++ * the very start of the file.
++ */
++ start = max_t(loff_t, 0, *offset);
++
++ lockstart = round_down(start, root->sectorsize);
++ lockend = round_up(i_size_read(inode), root->sectorsize);
+ if (lockend <= lockstart)
+ lockend = lockstart + root->sectorsize;
+-
+ lockend--;
+ len = lockend - lockstart + 1;
+
+- len = max_t(u64, len, root->sectorsize);
+- if (inode->i_size == 0)
+- return -ENXIO;
+-
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
+ &cached_state);
+
--- /dev/null
+From 1d52c78afbbf80b58299e076a159617d6b42fe3c Mon Sep 17 00:00:00 2001
+From: Josef Bacik <jbacik@fb.com>
+Date: Thu, 18 Sep 2014 11:30:44 -0400
+Subject: Btrfs: try not to ENOSPC on log replay
+
+From: Josef Bacik <jbacik@fb.com>
+
+commit 1d52c78afbbf80b58299e076a159617d6b42fe3c upstream.
+
+When doing log replay we may have to update inodes, which traditionally goes
+through our delayed inode stuff. This will try to move space over from the
+trans handle, but we don't reserve space in our trans handle on replay since we
+don't know how much we will need, so instead we try to flush. But because we
+have a trans handle open we won't flush anything, so if we are out of reserve
+space we will simply return ENOSPC. Since we know that if an operation made it
+into the log then we definitely had space before the box bought the farm then we
+don't need to worry about doing this space reservation. Use the
+fs_info->log_root_recovering flag to skip the delayed inode stuff and update the
+item directly. Thanks,
+
+Signed-off-by: Josef Bacik <jbacik@fb.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -3662,7 +3662,8 @@ noinline int btrfs_update_inode(struct b
+ * without delay
+ */
+ if (!btrfs_is_free_space_inode(inode)
+- && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
++ && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
++ && !root->fs_info->log_root_recovering) {
+ btrfs_update_root_times(trans, root);
+
+ ret = btrfs_delayed_update_inode(trans, root, inode);
--- /dev/null
+From 2fad4e83e12591eb3bd213875b9edc2d18e93383 Mon Sep 17 00:00:00 2001
+From: David Sterba <dsterba@suse.cz>
+Date: Wed, 23 Jul 2014 14:39:35 +0200
+Subject: btrfs: wake up transaction thread from SYNC_FS ioctl
+
+From: David Sterba <dsterba@suse.cz>
+
+commit 2fad4e83e12591eb3bd213875b9edc2d18e93383 upstream.
+
+The transaction thread may want to do more work, namely it pokes the
+cleaner kthread that will start processing uncleaned subvols.
+
+This can be triggered by user via the 'btrfs fi sync' command, otherwise
+there was a delay up to 30 seconds before the cleaner started to clean
+old snapshots.
+
+Signed-off-by: David Sterba <dsterba@suse.cz>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ioctl.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -5283,6 +5283,12 @@ long btrfs_ioctl(struct file *file, unsi
+ if (ret)
+ return ret;
+ ret = btrfs_sync_fs(file->f_dentry->d_sb, 1);
++ /*
++ * The transaction thread may want to do more work,
++ * namely it pokes the cleaner ktread that will start
++ * processing uncleaned subvols.
++ */
++ wake_up_process(root->fs_info->transaction_kthread);
+ return ret;
+ }
+ case BTRFS_IOC_START_SYNC:
--- /dev/null
+From d37973082b453ba6b89ec07eb7b84305895d35e1 Mon Sep 17 00:00:00 2001
+From: Chris Mason <clm@fb.com>
+Date: Wed, 15 Oct 2014 13:50:56 -0700
+Subject: Revert "Btrfs: race free update of commit root for ro snapshots"
+
+From: Chris Mason <clm@fb.com>
+
+commit d37973082b453ba6b89ec07eb7b84305895d35e1 upstream.
+
+This reverts commit 9c3b306e1c9e6be4be09e99a8fe2227d1005effc.
+
+Switching only one commit root during a transaction is wrong because it
+leads the fs into an inconsistent state. All commit roots should be
+switched at once, at transaction commit time, otherwise backref walking
+can often miss important references that were only accessible through
+the old commit root. Plus, the root item for the snapshot's root wasn't
+getting updated and preventing the next transaction commit to do it.
+
+This made several users get into random corruption issues after creation
+of readonly snapshots.
+
+A regression test for xfstests will follow soon.
+
+Cc: stable@vger.kernel.org # 3.17
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c | 36 ------------------------------------
+ fs/btrfs/ioctl.c | 33 +++++++++++++++++++++++++++++++++
+ 2 files changed, 33 insertions(+), 36 deletions(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -5203,42 +5203,6 @@ struct inode *btrfs_lookup_dentry(struct
+ iput(inode);
+ inode = ERR_PTR(ret);
+ }
+- /*
+- * If orphan cleanup did remove any orphans, it means the tree
+- * was modified and therefore the commit root is not the same as
+- * the current root anymore. This is a problem, because send
+- * uses the commit root and therefore can see inode items that
+- * don't exist in the current root anymore, and for example make
+- * calls to btrfs_iget, which will do tree lookups based on the
+- * current root and not on the commit root. Those lookups will
+- * fail, returning a -ESTALE error, and making send fail with
+- * that error. So make sure a send does not see any orphans we
+- * have just removed, and that it will see the same inodes
+- * regardless of whether a transaction commit happened before
+- * it started (meaning that the commit root will be the same as
+- * the current root) or not.
+- */
+- if (sub_root->node != sub_root->commit_root) {
+- u64 sub_flags = btrfs_root_flags(&sub_root->root_item);
+-
+- if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) {
+- struct extent_buffer *eb;
+-
+- /*
+- * Assert we can't have races between dentry
+- * lookup called through the snapshot creation
+- * ioctl and the VFS.
+- */
+- ASSERT(mutex_is_locked(&dir->i_mutex));
+-
+- down_write(&root->fs_info->commit_root_sem);
+- eb = sub_root->commit_root;
+- sub_root->commit_root =
+- btrfs_root_node(sub_root);
+- up_write(&root->fs_info->commit_root_sem);
+- free_extent_buffer(eb);
+- }
+- }
+ }
+
+ return inode;
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -714,6 +714,39 @@ static int create_snapshot(struct btrfs_
+ if (ret)
+ goto fail;
+
++ ret = btrfs_orphan_cleanup(pending_snapshot->snap);
++ if (ret)
++ goto fail;
++
++ /*
++ * If orphan cleanup did remove any orphans, it means the tree was
++ * modified and therefore the commit root is not the same as the
++ * current root anymore. This is a problem, because send uses the
++ * commit root and therefore can see inode items that don't exist
++ * in the current root anymore, and for example make calls to
++ * btrfs_iget, which will do tree lookups based on the current root
++ * and not on the commit root. Those lookups will fail, returning a
++ * -ESTALE error, and making send fail with that error. So make sure
++ * a send does not see any orphans we have just removed, and that it
++ * will see the same inodes regardless of whether a transaction
++ * commit happened before it started (meaning that the commit root
++ * will be the same as the current root) or not.
++ */
++ if (readonly && pending_snapshot->snap->node !=
++ pending_snapshot->snap->commit_root) {
++ trans = btrfs_join_transaction(pending_snapshot->snap);
++ if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) {
++ ret = PTR_ERR(trans);
++ goto fail;
++ }
++ if (!IS_ERR(trans)) {
++ ret = btrfs_commit_transaction(trans,
++ pending_snapshot->snap);
++ if (ret)
++ goto fail;
++ }
++ }
++
+ inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
btrfs-wake-up-transaction-thread-from-sync_fs-ioctl.patch
+btrfs-don-t-go-readonly-on-existing-qgroup-items.patch
+btrfs-fix-a-deadlock-in-btrfs_dev_replace_finishing.patch
+btrfs-add-missing-compression-property-remove-in-btrfs_ioctl_setflags.patch
+btrfs-fix-up-bounds-checking-in-lseek.patch
+btrfs-fix-and-enhance-merge_extent_mapping-to-insert-best-fitted-extent-map.patch
+btrfs-don-t-do-async-reclaim-during-log-replay.patch
+btrfs-try-not-to-enospc-on-log-replay.patch
+btrfs-cleanup-error-handling-in-build_backref_tree.patch
+btrfs-fix-build_backref_tree-issue-with-multiple-shared-blocks.patch
+btrfs-fix-the-wrong-condition-judgment-about-subset-extent-map.patch
+btrfs-fix-race-in-wait_sync-ioctl.patch
+revert-btrfs-race-free-update-of-commit-root-for-ro-snapshots.patch