--- /dev/null
+From 8d9eddad19467b008e0c881bc3133d7da94b7ec1 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Thu, 24 Nov 2016 02:09:04 +0000
+Subject: Btrfs: fix qgroup rescan worker initialization
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 8d9eddad19467b008e0c881bc3133d7da94b7ec1 upstream.
+
+We were setting the qgroup_rescan_running flag to true only after the
+rescan worker started (which is a task run by a workqueue). So if a user
+space task starts a rescan and immediately afterwards asks to wait for
+the rescan worker to finish, this second call might happen before the
+rescan worker task starts running, in which case the rescan wait ioctl
+returns immediately, without waiting for the rescan worker to finish.
+
+This was making the fstest btrfs/022 fail very often.
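+
+The race is easier to see in a standalone model (userspace C with
+pthreads, not the actual btrfs code; all names here are illustrative):
+
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+static pthread_mutex_t rescan_lock = PTHREAD_MUTEX_INITIALIZER;
+static bool rescan_running;
+
+static void *rescan_worker(void *arg)
+{
+	/* before the fix, rescan_running was set to true only here, so
+	 * a waiter that checked the flag before this task got scheduled
+	 * returned immediately */
+	/* ... do the rescan, then clear the flag under the lock ... */
+	return NULL;
+}
+
+static void rescan_init(pthread_t *worker)
+{
+	pthread_mutex_lock(&rescan_lock);
+	rescan_running = true;	/* the fix: set before queuing the worker */
+	pthread_mutex_unlock(&rescan_lock);
+	pthread_create(worker, NULL, rescan_worker, NULL);
+}
+
+static bool rescan_wait_sees_running(void)
+{
+	bool running;
+
+	pthread_mutex_lock(&rescan_lock);
+	running = rescan_running;	/* models the wait ioctl's check */
+	pthread_mutex_unlock(&rescan_lock);
+	return running;
+}
+
+int main(void)
+{
+	pthread_t worker;
+
+	rescan_init(&worker);
+	printf("wait would block: %d\n", rescan_wait_sees_running());
+	pthread_join(worker, NULL);
+	return 0;
+}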
+
+Fixes: d2c609b834d6 ("btrfs: properly track when rescan worker is running")
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/qgroup.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -2283,10 +2283,6 @@ static void btrfs_qgroup_rescan_worker(s
+ int err = -ENOMEM;
+ int ret = 0;
+
+- mutex_lock(&fs_info->qgroup_rescan_lock);
+- fs_info->qgroup_rescan_running = true;
+- mutex_unlock(&fs_info->qgroup_rescan_lock);
+-
+ path = btrfs_alloc_path();
+ if (!path)
+ goto out;
+@@ -2397,6 +2393,7 @@ qgroup_rescan_init(struct btrfs_fs_info
+ sizeof(fs_info->qgroup_rescan_progress));
+ fs_info->qgroup_rescan_progress.objectid = progress_objectid;
+ init_completion(&fs_info->qgroup_rescan_completion);
++ fs_info->qgroup_rescan_running = true;
+
+ spin_unlock(&fs_info->qgroup_lock);
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
--- /dev/null
+From 2a7bf53f577e49c43de4ffa7776056de26db65d9 Mon Sep 17 00:00:00 2001
+From: Robbie Ko <robbieko@synology.com>
+Date: Fri, 7 Oct 2016 17:30:47 +0800
+Subject: Btrfs: fix tree search logic when replaying directory entry deletes
+
+From: Robbie Ko <robbieko@synology.com>
+
+commit 2a7bf53f577e49c43de4ffa7776056de26db65d9 upstream.
+
+If a log tree has a layout like the following:
+
+leaf N:
+ ...
+ item 240 key (282 DIR_LOG_ITEM 0) itemoff 8189 itemsize 8
+ dir log end 1275809046
+leaf N + 1:
+ item 0 key (282 DIR_LOG_ITEM 3936149215) itemoff 16275 itemsize 8
+ dir log end 18446744073709551615
+ ...
+
+When we pass the value 1275809046 + 1 as the parameter start_ret to the
+function tree-log.c:find_dir_range() (done by replay_dir_deletes()), we
+end up with path->slots[0] having the value 239 (pointing to the last
+item of leaf N, item 240). Because the dir log item in that position
+has an offset value smaller than *start_ret (1275809046 + 1), we need
+to move on to the next leaf. However, the logic for that is wrong: it
+compares the current slot to the number of items in the leaf before
+incrementing it, and since the slot is still smaller we don't look up
+the next leaf but instead set the slot to point to an item that does
+not exist, at slot 240. We later operate on that slot, which has
+unexpected content or, in the worst case, can result in an invalid
+memory access (accessing beyond the last page of leaf N's extent
+buffer).
+
+So fix the logic that checks when we need to look at the next leaf by
+first incrementing the slot and only then checking whether that slot is
+beyond the last item of the current leaf.
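+
+The slot-advance logic can be modelled in a standalone program (plain
+C, not btrfs code; the next-leaf lookup is reduced to returning slot 0
+of an imaginary next leaf):
+
+#include <stdio.h>
+
+static int next_slot_buggy(int slot, int nritems)
+{
+	/* old logic: checks the bound before incrementing, so from the
+	 * last valid slot (nritems - 1) it advances to slot nritems,
+	 * an item that does not exist */
+	if (slot >= nritems)
+		return 0;	/* models btrfs_next_leaf() */
+	return slot + 1;
+}
+
+static int next_slot_fixed(int slot, int nritems)
+{
+	/* new logic: increment first, then check the bound */
+	slot++;
+	if (slot >= nritems)
+		return 0;	/* move to the first slot of the next leaf */
+	return slot;
+}
+
+int main(void)
+{
+	int nritems = 240;	/* leaf N: slots 0..239, last item at 239 */
+
+	printf("buggy: slot %d (== nritems, no such item)\n",
+	       next_slot_buggy(239, nritems));
+	printf("fixed: slot %d (first slot of leaf N + 1)\n",
+	       next_slot_fixed(239, nritems));
+	return 0;
+}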
+
+Signed-off-by: Robbie Ko <robbieko@synology.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Fixes: e02119d5a7b4 ("Btrfs: Add a write ahead tree log to optimize synchronous operations")
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+[Modified changelog for clarity and correctness]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-log.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -1923,12 +1923,11 @@ static noinline int find_dir_range(struc
+ next:
+ /* check the next slot in the tree to see if it is a valid item */
+ nritems = btrfs_header_nritems(path->nodes[0]);
++ path->slots[0]++;
+ if (path->slots[0] >= nritems) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret)
+ goto out;
+- } else {
+- path->slots[0]++;
+ }
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
--- /dev/null
+From 2939e1a86f758b55cdba73e29397dd3d94df13bc Mon Sep 17 00:00:00 2001
+From: Maxim Patlasov <mpatlasov@virtuozzo.com>
+Date: Mon, 12 Dec 2016 14:32:44 -0800
+Subject: btrfs: limit async_work allocation and worker func duration
+
+From: Maxim Patlasov <mpatlasov@virtuozzo.com>
+
+commit 2939e1a86f758b55cdba73e29397dd3d94df13bc upstream.
+
+Problem statement: an unprivileged user who has read-write access to more
+than one btrfs subvolume may easily consume all kernel memory (eventually
+triggering the oom-killer).
+
+Reproducer (./mkrmdir below essentially loops over mkdir/rmdir):
+
+[root@kteam1 ~]# cat prep.sh
+
+DEV=/dev/sdb
+mkfs.btrfs -f $DEV
+mount $DEV /mnt
+for i in `seq 1 16`
+do
+ mkdir /mnt/$i
+ btrfs subvolume create /mnt/SV_$i
+ ID=`btrfs subvolume list /mnt |grep "SV_$i$" |cut -d ' ' -f 2`
+ mount -t btrfs -o subvolid=$ID $DEV /mnt/$i
+ chmod a+rwx /mnt/$i
+done
+
+[root@kteam1 ~]# sh prep.sh
+
+[maxim@kteam1 ~]$ for i in `seq 1 16`; do ./mkrmdir /mnt/$i 2000 2000 & done
+
+[root@kteam1 ~]# for i in `seq 1 4`; do grep "kmalloc-128" /proc/slabinfo | grep -v dma; sleep 60; done
+kmalloc-128 10144 10144 128 32 1 : tunables 0 0 0 : slabdata 317 317 0
+kmalloc-128 9992352 9992352 128 32 1 : tunables 0 0 0 : slabdata 312261 312261 0
+kmalloc-128 24226752 24226752 128 32 1 : tunables 0 0 0 : slabdata 757086 757086 0
+kmalloc-128 42754240 42754240 128 32 1 : tunables 0 0 0 : slabdata 1336070 1336070 0
+
+The huge numbers above come from insane number of async_work-s allocated
+and queued by btrfs_wq_run_delayed_node.
+
+The problem is caused by btrfs_wq_run_delayed_node() queuing more and more
+works if the number of delayed items is above BTRFS_DELAYED_BACKGROUND. The
+worker func (btrfs_async_run_delayed_root) processes at least
+BTRFS_DELAYED_BATCH items (if they are present in the list). So the machinery
+works as expected while the list is almost empty. As soon as the list grows,
+the worker func starts to process more than one item at a time, each run
+takes longer, and the chance that more async_works are queued than needed
+keeps rising.
+
+The problem above is worsened by another flaw of the delayed-inode
+implementation: if an async_work was queued in a throttling branch (number of
+items >= BTRFS_DELAYED_WRITEBACK), the corresponding worker func won't quit
+until the number of items drops below BTRFS_DELAYED_BACKGROUND / 2. So it is
+possible for the func to occupy the CPU indefinitely (up to 30 sec in my
+experiments): while the func is trying to drain the list, user activity may
+keep adding more items to it.
+
+The patch fixes both problems in a straightforward way: refuse to queue too
+many works in btrfs_wq_run_delayed_node and bail out of the worker func once
+at least BTRFS_DELAYED_WRITEBACK items have been processed.
+
+Changed in v2: remove support of thresh == NO_THRESHOLD.
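+
+A minimal sketch of the two guards (standalone C modelling the control
+flow, not the btrfs code; the threshold and thresh values are sample
+numbers, not taken from this patch):
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#define DELAYED_BACKGROUND	128	/* models BTRFS_DELAYED_BACKGROUND */
+#define DELAYED_WRITEBACK	512	/* models BTRFS_DELAYED_WRITEBACK */
+
+static int pending_works;	/* models wq->normal->pending */
+static int thresh = 64;		/* models wq->normal->thresh */
+
+/* guard 1: btrfs_workqueue_normal_congested() -- too many queued works */
+static bool workqueue_congested(void)
+{
+	return pending_works > thresh * 2;
+}
+
+/* models btrfs_wq_run_delayed_node(): refuse to queue when congested */
+static bool queue_delayed_work(int items)
+{
+	if (items < DELAYED_BACKGROUND || workqueue_congested())
+		return false;
+	pending_works++;
+	return true;
+}
+
+/* guard 2: the worker bails out after DELAYED_WRITEBACK items instead
+ * of trying to drain a list that users may keep refilling */
+static void worker_func(int *items)
+{
+	int total_done = 0;
+
+	while (*items > 0 && total_done < DELAYED_WRITEBACK) {
+		(*items)--;
+		total_done++;
+	}
+	pending_works--;
+}
+
+int main(void)
+{
+	int items = 100000;
+
+	while (queue_delayed_work(items))
+		worker_func(&items);
+	printf("items left after bounded runs: %d\n", items);
+	return 0;
+}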
+
+Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/async-thread.c | 14 ++++++++++++++
+ fs/btrfs/async-thread.h | 1 +
+ fs/btrfs/delayed-inode.c | 6 ++++--
+ 3 files changed, 19 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/async-thread.c
++++ b/fs/btrfs/async-thread.c
+@@ -70,6 +70,20 @@ void btrfs_##name(struct work_struct *ar
+ normal_work_helper(work); \
+ }
+
++bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq)
++{
++ /*
++ * We could compare wq->normal->pending with num_online_cpus()
++ * to support "thresh == NO_THRESHOLD" case, but it requires
++ * moving up atomic_inc/dec in thresh_queue/exec_hook. Let's
++ * postpone it until someone needs the support of that case.
++ */
++ if (wq->normal->thresh == NO_THRESHOLD)
++ return false;
++
++ return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2;
++}
++
+ BTRFS_WORK_HELPER(worker_helper);
+ BTRFS_WORK_HELPER(delalloc_helper);
+ BTRFS_WORK_HELPER(flush_delalloc_helper);
+--- a/fs/btrfs/async-thread.h
++++ b/fs/btrfs/async-thread.h
+@@ -80,4 +80,5 @@ void btrfs_queue_work(struct btrfs_workq
+ void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
+ void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
+ void btrfs_set_work_high_priority(struct btrfs_work *work);
++bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq);
+ #endif
+--- a/fs/btrfs/delayed-inode.c
++++ b/fs/btrfs/delayed-inode.c
+@@ -1375,7 +1375,8 @@ release_path:
+ total_done++;
+
+ btrfs_release_prepared_delayed_node(delayed_node);
+- if (async_work->nr == 0 || total_done < async_work->nr)
++ if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) ||
++ total_done < async_work->nr)
+ goto again;
+
+ free_path:
+@@ -1391,7 +1392,8 @@ static int btrfs_wq_run_delayed_node(str
+ {
+ struct btrfs_async_delayed_work *async_work;
+
+- if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
++ if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND ||
++ btrfs_workqueue_normal_congested(fs_info->delayed_workers))
+ return 0;
+
+ async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
--- /dev/null
+From ed0df618b1b06d7431ee4d985317fc5419a5d559 Mon Sep 17 00:00:00 2001
+From: David Sterba <dsterba@suse.com>
+Date: Tue, 1 Nov 2016 14:21:23 +0100
+Subject: btrfs: store and load values of stripes_min/stripes_max in balance status item
+
+From: David Sterba <dsterba@suse.com>
+
+commit ed0df618b1b06d7431ee4d985317fc5419a5d559 upstream.
+
+The balance status item contains the currently known filter values, but the
+stripes filter was unintentionally not among them. This means that an
+interrupted and automatically restarted balance does not apply the stripes
+filter.
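+
+The omission is the classic "field missing from the conversion helpers"
+bug: the value survives in memory but is dropped across a store/load of
+the status item. A standalone model (userspace C, using glibc's
+htole32()/le32toh() as stand-ins for the kernel's cpu_to_le32()/
+le32_to_cpu(); not btrfs code):
+
+#include <endian.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+struct disk_args { uint32_t stripes_min; };	/* on-disk, little-endian */
+struct cpu_args  { uint32_t stripes_min; };	/* in-memory, native */
+
+static void cpu_to_disk(struct disk_args *d, const struct cpu_args *c)
+{
+	memset(d, 0, sizeof(*d));
+	/* before this patch, the equivalent of the line below was
+	 * missing, so the stored stripes values stayed zero */
+	d->stripes_min = htole32(c->stripes_min);
+}
+
+static void disk_to_cpu(struct cpu_args *c, const struct disk_args *d)
+{
+	c->stripes_min = le32toh(d->stripes_min);
+}
+
+int main(void)
+{
+	struct cpu_args before = { .stripes_min = 4 }, after;
+	struct disk_args item;
+
+	cpu_to_disk(&item, &before);	/* balance interrupted: state stored */
+	disk_to_cpu(&after, &item);	/* balance restarted: state reloaded */
+	printf("stripes_min after restart: %u\n", after.stripes_min);
+	return 0;
+}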
+
+Fixes: dee32d0ac371 ("btrfs: add balance filter for stripes")
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -3070,6 +3070,8 @@ btrfs_disk_balance_args_to_cpu(struct bt
+ cpu->target = le64_to_cpu(disk->target);
+ cpu->flags = le64_to_cpu(disk->flags);
+ cpu->limit = le64_to_cpu(disk->limit);
++ cpu->stripes_min = le32_to_cpu(disk->stripes_min);
++ cpu->stripes_max = le32_to_cpu(disk->stripes_max);
+ }
+
+ static inline void
+@@ -3088,6 +3090,8 @@ btrfs_cpu_balance_args_to_disk(struct bt
+ disk->target = cpu_to_le64(cpu->target);
+ disk->flags = cpu_to_le64(cpu->flags);
+ disk->limit = cpu_to_le64(cpu->limit);
++ disk->stripes_min = cpu_to_le32(cpu->stripes_min);
++ disk->stripes_max = cpu_to_le32(cpu->stripes_max);
+ }
+
+ /* struct btrfs_super_block */
--- /dev/null
+btrfs-limit-async_work-allocation-and-worker-func-duration.patch
+btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch
+btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch
+btrfs-fix-qgroup-rescan-worker-initialization.patch
--- /dev/null
+aoe-fix-crash-in-page-count-manipulation.patch
+btrfs-limit-async_work-allocation-and-worker-func-duration.patch
+btrfs-fix-bug_on-in-btrfs_mark_buffer_dirty.patch
+btrfs-fix-deadlock-caused-by-fsync-when-logging-directory-entries.patch
+btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch
+btrfs-fix-relocation-incorrectly-dropping-data-references.patch
+btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch
+btrfs-fix-emptiness-check-for-dirtied-extent-buffers-at-check_leaf.patch
+btrfs-fix-qgroup-rescan-worker-initialization.patch