From: Greg Kroah-Hartman
Date: Tue, 3 Jan 2017 19:24:19 +0000 (+0100)
Subject: 4.4-stable patches
X-Git-Tag: v4.9.1~28
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e56d66ad713e936afb708aeac565289b588e1a32;p=thirdparty%2Fkernel%2Fstable-queue.git

4.4-stable patches

added patches:
btrfs-fix-qgroup-rescan-worker-initialization.patch
btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch
btrfs-limit-async_work-allocation-and-worker-func-duration.patch
btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch
---

diff --git a/queue-4.4/btrfs-fix-qgroup-rescan-worker-initialization.patch b/queue-4.4/btrfs-fix-qgroup-rescan-worker-initialization.patch
new file mode 100644
index 00000000000..396968fab5c
--- /dev/null
+++ b/queue-4.4/btrfs-fix-qgroup-rescan-worker-initialization.patch
@@ -0,0 +1,48 @@
+From 8d9eddad19467b008e0c881bc3133d7da94b7ec1 Mon Sep 17 00:00:00 2001
+From: Filipe Manana
+Date: Thu, 24 Nov 2016 02:09:04 +0000
+Subject: Btrfs: fix qgroup rescan worker initialization
+
+From: Filipe Manana
+
+commit 8d9eddad19467b008e0c881bc3133d7da94b7ec1 upstream.
+
+We were setting the qgroup_rescan_running flag to true only after the
+rescan worker started (a task run from a workqueue). So if a user
+space task starts a rescan and immediately afterwards asks to wait for
+the rescan worker to finish, this second call might happen before the
+rescan worker task starts running, in which case the rescan wait ioctl
+returns immediately, not waiting for the rescan worker to finish.
+
+This was making the fstest btrfs/022 fail very often.
+
+Fixes: d2c609b834d6 (btrfs: properly track when rescan worker is running)
+Signed-off-by: Filipe Manana
+Reviewed-by: David Sterba
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/btrfs/qgroup.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -2283,10 +2283,6 @@ static void btrfs_qgroup_rescan_worker(s
+ int err = -ENOMEM;
+ int ret = 0;
+
+- mutex_lock(&fs_info->qgroup_rescan_lock);
+- fs_info->qgroup_rescan_running = true;
+- mutex_unlock(&fs_info->qgroup_rescan_lock);
+-
+ path = btrfs_alloc_path();
+ if (!path)
+ goto out;
+@@ -2397,6 +2393,7 @@ qgroup_rescan_init(struct btrfs_fs_info
+ sizeof(fs_info->qgroup_rescan_progress));
+ fs_info->qgroup_rescan_progress.objectid = progress_objectid;
+ init_completion(&fs_info->qgroup_rescan_completion);
++ fs_info->qgroup_rescan_running = true;
+
+ spin_unlock(&fs_info->qgroup_lock);
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
diff --git a/queue-4.4/btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch b/queue-4.4/btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch
new file mode 100644
index 00000000000..91ed83559b0
--- /dev/null
+++ b/queue-4.4/btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch
@@ -0,0 +1,64 @@
+From 2a7bf53f577e49c43de4ffa7776056de26db65d9 Mon Sep 17 00:00:00 2001
+From: Robbie Ko
+Date: Fri, 7 Oct 2016 17:30:47 +0800
+Subject: Btrfs: fix tree search logic when replaying directory entry deletes
+
+From: Robbie Ko
+
+commit 2a7bf53f577e49c43de4ffa7776056de26db65d9 upstream.
+
+If a log tree has a layout like the following:
+
+leaf N:
+ ...
+ item 240 key (282 DIR_LOG_ITEM 0) itemoff 8189 itemsize 8
+ dir log end 1275809046
+leaf N + 1:
+ item 0 key (282 DIR_LOG_ITEM 3936149215) itemoff 16275 itemsize 8
+ dir log end 18446744073709551615
+ ...
+
+When we pass the value 1275809046 + 1 as the parameter start_ret to the
+function tree-log.c:find_dir_range() (done by replay_dir_deletes()), we
+end up with path->slots[0] having the value 239 (pointing to the last
+item of leaf N, item 240). Because the dir log item in that position
+has an offset value smaller than *start_ret (1275809046 + 1), we need
+to move on to the next leaf. However, the logic for that is wrong: it
+compares the current slot to the number of items in the leaf, which is
+smaller, so we don't look up the next leaf but instead set the slot to
+point to an item that does not exist, at slot 240. We later operate on
+that slot, which has unexpected content or, in the worst case, can
+result in an invalid memory access (accessing beyond the last page of
+leaf N's extent buffer).
+
+So fix the logic that checks when we need to look at the next leaf
+by first incrementing the slot and only then checking if that slot
+is beyond the last item of the current leaf.
+
+Signed-off-by: Robbie Ko
+Reviewed-by: Filipe Manana
+Fixes: e02119d5a7b4 (Btrfs: Add a write ahead tree log to optimize synchronous operations)
+Signed-off-by: Filipe Manana
+[Modified changelog for clarity and correctness]
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/btrfs/tree-log.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -1923,12 +1923,11 @@ static noinline int find_dir_range(struc
+ next:
+ /* check the next slot in the tree to see if it is a valid item */
+ nritems = btrfs_header_nritems(path->nodes[0]);
++ path->slots[0]++;
+ if (path->slots[0] >= nritems) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret)
+ goto out;
+- } else {
+- path->slots[0]++;
+ }
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
diff --git a/queue-4.4/btrfs-limit-async_work-allocation-and-worker-func-duration.patch b/queue-4.4/btrfs-limit-async_work-allocation-and-worker-func-duration.patch
new file mode 100644
index 00000000000..ae178622058
--- /dev/null
+++ b/queue-4.4/btrfs-limit-async_work-allocation-and-worker-func-duration.patch
@@ -0,0 +1,128 @@
+From 2939e1a86f758b55cdba73e29397dd3d94df13bc Mon Sep 17 00:00:00 2001
+From: Maxim Patlasov
+Date: Mon, 12 Dec 2016 14:32:44 -0800
+Subject: btrfs: limit async_work allocation and worker func duration
+
+From: Maxim Patlasov
+
+commit 2939e1a86f758b55cdba73e29397dd3d94df13bc upstream.
+
+Problem statement: an unprivileged user who has read-write access to more
+than one btrfs subvolume can easily consume all kernel memory (eventually
+triggering the oom-killer).
+
+Reproducer (./mkrmdir below essentially loops over mkdir/rmdir):
+
+[root@kteam1 ~]# cat prep.sh
+
+DEV=/dev/sdb
+mkfs.btrfs -f $DEV
+mount $DEV /mnt
+for i in `seq 1 16`
+do
+ mkdir /mnt/$i
+ btrfs subvolume create /mnt/SV_$i
+ ID=`btrfs subvolume list /mnt |grep "SV_$i$" |cut -d ' ' -f 2`
+ mount -t btrfs -o subvolid=$ID $DEV /mnt/$i
+ chmod a+rwx /mnt/$i
+done
+
+[root@kteam1 ~]# sh prep.sh
+
+[maxim@kteam1 ~]$ for i in `seq 1 16`; do ./mkrmdir /mnt/$i 2000 2000 & done
+
+[root@kteam1 ~]# for i in `seq 1 4`; do grep "kmalloc-128" /proc/slabinfo | grep -v dma; sleep 60; done
+kmalloc-128 10144 10144 128 32 1 : tunables 0 0 0 : slabdata 317 317 0
+kmalloc-128 9992352 9992352 128 32 1 : tunables 0 0 0 : slabdata 312261 312261 0
+kmalloc-128 24226752 24226752 128 32 1 : tunables 0 0 0 : slabdata 757086 757086 0
+kmalloc-128 42754240 42754240 128 32 1 : tunables 0 0 0 : slabdata 1336070 1336070 0
+
+The huge numbers above come from the insane number of async_work-s
+allocated and queued by btrfs_wq_run_delayed_node.
+
+The problem is caused by btrfs_wq_run_delayed_node() queuing more and more
+works if the number of delayed items is above BTRFS_DELAYED_BACKGROUND. The
+worker func (btrfs_async_run_delayed_root) processes at least
+BTRFS_DELAYED_BATCH items (if they are present in the list). So the
+machinery works as expected while the list is almost empty. As soon as it
+gets bigger, the worker func starts to process more than one item at a
+time, each run takes longer, and the chance of queuing more async_works
+than needed grows.
+
+The problem above is worsened by another flaw of the delayed-inode
+implementation: if an async_work was queued in a throttling branch (number
+of items >= BTRFS_DELAYED_WRITEBACK), the corresponding worker func won't
+quit until the number of items drops below BTRFS_DELAYED_BACKGROUND / 2.
+So it is possible for the func to occupy a CPU indefinitely (up to 30 sec
+in my experiments): while the func is trying to drain the list, user
+activity may keep adding more items to it.
+
+The patch fixes both problems in a straightforward way: refuse to queue
+too many works in btrfs_wq_run_delayed_node and bail out of the worker
+func once at least BTRFS_DELAYED_WRITEBACK items have been processed.
+
+Changed in v2: remove support of thresh == NO_THRESHOLD.
+
+Signed-off-by: Maxim Patlasov
+Signed-off-by: Chris Mason
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/btrfs/async-thread.c | 14 ++++++++++++++
+ fs/btrfs/async-thread.h | 1 +
+ fs/btrfs/delayed-inode.c | 6 ++++--
+ 3 files changed, 19 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/async-thread.c
++++ b/fs/btrfs/async-thread.c
+@@ -70,6 +70,20 @@ void btrfs_##name(struct work_struct *ar
+ normal_work_helper(work); \
+ }
+
++bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq)
++{
++ /*
++ * We could compare wq->normal->pending with num_online_cpus()
++ * to support "thresh == NO_THRESHOLD" case, but it requires
++ * moving up atomic_inc/dec in thresh_queue/exec_hook. Let's
++ * postpone it until someone needs the support of that case.
++ */
++ if (wq->normal->thresh == NO_THRESHOLD)
++ return false;
++
++ return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2;
++}
++
+ BTRFS_WORK_HELPER(worker_helper);
+ BTRFS_WORK_HELPER(delalloc_helper);
+ BTRFS_WORK_HELPER(flush_delalloc_helper);
+--- a/fs/btrfs/async-thread.h
++++ b/fs/btrfs/async-thread.h
+@@ -80,4 +80,5 @@ void btrfs_queue_work(struct btrfs_workq
+ void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
+ void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
+ void btrfs_set_work_high_priority(struct btrfs_work *work);
++bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq);
+ #endif
+--- a/fs/btrfs/delayed-inode.c
++++ b/fs/btrfs/delayed-inode.c
+@@ -1375,7 +1375,8 @@ release_path:
+ total_done++;
+
+ btrfs_release_prepared_delayed_node(delayed_node);
+- if (async_work->nr == 0 || total_done < async_work->nr)
++ if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) ||
++ total_done < async_work->nr)
+ goto again;
+
+ free_path:
+@@ -1391,7 +1392,8 @@ static int btrfs_wq_run_delayed_node(str
+ {
+ struct btrfs_async_delayed_work *async_work;
+
+- if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
++ if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND ||
++ btrfs_workqueue_normal_congested(fs_info->delayed_workers))
+ return 0;
+
+ async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
diff --git a/queue-4.4/btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch b/queue-4.4/btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch
new file mode 100644
index 00000000000..05ca781ff18
--- /dev/null
+++ b/queue-4.4/btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch
@@ -0,0 +1,42 @@
+From ed0df618b1b06d7431ee4d985317fc5419a5d559 Mon Sep 17 00:00:00 2001
+From: David Sterba
+Date: Tue, 1 Nov 2016 14:21:23 +0100
+Subject: btrfs: store and load values of stripes_min/stripes_max in balance status item
+
+From: David Sterba
+
+commit ed0df618b1b06d7431ee4d985317fc5419a5d559 upstream.
+
+The balance status item contains the currently known filter values, but
+the stripes filter was unintentionally not among them. This means that
+an interrupted and automatically restarted balance does not apply the
+stripes filter.
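+
+[editor's note: a minimal, self-contained C sketch of the failure mode;
+the struct layouts and helper name below are invented for illustration
+and the endian conversions are elided, so this is not the real btrfs
+code:
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    /* hypothetical, simplified forms of the balance args */
+    struct disk_args { uint64_t flags; uint32_t stripes_min, stripes_max; };
+    struct cpu_args { uint64_t flags; uint32_t stripes_min, stripes_max; };
+
+    /*
+     * Pre-patch conversion: the stripes fields are never copied, so a
+     * balance resumed from the status item silently loses the filter.
+     */
+    static void disk_to_cpu(struct cpu_args *cpu, const struct disk_args *disk)
+    {
+        cpu->flags = disk->flags; /* le64_to_cpu() in the kernel */
+        /* missing: cpu->stripes_min and cpu->stripes_max */
+    }
+
+    int main(void)
+    {
+        struct disk_args on_disk = { 1, 2, 4 };
+        struct cpu_args resumed = { 0 };
+
+        disk_to_cpu(&resumed, &on_disk);
+        printf("resumed stripes filter: min=%u max=%u (expected 2/4)\n",
+               resumed.stripes_min, resumed.stripes_max);
+        return 0;
+    }
+
+The hunks below add the two missing conversions in both directions.]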
+ +Fixes: dee32d0ac3719ef8d640efaf0884111df444730f +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ctree.h | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -3070,6 +3070,8 @@ btrfs_disk_balance_args_to_cpu(struct bt + cpu->target = le64_to_cpu(disk->target); + cpu->flags = le64_to_cpu(disk->flags); + cpu->limit = le64_to_cpu(disk->limit); ++ cpu->stripes_min = le32_to_cpu(disk->stripes_min); ++ cpu->stripes_max = le32_to_cpu(disk->stripes_max); + } + + static inline void +@@ -3088,6 +3090,8 @@ btrfs_cpu_balance_args_to_disk(struct bt + disk->target = cpu_to_le64(cpu->target); + disk->flags = cpu_to_le64(cpu->flags); + disk->limit = cpu_to_le64(cpu->limit); ++ disk->stripes_min = cpu_to_le32(cpu->stripes_min); ++ disk->stripes_max = cpu_to_le32(cpu->stripes_max); + } + + /* struct btrfs_super_block */ diff --git a/queue-4.4/series b/queue-4.4/series new file mode 100644 index 00000000000..a670dc540ce --- /dev/null +++ b/queue-4.4/series @@ -0,0 +1,4 @@ +btrfs-limit-async_work-allocation-and-worker-func-duration.patch +btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch +btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch +btrfs-fix-qgroup-rescan-worker-initialization.patch diff --git a/queue-4.8/series b/queue-4.8/series new file mode 100644 index 00000000000..24348e58bb0 --- /dev/null +++ b/queue-4.8/series @@ -0,0 +1,9 @@ +aoe-fix-crash-in-page-count-manipulation.patch +btrfs-limit-async_work-allocation-and-worker-func-duration.patch +btrfs-fix-bug_on-in-btrfs_mark_buffer_dirty.patch +btrfs-fix-deadlock-caused-by-fsync-when-logging-directory-entries.patch +btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch +btrfs-fix-relocation-incorrectly-dropping-data-references.patch +btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch +btrfs-fix-emptiness-check-for-dirtied-extent-buffers-at-check_leaf.patch +btrfs-fix-qgroup-rescan-worker-initialization.patch
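
Editor's note: the backpressure check added by
btrfs-limit-async_work-allocation-and-worker-func-duration.patch is worth
restating as a standalone pattern: the producer refuses to queue another
work item once the backlog exceeds twice the workqueue threshold, and the
worker bails out after a bounded batch. Below is a minimal, self-contained
C sketch of the producer side; the names (struct workqueue, congested,
maybe_queue_work) and the user-space atomics are stand-ins for the kernel's
btrfs_workqueue machinery, not the actual kernel API:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define NO_THRESHOLD (-1)

    struct workqueue {
        atomic_int pending; /* works queued but not yet completed */
        int thresh;         /* normal concurrency threshold */
    };

    /*
     * Analogue of btrfs_workqueue_normal_congested(): treat the queue
     * as congested once the backlog is more than twice the threshold.
     */
    static bool congested(struct workqueue *wq)
    {
        if (wq->thresh == NO_THRESHOLD)
            return false;
        return atomic_load(&wq->pending) > wq->thresh * 2;
    }

    /*
     * Producer side, as in btrfs_wq_run_delayed_node(): skip queuing
     * when there is little to do or enough work is already in flight.
     */
    static int maybe_queue_work(struct workqueue *wq, int items, int background)
    {
        if (items < background || congested(wq))
            return 0;
        atomic_fetch_add(&wq->pending, 1); /* queue_work() would go here */
        return 1;
    }

    int main(void)
    {
        struct workqueue wq = { .pending = 5, .thresh = 2 };

        /* backlog (5) > 2 * thresh (4), so the producer backs off */
        printf("queued: %d\n", maybe_queue_work(&wq, 100, 64));
        return 0;
    }

Without the congestion check, every operation above the background
threshold would allocate and queue a fresh work item, which is exactly the
unbounded kmalloc-128 growth shown in the reproducer above.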