git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Tue, 3 Jan 2017 19:24:19 +0000 (20:24 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Tue, 3 Jan 2017 19:24:19 +0000 (20:24 +0100)
added patches:
btrfs-fix-qgroup-rescan-worker-initialization.patch
btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch
btrfs-limit-async_work-allocation-and-worker-func-duration.patch
btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch

queue-4.4/btrfs-fix-qgroup-rescan-worker-initialization.patch [new file with mode: 0644]
queue-4.4/btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch [new file with mode: 0644]
queue-4.4/btrfs-limit-async_work-allocation-and-worker-func-duration.patch [new file with mode: 0644]
queue-4.4/btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch [new file with mode: 0644]
queue-4.4/series [new file with mode: 0644]
queue-4.8/series [new file with mode: 0644]

diff --git a/queue-4.4/btrfs-fix-qgroup-rescan-worker-initialization.patch b/queue-4.4/btrfs-fix-qgroup-rescan-worker-initialization.patch
new file mode 100644 (file)
index 0000000..396968f
--- /dev/null
@@ -0,0 +1,48 @@
+From 8d9eddad19467b008e0c881bc3133d7da94b7ec1 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Thu, 24 Nov 2016 02:09:04 +0000
+Subject: Btrfs: fix qgroup rescan worker initialization
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 8d9eddad19467b008e0c881bc3133d7da94b7ec1 upstream.
+
+We were setting the qgroup_rescan_running flag to true only after the
+rescan worker started (which is a task run by a queue). So if a user
+space task starts a rescan and immediately afterwards asks to wait for the
+rescan worker to finish, this second call might happen before the rescan
+worker task starts running, in which case the rescan wait ioctl returns
+immediately, not waiting for the rescan worker to finish.
+
+This was making the fstest btrfs/022 fail very often.
+
+Fixes: d2c609b834d6 (btrfs: properly track when rescan worker is running)
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/qgroup.c |    5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -2283,10 +2283,6 @@ static void btrfs_qgroup_rescan_worker(s
+       int err = -ENOMEM;
+       int ret = 0;
+-      mutex_lock(&fs_info->qgroup_rescan_lock);
+-      fs_info->qgroup_rescan_running = true;
+-      mutex_unlock(&fs_info->qgroup_rescan_lock);
+-
+       path = btrfs_alloc_path();
+       if (!path)
+               goto out;
+@@ -2397,6 +2393,7 @@ qgroup_rescan_init(struct btrfs_fs_info
+               sizeof(fs_info->qgroup_rescan_progress));
+       fs_info->qgroup_rescan_progress.objectid = progress_objectid;
+       init_completion(&fs_info->qgroup_rescan_completion);
++      fs_info->qgroup_rescan_running = true;
+       spin_unlock(&fs_info->qgroup_lock);
+       mutex_unlock(&fs_info->qgroup_rescan_lock);
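As a rough illustration of the race described in the commit message above, here is a minimal userspace sketch (not btrfs code): the names start_rescan(), wait_rescan() and rescan_running are made up, and pthreads stand in for the kernel's work queue, mutex and completion. Setting the flag in the start path, before start_rescan() returns, is what closes the window the patch fixes.

/*
 * Simplified userspace model of the race (illustrative names only).
 * Before the fix, the "running" flag was set inside the worker, so a
 * waiter scheduled right after start_rescan() could observe
 * rescan_running == false and return without waiting.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done_cond = PTHREAD_COND_INITIALIZER;
static bool rescan_running;

static void *rescan_worker(void *arg)
{
	usleep(1000);			/* simulate queue/scheduling delay */
	/* ... the actual rescan work would happen here ... */
	pthread_mutex_lock(&lock);
	rescan_running = false;		/* rescan finished */
	pthread_cond_broadcast(&done_cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

static void start_rescan(pthread_t *t)
{
	pthread_mutex_lock(&lock);
	rescan_running = true;		/* the fix: mark running before returning */
	pthread_mutex_unlock(&lock);
	pthread_create(t, NULL, rescan_worker, NULL);
}

static void wait_rescan(void)
{
	pthread_mutex_lock(&lock);
	while (rescan_running)		/* without the fix this could already be false */
		pthread_cond_wait(&done_cond, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t t;

	start_rescan(&t);
	wait_rescan();			/* now reliably waits for the worker */
	pthread_join(t, NULL);
	printf("rescan finished\n");
	return 0;
}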
diff --git a/queue-4.4/btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch b/queue-4.4/btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch
new file mode 100644 (file)
index 0000000..91ed835
--- /dev/null
@@ -0,0 +1,64 @@
+From 2a7bf53f577e49c43de4ffa7776056de26db65d9 Mon Sep 17 00:00:00 2001
+From: Robbie Ko <robbieko@synology.com>
+Date: Fri, 7 Oct 2016 17:30:47 +0800
+Subject: Btrfs: fix tree search logic when replaying directory entry deletes
+
+From: Robbie Ko <robbieko@synology.com>
+
+commit 2a7bf53f577e49c43de4ffa7776056de26db65d9 upstream.
+
+If a log tree has a layout like the following:
+
+leaf N:
+        ...
+        item 240 key (282 DIR_LOG_ITEM 0) itemoff 8189 itemsize 8
+                dir log end 1275809046
+leaf N + 1:
+        item 0 key (282 DIR_LOG_ITEM 3936149215) itemoff 16275 itemsize 8
+                dir log end 18446744073709551615
+        ...
+
+When we pass the value 1275809046 + 1 as the parameter start_ret to the
+function tree-log.c:find_dir_range() (done by replay_dir_deletes()), we
+end up with path->slots[0] having the value 239 (points to the last item
+of leaf N, item 240). Because the dir log item in that position has an
+offset value smaller than *start_ret (1275809046 + 1), we need to move on
+to the next leaf. However, the logic for that is wrong: it compares the
+current slot to the number of items in the leaf, and since the slot is
+smaller we don't look up the next leaf; instead we set the slot to point
+to an item that does not exist, at slot 240, and we later operate on that
+slot, which has unexpected content or in the worst case can result in an
+invalid memory access (accessing beyond the last page of leaf N's extent
+buffer).
+
+So fix the logic that decides when we need to look up the next leaf
+by first incrementing the slot and only then checking whether that slot
+is beyond the last item of the current leaf.
+
+Signed-off-by: Robbie Ko <robbieko@synology.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Fixes: e02119d5a7b4 (Btrfs: Add a write ahead tree log to optimize synchronous operations)
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+[Modified changelog for clarity and correctness]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-log.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -1923,12 +1923,11 @@ static noinline int find_dir_range(struc
+ next:
+       /* check the next slot in the tree to see if it is a valid item */
+       nritems = btrfs_header_nritems(path->nodes[0]);
++      path->slots[0]++;
+       if (path->slots[0] >= nritems) {
+               ret = btrfs_next_leaf(root, path);
+               if (ret)
+                       goto out;
+-      } else {
+-              path->slots[0]++;
+       }
+       btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
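As a rough sketch of the corrected slot-advance logic, assuming made-up leaf/path structures and a next_leaf() helper (this is not btrfs code), the key point mirrors the fix in find_dir_range(): increment the slot first, then compare it against the number of items before deciding to move to the next leaf.

/* Minimal userspace model of "increment first, then bound-check". */
#include <stdio.h>

struct leaf {
	const int *items;
	int nritems;
	const struct leaf *next;
};

struct path {
	const struct leaf *node;
	int slot;
};

/* Move to the first slot of the next leaf; returns non-zero at the end. */
static int next_leaf(struct path *path)
{
	if (!path->node->next)
		return 1;
	path->node = path->node->next;
	path->slot = 0;
	return 0;
}

/* Advance to the next valid item, crossing leaf boundaries if needed. */
static int advance(struct path *path)
{
	path->slot++;				/* increment first ... */
	if (path->slot >= path->node->nritems)	/* ... then check the bound */
		return next_leaf(path);
	return 0;
}

int main(void)
{
	const int a[] = { 1, 2, 3 }, b[] = { 4, 5 };
	const struct leaf leaf_b = { b, 2, NULL };
	const struct leaf leaf_a = { a, 3, &leaf_b };
	struct path path = { &leaf_a, 0 };

	do
		printf("%d\n", path.node->items[path.slot]);
	while (!advance(&path));		/* never reads past nritems */

	return 0;
}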
diff --git a/queue-4.4/btrfs-limit-async_work-allocation-and-worker-func-duration.patch b/queue-4.4/btrfs-limit-async_work-allocation-and-worker-func-duration.patch
new file mode 100644 (file)
index 0000000..ae17862
--- /dev/null
@@ -0,0 +1,128 @@
+From 2939e1a86f758b55cdba73e29397dd3d94df13bc Mon Sep 17 00:00:00 2001
+From: Maxim Patlasov <mpatlasov@virtuozzo.com>
+Date: Mon, 12 Dec 2016 14:32:44 -0800
+Subject: btrfs: limit async_work allocation and worker func duration
+
+From: Maxim Patlasov <mpatlasov@virtuozzo.com>
+
+commit 2939e1a86f758b55cdba73e29397dd3d94df13bc upstream.
+
+Problem statement: an unprivileged user who has read-write access to more
+than one btrfs subvolume may easily consume all kernel memory (eventually
+triggering the oom-killer).
+
+Reproducer (./mkrmdir below essentially loops over mkdir/rmdir):
+
+[root@kteam1 ~]# cat prep.sh
+
+DEV=/dev/sdb
+mkfs.btrfs -f $DEV
+mount $DEV /mnt
+for i in `seq 1 16`
+do
+       mkdir /mnt/$i
+       btrfs subvolume create /mnt/SV_$i
+       ID=`btrfs subvolume list /mnt |grep "SV_$i$" |cut -d ' ' -f 2`
+       mount -t btrfs -o subvolid=$ID $DEV /mnt/$i
+       chmod a+rwx /mnt/$i
+done
+
+[root@kteam1 ~]# sh prep.sh
+
+[maxim@kteam1 ~]$ for i in `seq 1 16`; do ./mkrmdir /mnt/$i 2000 2000 & done
+
+[root@kteam1 ~]# for i in `seq 1 4`; do grep "kmalloc-128" /proc/slabinfo | grep -v dma; sleep 60; done
+kmalloc-128        10144  10144    128   32    1 : tunables    0    0    0 : slabdata    317    317      0
+kmalloc-128       9992352 9992352    128   32    1 : tunables    0    0    0 : slabdata 312261 312261      0
+kmalloc-128       24226752 24226752    128   32    1 : tunables    0    0    0 : slabdata 757086 757086      0
+kmalloc-128       42754240 42754240    128   32    1 : tunables    0    0    0 : slabdata 1336070 1336070      0
+
+The huge numbers above come from insane number of async_work-s allocated
+and queued by btrfs_wq_run_delayed_node.
+
+The problem is caused by btrfs_wq_run_delayed_node() queuing more and more
+works if the number of delayed items is above BTRFS_DELAYED_BACKGROUND. The
+worker func (btrfs_async_run_delayed_root) processes at least
+BTRFS_DELAYED_BATCH items (if they are present in the list). So, the machinery
+works as expected while the list is almost empty. As soon as it gets
+bigger, the worker func starts to process more than one item at a time, which
+takes longer, and the chance of having more async_works queued than needed
+gets higher.
+
+The problem above is worsened by another flaw of the delayed-inode
+implementation: if async_work was queued in a throttling branch (number of
+items >= BTRFS_DELAYED_WRITEBACK), the corresponding worker func won't quit
+until the number of items < BTRFS_DELAYED_BACKGROUND / 2. So, it is possible
+that the func occupies the CPU indefinitely (up to 30 sec in my experiments):
+while the func is trying to drain the list, user activity may keep adding
+more items to it.
+
+The patch fixes both problems in a straightforward way: refuse to queue too
+many works in btrfs_wq_run_delayed_node and bail out of the worker func if
+at least BTRFS_DELAYED_WRITEBACK items have been processed.
+
+Changed in v2: remove support of thresh == NO_THRESHOLD.
+
+Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/async-thread.c  |   14 ++++++++++++++
+ fs/btrfs/async-thread.h  |    1 +
+ fs/btrfs/delayed-inode.c |    6 ++++--
+ 3 files changed, 19 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/async-thread.c
++++ b/fs/btrfs/async-thread.c
+@@ -70,6 +70,20 @@ void btrfs_##name(struct work_struct *ar
+       normal_work_helper(work);                                       \
+ }
++bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq)
++{
++      /*
++       * We could compare wq->normal->pending with num_online_cpus()
++       * to support "thresh == NO_THRESHOLD" case, but it requires
++       * moving up atomic_inc/dec in thresh_queue/exec_hook. Let's
++       * postpone it until someone needs the support of that case.
++       */
++      if (wq->normal->thresh == NO_THRESHOLD)
++              return false;
++
++      return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2;
++}
++
+ BTRFS_WORK_HELPER(worker_helper);
+ BTRFS_WORK_HELPER(delalloc_helper);
+ BTRFS_WORK_HELPER(flush_delalloc_helper);
+--- a/fs/btrfs/async-thread.h
++++ b/fs/btrfs/async-thread.h
+@@ -80,4 +80,5 @@ void btrfs_queue_work(struct btrfs_workq
+ void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
+ void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
+ void btrfs_set_work_high_priority(struct btrfs_work *work);
++bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq);
+ #endif
+--- a/fs/btrfs/delayed-inode.c
++++ b/fs/btrfs/delayed-inode.c
+@@ -1375,7 +1375,8 @@ release_path:
+       total_done++;
+       btrfs_release_prepared_delayed_node(delayed_node);
+-      if (async_work->nr == 0 || total_done < async_work->nr)
++      if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) ||
++          total_done < async_work->nr)
+               goto again;
+ free_path:
+@@ -1391,7 +1392,8 @@ static int btrfs_wq_run_delayed_node(str
+ {
+       struct btrfs_async_delayed_work *async_work;
+-      if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
++      if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND ||
++          btrfs_workqueue_normal_congested(fs_info->delayed_workers))
+               return 0;
+       async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
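A minimal userspace sketch of the two guards this patch adds, under assumed names (congested(), queue_delayed_work() and run_delayed_worker() are illustrative, not btrfs APIs): the producer refuses to queue more work once the pending count exceeds twice the threshold, and the worker processes a bounded batch before bailing out.

/* Simplified model of the back-off and bounded-batch behaviour. */
#include <stdbool.h>
#include <stdio.h>

#define BACKGROUND	7	/* start background flushing above this */
#define WRITEBACK	16	/* per-run cap for the worker */

static int items;		/* pending delayed items */
static int pending_works;	/* queued-but-not-finished work items */
static const int thresh = 4;

static bool congested(void)
{
	/* Mirrors the idea of btrfs_workqueue_normal_congested(). */
	return pending_works > thresh * 2;
}

static int queue_delayed_work(void)
{
	if (items < BACKGROUND || congested())
		return 0;		/* refuse to pile up more works */
	pending_works++;
	return 1;
}

static void run_delayed_worker(void)
{
	int done = 0;

	while (items > 0 && done < WRITEBACK) {	/* bounded batch, then bail out */
		items--;
		done++;
	}
	pending_works--;
}

int main(void)
{
	items = 100;
	for (int i = 0; i < 50; i++)		/* a heavy producer */
		queue_delayed_work();
	printf("queued works: %d (capped by congestion check)\n", pending_works);

	while (pending_works > 0)
		run_delayed_worker();
	printf("items left: %d\n", items);
	return 0;
}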
diff --git a/queue-4.4/btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch b/queue-4.4/btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch
new file mode 100644 (file)
index 0000000..05ca781
--- /dev/null
@@ -0,0 +1,42 @@
+From ed0df618b1b06d7431ee4d985317fc5419a5d559 Mon Sep 17 00:00:00 2001
+From: David Sterba <dsterba@suse.com>
+Date: Tue, 1 Nov 2016 14:21:23 +0100
+Subject: btrfs: store and load values of stripes_min/stripes_max in balance status item
+
+From: David Sterba <dsterba@suse.com>
+
+commit ed0df618b1b06d7431ee4d985317fc5419a5d559 upstream.
+
+The balance status item contains the currently known filter values, but the
+stripes filter was unintentionally not among them. This means that an
+interrupted and automatically restarted balance does not apply the
+stripes filter.
+
+Fixes: dee32d0ac3719ef8d640efaf0884111df444730f
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.h |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -3070,6 +3070,8 @@ btrfs_disk_balance_args_to_cpu(struct bt
+       cpu->target = le64_to_cpu(disk->target);
+       cpu->flags = le64_to_cpu(disk->flags);
+       cpu->limit = le64_to_cpu(disk->limit);
++      cpu->stripes_min = le32_to_cpu(disk->stripes_min);
++      cpu->stripes_max = le32_to_cpu(disk->stripes_max);
+ }
+ static inline void
+@@ -3088,6 +3090,8 @@ btrfs_cpu_balance_args_to_disk(struct bt
+       disk->target = cpu_to_le64(cpu->target);
+       disk->flags = cpu_to_le64(cpu->flags);
+       disk->limit = cpu_to_le64(cpu->limit);
++      disk->stripes_min = cpu_to_le32(cpu->stripes_min);
++      disk->stripes_max = cpu_to_le32(cpu->stripes_max);
+ }
+ /* struct btrfs_super_block */
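A minimal userspace sketch of the disk/CPU conversion pattern this patch completes, with made-up struct names and glibc's endian.h helpers standing in for the kernel's le32_to_cpu()/cpu_to_le32(): every field, including stripes_min and stripes_max, has to be converted in both directions, otherwise its value is silently lost when the balance status item is stored and loaded across an interruption.

/* Round-trip of a (reduced, illustrative) balance-args structure. */
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

struct disk_balance_args {		/* on-disk layout, little-endian */
	uint64_t flags;
	uint32_t stripes_min;
	uint32_t stripes_max;
};

struct cpu_balance_args {		/* in-memory, native endianness */
	uint64_t flags;
	uint32_t stripes_min;
	uint32_t stripes_max;
};

static void disk_to_cpu(struct cpu_balance_args *cpu,
			const struct disk_balance_args *disk)
{
	cpu->flags = le64toh(disk->flags);
	cpu->stripes_min = le32toh(disk->stripes_min);	/* the pair the fix adds */
	cpu->stripes_max = le32toh(disk->stripes_max);
}

static void cpu_to_disk(struct disk_balance_args *disk,
			const struct cpu_balance_args *cpu)
{
	disk->flags = htole64(cpu->flags);
	disk->stripes_min = htole32(cpu->stripes_min);
	disk->stripes_max = htole32(cpu->stripes_max);
}

int main(void)
{
	struct cpu_balance_args cpu = { .flags = 1, .stripes_min = 2, .stripes_max = 4 };
	struct disk_balance_args disk;
	struct cpu_balance_args reloaded;

	cpu_to_disk(&disk, &cpu);	/* "store" the balance status item */
	disk_to_cpu(&reloaded, &disk);	/* "load" it after an interruption */
	printf("stripes_min=%u stripes_max=%u\n",
	       reloaded.stripes_min, reloaded.stripes_max);
	return 0;
}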
diff --git a/queue-4.4/series b/queue-4.4/series
new file mode 100644 (file)
index 0000000..a670dc5
--- /dev/null
@@ -0,0 +1,4 @@
+btrfs-limit-async_work-allocation-and-worker-func-duration.patch
+btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch
+btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch
+btrfs-fix-qgroup-rescan-worker-initialization.patch
diff --git a/queue-4.8/series b/queue-4.8/series
new file mode 100644 (file)
index 0000000..24348e5
--- /dev/null
@@ -0,0 +1,9 @@
+aoe-fix-crash-in-page-count-manipulation.patch
+btrfs-limit-async_work-allocation-and-worker-func-duration.patch
+btrfs-fix-bug_on-in-btrfs_mark_buffer_dirty.patch
+btrfs-fix-deadlock-caused-by-fsync-when-logging-directory-entries.patch
+btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch
+btrfs-fix-relocation-incorrectly-dropping-data-references.patch
+btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch
+btrfs-fix-emptiness-check-for-dirtied-extent-buffers-at-check_leaf.patch
+btrfs-fix-qgroup-rescan-worker-initialization.patch