From: Greg Kroah-Hartman
Date: Tue, 3 Jan 2017 19:24:19 +0000 (+0100)
Subject: 4.4-stable patches
X-Git-Tag: v4.9.1~28
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e56d66ad713e936afb708aeac565289b588e1a32;p=thirdparty%2Fkernel%2Fstable-queue.git

4.4-stable patches

added patches:
btrfs-fix-qgroup-rescan-worker-initialization.patch
btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch
btrfs-limit-async_work-allocation-and-worker-func-duration.patch
btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch
---

diff --git a/queue-4.4/btrfs-fix-qgroup-rescan-worker-initialization.patch b/queue-4.4/btrfs-fix-qgroup-rescan-worker-initialization.patch
new file mode 100644
index 00000000000..396968fab5c
--- /dev/null
+++ b/queue-4.4/btrfs-fix-qgroup-rescan-worker-initialization.patch
@@ -0,0 +1,48 @@
+From 8d9eddad19467b008e0c881bc3133d7da94b7ec1 Mon Sep 17 00:00:00 2001
+From: Filipe Manana
+Date: Thu, 24 Nov 2016 02:09:04 +0000
+Subject: Btrfs: fix qgroup rescan worker initialization
+
+From: Filipe Manana
+
+commit 8d9eddad19467b008e0c881bc3133d7da94b7ec1 upstream.
+
+We were setting the qgroup_rescan_running flag to true only after the
+rescan worker started (a task run from a workqueue). So if a user
+space task starts a rescan and immediately afterwards asks to wait for
+the rescan worker to finish, this second call might happen before the
+rescan worker task starts running, in which case the rescan wait ioctl
+returns immediately, not waiting for the rescan worker to finish.
+
+This was making the fstest btrfs/022 fail very often.
+
+Fixes: d2c609b834d6 (btrfs: properly track when rescan worker is running)
+Signed-off-by: Filipe Manana
+Reviewed-by: David Sterba
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/btrfs/qgroup.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -2283,10 +2283,6 @@ static void btrfs_qgroup_rescan_worker(s
+ int err = -ENOMEM;
+ int ret = 0;
+
+- mutex_lock(&fs_info->qgroup_rescan_lock);
+- fs_info->qgroup_rescan_running = true;
+- mutex_unlock(&fs_info->qgroup_rescan_lock);
+-
+ path = btrfs_alloc_path();
+ if (!path)
+ goto out;
+@@ -2397,6 +2393,7 @@ qgroup_rescan_init(struct btrfs_fs_info
+ sizeof(fs_info->qgroup_rescan_progress));
+ fs_info->qgroup_rescan_progress.objectid = progress_objectid;
+ init_completion(&fs_info->qgroup_rescan_completion);
++ fs_info->qgroup_rescan_running = true;
+
+ spin_unlock(&fs_info->qgroup_lock);
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
diff --git a/queue-4.4/btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch b/queue-4.4/btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch
new file mode 100644
index 00000000000..91ed83559b0
--- /dev/null
+++ b/queue-4.4/btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch
@@ -0,0 +1,64 @@
+From 2a7bf53f577e49c43de4ffa7776056de26db65d9 Mon Sep 17 00:00:00 2001
+From: Robbie Ko
+Date: Fri, 7 Oct 2016 17:30:47 +0800
+Subject: Btrfs: fix tree search logic when replaying directory entry deletes
+
+From: Robbie Ko
+
+commit 2a7bf53f577e49c43de4ffa7776056de26db65d9 upstream.
+
+If a log tree has a layout like the following:
+
+leaf N:
+ ...
+ item 240 key (282 DIR_LOG_ITEM 0) itemoff 8189 itemsize 8
+ dir log end 1275809046
+leaf N + 1:
+ item 0 key (282 DIR_LOG_ITEM 3936149215) itemoff 16275 itemsize 8
+ dir log end 18446744073709551615
+ ...
+
+When we pass the value 1275809046 + 1 as the parameter start_ret to the
+function tree-log.c:find_dir_range() (done by replay_dir_deletes()), we
+end up with path->slots[0] having the value 239 (pointing to the last
+item of leaf N, item 240). Because the dir log item in that position
+has an offset value smaller than *start_ret (1275809046 + 1), we need
+to move on to the next leaf. However, the logic for that is wrong: it
+compares the current slot to the number of items in the leaf, which is
+smaller, so we don't look up the next leaf but instead set the slot to
+point to an item that does not exist, at slot 240. We later operate on
+that slot, which has unexpected content or, in the worst case, can
+result in an invalid memory access (accessing beyond the last page of
+leaf N's extent buffer).
+
+So fix the logic that checks when we need to look at the next leaf
+by first incrementing the slot and only then checking if that slot
+is beyond the last item of the current leaf.
+
+Signed-off-by: Robbie Ko
+Reviewed-by: Filipe Manana
+Fixes: e02119d5a7b4 (Btrfs: Add a write ahead tree log to optimize synchronous operations)
+Signed-off-by: Filipe Manana
+[Modified changelog for clarity and correctness]
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/btrfs/tree-log.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -1923,12 +1923,11 @@ static noinline int find_dir_range(struc
+ next:
+ /* check the next slot in the tree to see if it is a valid item */
+ nritems = btrfs_header_nritems(path->nodes[0]);
++ path->slots[0]++;
+ if (path->slots[0] >= nritems) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret)
+ goto out;
+- } else {
+- path->slots[0]++;
+ }
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
diff --git a/queue-4.4/btrfs-limit-async_work-allocation-and-worker-func-duration.patch b/queue-4.4/btrfs-limit-async_work-allocation-and-worker-func-duration.patch
new file mode 100644
index 00000000000..ae178622058
--- /dev/null
+++ b/queue-4.4/btrfs-limit-async_work-allocation-and-worker-func-duration.patch
@@ -0,0 +1,128 @@
+From 2939e1a86f758b55cdba73e29397dd3d94df13bc Mon Sep 17 00:00:00 2001
+From: Maxim Patlasov
+Date: Mon, 12 Dec 2016 14:32:44 -0800
+Subject: btrfs: limit async_work allocation and worker func duration
+
+From: Maxim Patlasov
+
+commit 2939e1a86f758b55cdba73e29397dd3d94df13bc upstream.
+
+Problem statement: an unprivileged user who has read-write access to more
+than one btrfs subvolume can easily consume all kernel memory (eventually
+triggering the oom-killer).
+
+Reproducer (./mkrmdir below essentially loops over mkdir/rmdir):
+
+[root@kteam1 ~]# cat prep.sh
+
+DEV=/dev/sdb
+mkfs.btrfs -f $DEV
+mount $DEV /mnt
+for i in `seq 1 16`
+do
+ mkdir /mnt/$i
+ btrfs subvolume create /mnt/SV_$i
+ ID=`btrfs subvolume list /mnt |grep "SV_$i$" |cut -d ' ' -f 2`
+ mount -t btrfs -o subvolid=$ID $DEV /mnt/$i
+ chmod a+rwx /mnt/$i
+done
+
+[root@kteam1 ~]# sh prep.sh
+
+[maxim@kteam1 ~]$ for i in `seq 1 16`; do ./mkrmdir /mnt/$i 2000 2000 & done
+
+[root@kteam1 ~]# for i in `seq 1 4`; do grep "kmalloc-128" /proc/slabinfo | grep -v dma; sleep 60; done
+kmalloc-128 10144 10144 128 32 1 : tunables 0 0 0 : slabdata 317 317 0
+kmalloc-128 9992352 9992352 128 32 1 : tunables 0 0 0 : slabdata 312261 312261 0
+kmalloc-128 24226752 24226752 128 32 1 : tunables 0 0 0 : slabdata 757086 757086 0
+kmalloc-128 42754240 42754240 128 32 1 : tunables 0 0 0 : slabdata 1336070 1336070 0
+
+The huge numbers above come from the insane number of async_work-s
+allocated and queued by btrfs_wq_run_delayed_node.
+
+The problem is caused by btrfs_wq_run_delayed_node() queuing more and more
+works if the number of delayed items is above BTRFS_DELAYED_BACKGROUND. The
+worker func (btrfs_async_run_delayed_root) processes at least
+BTRFS_DELAYED_BATCH items (if they are present in the list). So the
+machinery works as expected while the list is almost empty. As soon as it
+gets bigger, the worker func starts to process more than one item at a
+time, each run takes longer, and the chance of queuing more async_works
+than needed grows.
+
+The problem above is worsened by another flaw of the delayed-inode
+implementation: if an async_work was queued in a throttling branch (number
+of items >= BTRFS_DELAYED_WRITEBACK), the corresponding worker func won't
+quit until the number of items drops below BTRFS_DELAYED_BACKGROUND / 2.
+So it is possible for the func to occupy a CPU indefinitely (up to 30 sec
+in my experiments): while the func is trying to drain the list, user
+activity may keep adding more items to it.
+
+The patch fixes both problems in a straightforward way: refuse to queue
+too many works in btrfs_wq_run_delayed_node and bail out of the worker
+func once at least BTRFS_DELAYED_WRITEBACK items have been processed.
+
+Changed in v2: remove support of thresh == NO_THRESHOLD.
+
+Signed-off-by: Maxim Patlasov
+Signed-off-by: Chris Mason
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/btrfs/async-thread.c | 14 ++++++++++++++
+ fs/btrfs/async-thread.h | 1 +
+ fs/btrfs/delayed-inode.c | 6 ++++--
+ 3 files changed, 19 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/async-thread.c
++++ b/fs/btrfs/async-thread.c
+@@ -70,6 +70,20 @@ void btrfs_##name(struct work_struct *ar
+ normal_work_helper(work); \
+ }
+
++bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq)
++{
++ /*
++ * We could compare wq->normal->pending with num_online_cpus()
++ * to support "thresh == NO_THRESHOLD" case, but it requires
++ * moving up atomic_inc/dec in thresh_queue/exec_hook. Let's
++ * postpone it until someone needs the support of that case.
++ */
++ if (wq->normal->thresh == NO_THRESHOLD)
++ return false;
++
++ return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2;
++}
++
+ BTRFS_WORK_HELPER(worker_helper);
+ BTRFS_WORK_HELPER(delalloc_helper);
+ BTRFS_WORK_HELPER(flush_delalloc_helper);
+--- a/fs/btrfs/async-thread.h
++++ b/fs/btrfs/async-thread.h
+@@ -80,4 +80,5 @@ void btrfs_queue_work(struct btrfs_workq
+ void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
+ void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
+ void btrfs_set_work_high_priority(struct btrfs_work *work);
++bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq);
+ #endif
+--- a/fs/btrfs/delayed-inode.c
++++ b/fs/btrfs/delayed-inode.c
+@@ -1375,7 +1375,8 @@ release_path:
+ total_done++;
+
+ btrfs_release_prepared_delayed_node(delayed_node);
+- if (async_work->nr == 0 || total_done < async_work->nr)
++ if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) ||
++ total_done < async_work->nr)
+ goto again;
+
+ free_path:
+@@ -1391,7 +1392,8 @@ static int btrfs_wq_run_delayed_node(str
+ {
+ struct btrfs_async_delayed_work *async_work;
+
+- if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
++ if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND ||
++ btrfs_workqueue_normal_congested(fs_info->delayed_workers))
+ return 0;
+
+ async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
diff --git a/queue-4.4/btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch b/queue-4.4/btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch
new file mode 100644
index 00000000000..05ca781ff18
--- /dev/null
+++ b/queue-4.4/btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch
@@ -0,0 +1,42 @@
+From ed0df618b1b06d7431ee4d985317fc5419a5d559 Mon Sep 17 00:00:00 2001
+From: David Sterba
+Date: Tue, 1 Nov 2016 14:21:23 +0100
+Subject: btrfs: store and load values of stripes_min/stripes_max in balance status item
+
+From: David Sterba
+
+commit ed0df618b1b06d7431ee4d985317fc5419a5d559 upstream.
+
+The balance status item contains the currently known filter values, but
+the stripes filter was unintentionally not among them. This means that
+an interrupted and automatically restarted balance does not apply the
+stripes filter.
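+
+[editor's note: a minimal, self-contained C sketch of the failure mode;
+the struct layouts and helper name below are invented for illustration
+and the endian conversions are elided, so this is not the real btrfs
+code:
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    /* hypothetical, simplified forms of the balance args */
+    struct disk_args { uint64_t flags; uint32_t stripes_min, stripes_max; };
+    struct cpu_args { uint64_t flags; uint32_t stripes_min, stripes_max; };
+
+    /*
+     * Pre-patch conversion: the stripes fields are never copied, so a
+     * balance resumed from the status item silently loses the filter.
+     */
+    static void disk_to_cpu(struct cpu_args *cpu, const struct disk_args *disk)
+    {
+        cpu->flags = disk->flags; /* le64_to_cpu() in the kernel */
+        /* missing: cpu->stripes_min and cpu->stripes_max */
+    }
+
+    int main(void)
+    {
+        struct disk_args on_disk = { 1, 2, 4 };
+        struct cpu_args resumed = { 0 };
+
+        disk_to_cpu(&resumed, &on_disk);
+        printf("resumed stripes filter: min=%u max=%u (expected 2/4)\n",
+               resumed.stripes_min, resumed.stripes_max);
+        return 0;
+    }
+
+The hunks below add the two missing conversions in both directions.]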
+ +Fixes: dee32d0ac3719ef8d640efaf0884111df444730f +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ctree.h | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -3070,6 +3070,8 @@ btrfs_disk_balance_args_to_cpu(struct bt + cpu->target = le64_to_cpu(disk->target); + cpu->flags = le64_to_cpu(disk->flags); + cpu->limit = le64_to_cpu(disk->limit); ++ cpu->stripes_min = le32_to_cpu(disk->stripes_min); ++ cpu->stripes_max = le32_to_cpu(disk->stripes_max); + } + + static inline void +@@ -3088,6 +3090,8 @@ btrfs_cpu_balance_args_to_disk(struct bt + disk->target = cpu_to_le64(cpu->target); + disk->flags = cpu_to_le64(cpu->flags); + disk->limit = cpu_to_le64(cpu->limit); ++ disk->stripes_min = cpu_to_le32(cpu->stripes_min); ++ disk->stripes_max = cpu_to_le32(cpu->stripes_max); + } + + /* struct btrfs_super_block */ diff --git a/queue-4.4/series b/queue-4.4/series new file mode 100644 index 00000000000..a670dc540ce --- /dev/null +++ b/queue-4.4/series @@ -0,0 +1,4 @@ +btrfs-limit-async_work-allocation-and-worker-func-duration.patch +btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch +btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch +btrfs-fix-qgroup-rescan-worker-initialization.patch diff --git a/queue-4.8/series b/queue-4.8/series new file mode 100644 index 00000000000..24348e58bb0 --- /dev/null +++ b/queue-4.8/series @@ -0,0 +1,9 @@ +aoe-fix-crash-in-page-count-manipulation.patch +btrfs-limit-async_work-allocation-and-worker-func-duration.patch +btrfs-fix-bug_on-in-btrfs_mark_buffer_dirty.patch +btrfs-fix-deadlock-caused-by-fsync-when-logging-directory-entries.patch +btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch +btrfs-fix-relocation-incorrectly-dropping-data-references.patch +btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch +btrfs-fix-emptiness-check-for-dirtied-extent-buffers-at-check_leaf.patch +btrfs-fix-qgroup-rescan-worker-initialization.patch
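
Editor's note: the backpressure check added by
btrfs-limit-async_work-allocation-and-worker-func-duration.patch is worth
restating as a standalone pattern: the producer refuses to queue another
work item once the backlog exceeds twice the workqueue threshold, and the
worker bails out after a bounded batch. Below is a minimal, self-contained
C sketch of the producer side; the names (struct workqueue, congested,
maybe_queue_work) and the user-space atomics are stand-ins for the kernel's
btrfs_workqueue machinery, not the actual kernel API:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define NO_THRESHOLD (-1)

    struct workqueue {
        atomic_int pending; /* works queued but not yet completed */
        int thresh;         /* normal concurrency threshold */
    };

    /*
     * Analogue of btrfs_workqueue_normal_congested(): treat the queue
     * as congested once the backlog is more than twice the threshold.
     */
    static bool congested(struct workqueue *wq)
    {
        if (wq->thresh == NO_THRESHOLD)
            return false;
        return atomic_load(&wq->pending) > wq->thresh * 2;
    }

    /*
     * Producer side, as in btrfs_wq_run_delayed_node(): skip queuing
     * when there is little to do or enough work is already in flight.
     */
    static int maybe_queue_work(struct workqueue *wq, int items, int background)
    {
        if (items < background || congested(wq))
            return 0;
        atomic_fetch_add(&wq->pending, 1); /* queue_work() would go here */
        return 1;
    }

    int main(void)
    {
        struct workqueue wq = { .pending = 5, .thresh = 2 };

        /* backlog (5) > 2 * thresh (4), so the producer backs off */
        printf("queued: %d\n", maybe_queue_work(&wq, 100, 64));
        return 0;
    }

Without the congestion check, every operation above the background
threshold would allocate and queue a fresh work item, which is exactly the
unbounded kmalloc-128 growth shown in the reproducer above.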