From: Greg Kroah-Hartman Date: Sun, 11 Nov 2018 20:56:29 +0000 (-0800) Subject: 4.14-stable patches X-Git-Tag: v4.19.2~15 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=54fcf703d22c3a3327a9a151f8440ccebb766f92;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: bpf-wait-for-running-bpf-programs-when-updating-map-in-map.patch btrfs-don-t-run-delayed_iputs-in-commit.patch btrfs-don-t-use-ctl-free_space-for-max_extent_size.patch btrfs-fix-assertion-on-fsync-of-regular-file-when-using-no-holes-feature.patch btrfs-fix-fsync-after-hole-punching-when-using-no-holes-feature.patch btrfs-fix-null-pointer-dereference-on-compressed-write-path-error.patch btrfs-fix-use-after-free-during-inode-eviction.patch btrfs-fix-use-after-free-when-dumping-free-space.patch btrfs-move-the-dio_sem-higher-up-the-callchain.patch btrfs-only-free-reserved-extent-if-we-didn-t-insert-it.patch btrfs-set-max_extent_size-properly.patch net-sched-remove-tca_options-from-policy.patch --- diff --git a/queue-4.14/bpf-wait-for-running-bpf-programs-when-updating-map-in-map.patch b/queue-4.14/bpf-wait-for-running-bpf-programs-when-updating-map-in-map.patch new file mode 100644 index 00000000000..2e8a36929f5 --- /dev/null +++ b/queue-4.14/bpf-wait-for-running-bpf-programs-when-updating-map-in-map.patch @@ -0,0 +1,66 @@ +From 1ae80cf31938c8f77c37a29bbe29e7f1cd492be8 Mon Sep 17 00:00:00 2001 +From: Daniel Colascione +Date: Fri, 12 Oct 2018 03:54:27 -0700 +Subject: bpf: wait for running BPF programs when updating map-in-map + +From: Daniel Colascione + +commit 1ae80cf31938c8f77c37a29bbe29e7f1cd492be8 upstream. + +The map-in-map frequently serves as a mechanism for atomic +snapshotting of state that a BPF program might record. The current +implementation is dangerous to use in this way, however, since +userspace has no way of knowing when all programs that might have +retrieved the "old" value of the map may have completed. 
+ +This change ensures that map update operations on map-in-map map types +always wait for all references to the old map to drop before returning +to userspace. + +Signed-off-by: Daniel Colascione +Reviewed-by: Joel Fernandes (Google) +Signed-off-by: Alexei Starovoitov +[fengc@google.com: 4.14 backport: adjust context] +Signed-off-by: Chenbo Feng +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/bpf/syscall.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -519,6 +519,17 @@ err_put: + return err; + } + ++static void maybe_wait_bpf_programs(struct bpf_map *map) ++{ ++ /* Wait for any running BPF programs to complete so that ++ * userspace, when we return to it, knows that all programs ++ * that could be running use the new map value. ++ */ ++ if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || ++ map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) ++ synchronize_rcu(); ++} ++ + #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags + + static int map_update_elem(union bpf_attr *attr) +@@ -592,6 +603,7 @@ static int map_update_elem(union bpf_att + } + __this_cpu_dec(bpf_prog_active); + preempt_enable(); ++ maybe_wait_bpf_programs(map); + + if (!err) + trace_bpf_map_update_elem(map, ufd, key, value); +@@ -636,6 +648,7 @@ static int map_delete_elem(union bpf_att + rcu_read_unlock(); + __this_cpu_dec(bpf_prog_active); + preempt_enable(); ++ maybe_wait_bpf_programs(map); + + if (!err) + trace_bpf_map_delete_elem(map, ufd, key); diff --git a/queue-4.14/btrfs-don-t-run-delayed_iputs-in-commit.patch b/queue-4.14/btrfs-don-t-run-delayed_iputs-in-commit.patch new file mode 100644 index 00000000000..ac92b27eb98 --- /dev/null +++ b/queue-4.14/btrfs-don-t-run-delayed_iputs-in-commit.patch @@ -0,0 +1,52 @@ +From 30928e9baac238a7330085a1c5747f0b5df444b4 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Thu, 11 Oct 2018 15:54:31 -0400 +Subject: btrfs: don't run delayed_iputs in commit + +From: Josef Bacik + +commit 
30928e9baac238a7330085a1c5747f0b5df444b4 upstream. + +This could result in a really bad case where we do something like + +evict + evict_refill_and_join + btrfs_commit_transaction + btrfs_run_delayed_iputs + evict + evict_refill_and_join + btrfs_commit_transaction +... forever + +We have plenty of other places where we run delayed iputs that are much +safer, let those do the work. + +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Filipe Manana +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/transaction.c | 9 --------- + 1 file changed, 9 deletions(-) + +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -2307,15 +2307,6 @@ int btrfs_commit_transaction(struct btrf + + kmem_cache_free(btrfs_trans_handle_cachep, trans); + +- /* +- * If fs has been frozen, we can not handle delayed iputs, otherwise +- * it'll result in deadlock about SB_FREEZE_FS. +- */ +- if (current != fs_info->transaction_kthread && +- current != fs_info->cleaner_kthread && +- !test_bit(BTRFS_FS_FROZEN, &fs_info->flags)) +- btrfs_run_delayed_iputs(fs_info); +- + return ret; + + scrub_continue: diff --git a/queue-4.14/btrfs-don-t-use-ctl-free_space-for-max_extent_size.patch b/queue-4.14/btrfs-don-t-use-ctl-free_space-for-max_extent_size.patch new file mode 100644 index 00000000000..11b66cbc2d1 --- /dev/null +++ b/queue-4.14/btrfs-don-t-use-ctl-free_space-for-max_extent_size.patch @@ -0,0 +1,56 @@ +From fb5c39d7a887108087de6ff93d3f326b01b4ef41 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Thu, 11 Oct 2018 15:54:09 -0400 +Subject: btrfs: don't use ctl->free_space for max_extent_size + +From: Josef Bacik + +commit fb5c39d7a887108087de6ff93d3f326b01b4ef41 upstream. + +max_extent_size is supposed to be the largest contiguous range for the +space info, and ctl->free_space is the total free space in the block +group. 
We need to keep track of these separately and _only_ use the +max_free_space if we don't have a max_extent_size, as that means our +original request was too large to search any of the block groups for and +therefore wouldn't have a max_extent_size set. + +CC: stable@vger.kernel.org # 4.14+ +Reviewed-by: Filipe Manana +Signed-off-by: Josef Bacik +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/extent-tree.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -7573,6 +7573,7 @@ static noinline int find_free_extent(str + struct btrfs_block_group_cache *block_group = NULL; + u64 search_start = 0; + u64 max_extent_size = 0; ++ u64 max_free_space = 0; + u64 empty_cluster = 0; + struct btrfs_space_info *space_info; + int loop = 0; +@@ -7867,8 +7868,8 @@ unclustered_alloc: + spin_lock(&ctl->tree_lock); + if (ctl->free_space < + num_bytes + empty_cluster + empty_size) { +- if (ctl->free_space > max_extent_size) +- max_extent_size = ctl->free_space; ++ max_free_space = max(max_free_space, ++ ctl->free_space); + spin_unlock(&ctl->tree_lock); + goto loop; + } +@@ -8037,6 +8038,8 @@ loop: + } + out: + if (ret == -ENOSPC) { ++ if (!max_extent_size) ++ max_extent_size = max_free_space; + spin_lock(&space_info->lock); + space_info->max_extent_size = max_extent_size; + spin_unlock(&space_info->lock); diff --git a/queue-4.14/btrfs-fix-assertion-on-fsync-of-regular-file-when-using-no-holes-feature.patch b/queue-4.14/btrfs-fix-assertion-on-fsync-of-regular-file-when-using-no-holes-feature.patch new file mode 100644 index 00000000000..5dfcef286a0 --- /dev/null +++ b/queue-4.14/btrfs-fix-assertion-on-fsync-of-regular-file-when-using-no-holes-feature.patch @@ -0,0 +1,59 @@ +From 7ed586d0a8241e81d58c656c5b315f781fa6fc97 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 15 Oct 2018 09:51:00 +0100 +Subject: Btrfs: fix assertion on fsync of regular file when using no-holes 
feature + +From: Filipe Manana + +commit 7ed586d0a8241e81d58c656c5b315f781fa6fc97 upstream. + +When using the NO_HOLES feature and logging a regular file, we were +expecting that if we find an inline extent, that either its size in RAM +(uncompressed and unencoded) matches the size of the file or if it does +not, that it matches the sector size and it represents compressed data. +This assertion does not cover a case where the length of the inline extent +is smaller than the sector size and also smaller than the file's size, such +case is possible through fallocate. Example: + + $ mkfs.btrfs -f -O no-holes /dev/sdb + $ mount /dev/sdb /mnt + + $ xfs_io -f -c "pwrite -S 0xb60 0 21" /mnt/foobar + $ xfs_io -c "falloc 40 40" /mnt/foobar + $ xfs_io -c "fsync" /mnt/foobar + +In the above example we trigger the assertion because the inline extent's +length is 21 bytes while the file size is 80 bytes. The fallocate() call +merely updated the file's size and did not touch the existing inline +extent, as expected. + +So fix this by adjusting the assertion so that an inline extent length +smaller than the file size is valid if the file size is smaller than the +filesystem's sector size. + +A test case for fstests follows soon. 
+ +Reported-by: Anatoly Trosinenko +Fixes: a89ca6f24ffe ("Btrfs: fix fsync after truncate when no_holes feature is enabled") +CC: stable@vger.kernel.org # 4.14+ +Link: https://lore.kernel.org/linux-btrfs/CAE5jQCfRSBC7n4pUTFJcmHh109=gwyT9mFkCOL+NKfzswmR=_Q@mail.gmail.com/ +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/tree-log.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -4641,7 +4641,8 @@ static int btrfs_log_trailing_hole(struc + ASSERT(len == i_size || + (len == fs_info->sectorsize && + btrfs_file_extent_compression(leaf, extent) != +- BTRFS_COMPRESS_NONE)); ++ BTRFS_COMPRESS_NONE) || ++ (len < i_size && i_size < fs_info->sectorsize)); + return 0; + } + diff --git a/queue-4.14/btrfs-fix-fsync-after-hole-punching-when-using-no-holes-feature.patch b/queue-4.14/btrfs-fix-fsync-after-hole-punching-when-using-no-holes-feature.patch new file mode 100644 index 00000000000..1f210d8bbe4 --- /dev/null +++ b/queue-4.14/btrfs-fix-fsync-after-hole-punching-when-using-no-holes-feature.patch @@ -0,0 +1,97 @@ +From 4ee3fad34a9cc2cf33303dfbd0cf554248651c86 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 26 Mar 2018 23:59:00 +0100 +Subject: Btrfs: fix fsync after hole punching when using no-holes feature + +From: Filipe Manana + +commit 4ee3fad34a9cc2cf33303dfbd0cf554248651c86 upstream. + +When we have the no-holes mode enabled and fsync a file after punching a +hole in it, we can end up not logging the whole hole range in the log tree. +This happens if the file has extent items that span more than one leaf and +we punch a hole that covers a range that starts in a leaf but does not go +beyond the offset of the first extent in the next leaf. 
+ +Example: + + $ mkfs.btrfs -f -O no-holes -n 65536 /dev/sdb + $ mount /dev/sdb /mnt + $ for ((i = 0; i <= 831; i++)); do + offset=$((i * 2 * 256 * 1024)) + xfs_io -f -c "pwrite -S 0xab -b 256K $offset 256K" \ + /mnt/foobar >/dev/null + done + $ sync + + # We now have 2 leaves in our filesystem fs tree, the first leaf has an + # item corresponding to the extent at file offset 216530944 and the second + # leaf has a first item corresponding to the extent at offset 217055232. + # Now we punch a hole that partially covers the range of the extent at + # offset 216530944 but does not go beyond the offset 217055232. + + $ xfs_io -c "fpunch $((216530944 + 128 * 1024 - 4000)) 256K" /mnt/foobar + $ xfs_io -c "fsync" /mnt/foobar + + <power fail> + + # mount to replay the log + $ mount /dev/sdb /mnt + + # Before this patch, only the subrange [216658016, 216662016[ (length of + # 4000 bytes) was logged, leaving an incorrect file layout after log + # replay. + +Fix this by checking if there is a hole between the last extent item that +we processed and the first extent item in the next leaf, and if there is +one, log an explicit hole extent item. + +Fixes: 16e7549f045d ("Btrfs: incompatible format change to remove hole extents") +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/tree-log.c | 30 ++++++++++++++++++++++++++++++ + 1 file changed, 30 insertions(+) + +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -3978,6 +3978,36 @@ fill_holes: + break; + *last_extent = extent_end; + } ++ ++ /* ++ * Check if there is a hole between the last extent found in our leaf ++ * and the first extent in the next leaf. If there is one, we need to ++ * log an explicit hole so that at replay time we can punch the hole. 
++ */ ++ if (ret == 0 && ++ key.objectid == btrfs_ino(inode) && ++ key.type == BTRFS_EXTENT_DATA_KEY && ++ i == btrfs_header_nritems(src_path->nodes[0])) { ++ ret = btrfs_next_leaf(inode->root, src_path); ++ need_find_last_extent = true; ++ if (ret > 0) { ++ ret = 0; ++ } else if (ret == 0) { ++ btrfs_item_key_to_cpu(src_path->nodes[0], &key, ++ src_path->slots[0]); ++ if (key.objectid == btrfs_ino(inode) && ++ key.type == BTRFS_EXTENT_DATA_KEY && ++ *last_extent < key.offset) { ++ const u64 len = key.offset - *last_extent; ++ ++ ret = btrfs_insert_file_extent(trans, log, ++ btrfs_ino(inode), ++ *last_extent, 0, ++ 0, len, 0, len, ++ 0, 0, 0); ++ } ++ } ++ } + /* + * Need to let the callers know we dropped the path so they should + * re-search. diff --git a/queue-4.14/btrfs-fix-null-pointer-dereference-on-compressed-write-path-error.patch b/queue-4.14/btrfs-fix-null-pointer-dereference-on-compressed-write-path-error.patch new file mode 100644 index 00000000000..634529b3557 --- /dev/null +++ b/queue-4.14/btrfs-fix-null-pointer-dereference-on-compressed-write-path-error.patch @@ -0,0 +1,46 @@ +From 3527a018c00e5dbada2f9d7ed5576437b6dd5cfb Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Sat, 13 Oct 2018 00:37:25 +0100 +Subject: Btrfs: fix null pointer dereference on compressed write path error + +From: Filipe Manana + +commit 3527a018c00e5dbada2f9d7ed5576437b6dd5cfb upstream. + +At inode.c:compress_file_range(), under the "free_pages_out" label, we can +end up dereferencing the "pages" pointer when it has a NULL value. This +case happens when "start" has a value of 0 and we fail to allocate memory +for the "pages" pointer. When that happens we jump to the "cont" label and +then enter the "if (start == 0)" branch where we immediately call the +cow_file_range_inline() function. 
If that function returns 0 (success +creating an inline extent) or an error (like -ENOMEM for example) we jump +to the "free_pages_out" label and then access "pages[i]" leading to a NULL +pointer dereference, since "nr_pages" has a value greater than zero at +that point. + +Fix this by setting "nr_pages" to 0 when we fail to allocate memory for +the "pages" pointer. + +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201119 +Fixes: 771ed689d2cd ("Btrfs: Optimize compressed writeback and reads") +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Liu Bo +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -524,6 +524,7 @@ again: + pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); + if (!pages) { + /* just bail out to the uncompressed code */ ++ nr_pages = 0; + goto cont; + } + diff --git a/queue-4.14/btrfs-fix-use-after-free-during-inode-eviction.patch b/queue-4.14/btrfs-fix-use-after-free-during-inode-eviction.patch new file mode 100644 index 00000000000..ef5e7ae0a82 --- /dev/null +++ b/queue-4.14/btrfs-fix-use-after-free-during-inode-eviction.patch @@ -0,0 +1,61 @@ +From 421f0922a2cfb0c75acd9746454aaa576c711a65 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Fri, 12 Oct 2018 13:02:48 +0100 +Subject: Btrfs: fix use-after-free during inode eviction + +From: Filipe Manana + +commit 421f0922a2cfb0c75acd9746454aaa576c711a65 upstream. + +At inode.c:evict_inode_truncate_pages(), when we iterate over the +inode's extent states, we access an extent state record's "state" field +after we unlocked the inode's io tree lock. 
This can lead to a +use-after-free issue because after we unlock the io tree that extent +state record might have been freed due to being merged into another +adjacent extent state record (a previous inflight bio for a read +operation finished in the meanwhile which unlocked a range in the io +tree and caused a merge of extent state records, as explained in the +comment before the while loop added in commit 6ca0709756710 ("Btrfs: fix +hang during inode eviction due to concurrent readahead")). + +Fix this by keeping a copy of the extent state's flags in a local +variable and using it after unlocking the io tree. + +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201189 +Fixes: b9d0b38928e2 ("btrfs: Add handler for invalidate page") +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -5335,11 +5335,13 @@ static void evict_inode_truncate_pages(s + struct extent_state *cached_state = NULL; + u64 start; + u64 end; ++ unsigned state_flags; + + node = rb_first(&io_tree->state); + state = rb_entry(node, struct extent_state, rb_node); + start = state->start; + end = state->end; ++ state_flags = state->state; + spin_unlock(&io_tree->lock); + + lock_extent_bits(io_tree, start, end, &cached_state); +@@ -5352,7 +5354,7 @@ static void evict_inode_truncate_pages(s + * + * Note, end is the bytenr of last byte, so we need + 1 here. 
+ */ +- if (state->state & EXTENT_DELALLOC) ++ if (state_flags & EXTENT_DELALLOC) + btrfs_qgroup_free_data(inode, NULL, start, end - start + 1); + + clear_extent_bit(io_tree, start, end, diff --git a/queue-4.14/btrfs-fix-use-after-free-when-dumping-free-space.patch b/queue-4.14/btrfs-fix-use-after-free-when-dumping-free-space.patch new file mode 100644 index 00000000000..4b15e798d4d --- /dev/null +++ b/queue-4.14/btrfs-fix-use-after-free-when-dumping-free-space.patch @@ -0,0 +1,221 @@ +From 9084cb6a24bf5838a665af92ded1af8363f9e563 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 22 Oct 2018 10:43:06 +0100 +Subject: Btrfs: fix use-after-free when dumping free space + +From: Filipe Manana + +commit 9084cb6a24bf5838a665af92ded1af8363f9e563 upstream. + +We were iterating a block group's free space cache rbtree without locking +first the lock that protects it (the free_space_ctl->free_space_offset +rbtree is protected by the free_space_ctl->tree_lock spinlock). + +KASAN reported an use-after-free problem when iterating such a rbtree due +to a concurrent rbtree delete: + +[ 9520.359168] ================================================================== +[ 9520.359656] BUG: KASAN: use-after-free in rb_next+0x13/0x90 +[ 9520.359949] Read of size 8 at addr ffff8800b7ada500 by task btrfs-transacti/1721 +[ 9520.360357] +[ 9520.360530] CPU: 4 PID: 1721 Comm: btrfs-transacti Tainted: G L 4.19.0-rc8-nbor #555 +[ 9520.360990] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 +[ 9520.362682] Call Trace: +[ 9520.362887] dump_stack+0xa4/0xf5 +[ 9520.363146] print_address_description+0x78/0x280 +[ 9520.363412] kasan_report+0x263/0x390 +[ 9520.363650] ? 
rb_next+0x13/0x90 +[ 9520.363873] __asan_load8+0x54/0x90 +[ 9520.364102] rb_next+0x13/0x90 +[ 9520.364380] btrfs_dump_free_space+0x146/0x160 [btrfs] +[ 9520.364697] dump_space_info+0x2cd/0x310 [btrfs] +[ 9520.364997] btrfs_reserve_extent+0x1ee/0x1f0 [btrfs] +[ 9520.365310] __btrfs_prealloc_file_range+0x1cc/0x620 [btrfs] +[ 9520.365646] ? btrfs_update_time+0x180/0x180 [btrfs] +[ 9520.365923] ? _raw_spin_unlock+0x27/0x40 +[ 9520.366204] ? btrfs_alloc_data_chunk_ondemand+0x2c0/0x5c0 [btrfs] +[ 9520.366549] btrfs_prealloc_file_range_trans+0x23/0x30 [btrfs] +[ 9520.366880] cache_save_setup+0x42e/0x580 [btrfs] +[ 9520.367220] ? btrfs_check_data_free_space+0xd0/0xd0 [btrfs] +[ 9520.367518] ? lock_downgrade+0x2f0/0x2f0 +[ 9520.367799] ? btrfs_write_dirty_block_groups+0x11f/0x6e0 [btrfs] +[ 9520.368104] ? kasan_check_read+0x11/0x20 +[ 9520.368349] ? do_raw_spin_unlock+0xa8/0x140 +[ 9520.368638] btrfs_write_dirty_block_groups+0x2af/0x6e0 [btrfs] +[ 9520.368978] ? btrfs_start_dirty_block_groups+0x870/0x870 [btrfs] +[ 9520.369282] ? do_raw_spin_unlock+0xa8/0x140 +[ 9520.369534] ? _raw_spin_unlock+0x27/0x40 +[ 9520.369811] ? btrfs_run_delayed_refs+0x1b8/0x230 [btrfs] +[ 9520.370137] commit_cowonly_roots+0x4b9/0x610 [btrfs] +[ 9520.370560] ? commit_fs_roots+0x350/0x350 [btrfs] +[ 9520.370926] ? btrfs_run_delayed_refs+0x1b8/0x230 [btrfs] +[ 9520.371285] btrfs_commit_transaction+0x5e5/0x10e0 [btrfs] +[ 9520.371612] ? btrfs_apply_pending_changes+0x90/0x90 [btrfs] +[ 9520.371943] ? start_transaction+0x168/0x6c0 [btrfs] +[ 9520.372257] transaction_kthread+0x21c/0x240 [btrfs] +[ 9520.372537] kthread+0x1d2/0x1f0 +[ 9520.372793] ? btrfs_cleanup_transaction+0xb50/0xb50 [btrfs] +[ 9520.373090] ? 
kthread_park+0xb0/0xb0 +[ 9520.373329] ret_from_fork+0x3a/0x50 +[ 9520.373567] +[ 9520.373738] Allocated by task 1804: +[ 9520.373974] kasan_kmalloc+0xff/0x180 +[ 9520.374208] kasan_slab_alloc+0x11/0x20 +[ 9520.374447] kmem_cache_alloc+0xfc/0x2d0 +[ 9520.374731] __btrfs_add_free_space+0x40/0x580 [btrfs] +[ 9520.375044] unpin_extent_range+0x4f7/0x7a0 [btrfs] +[ 9520.375383] btrfs_finish_extent_commit+0x15f/0x4d0 [btrfs] +[ 9520.375707] btrfs_commit_transaction+0xb06/0x10e0 [btrfs] +[ 9520.376027] btrfs_alloc_data_chunk_ondemand+0x237/0x5c0 [btrfs] +[ 9520.376365] btrfs_check_data_free_space+0x81/0xd0 [btrfs] +[ 9520.376689] btrfs_delalloc_reserve_space+0x25/0x80 [btrfs] +[ 9520.377018] btrfs_direct_IO+0x42e/0x6d0 [btrfs] +[ 9520.377284] generic_file_direct_write+0x11e/0x220 +[ 9520.377587] btrfs_file_write_iter+0x472/0xac0 [btrfs] +[ 9520.377875] aio_write+0x25c/0x360 +[ 9520.378106] io_submit_one+0xaa0/0xdc0 +[ 9520.378343] __se_sys_io_submit+0xfa/0x2f0 +[ 9520.378589] __x64_sys_io_submit+0x43/0x50 +[ 9520.378840] do_syscall_64+0x7d/0x240 +[ 9520.379081] entry_SYSCALL_64_after_hwframe+0x49/0xbe +[ 9520.379387] +[ 9520.379557] Freed by task 1802: +[ 9520.379782] __kasan_slab_free+0x173/0x260 +[ 9520.380028] kasan_slab_free+0xe/0x10 +[ 9520.380262] kmem_cache_free+0xc1/0x2c0 +[ 9520.380544] btrfs_find_space_for_alloc+0x4cd/0x4e0 [btrfs] +[ 9520.380866] find_free_extent+0xa99/0x17e0 [btrfs] +[ 9520.381166] btrfs_reserve_extent+0xd5/0x1f0 [btrfs] +[ 9520.381474] btrfs_get_blocks_direct+0x60b/0xbd0 [btrfs] +[ 9520.381761] __blockdev_direct_IO+0x10ee/0x58a1 +[ 9520.382059] btrfs_direct_IO+0x25a/0x6d0 [btrfs] +[ 9520.382321] generic_file_direct_write+0x11e/0x220 +[ 9520.382623] btrfs_file_write_iter+0x472/0xac0 [btrfs] +[ 9520.382904] aio_write+0x25c/0x360 +[ 9520.383172] io_submit_one+0xaa0/0xdc0 +[ 9520.383416] __se_sys_io_submit+0xfa/0x2f0 +[ 9520.383678] __x64_sys_io_submit+0x43/0x50 +[ 9520.383927] do_syscall_64+0x7d/0x240 +[ 9520.384165] 
entry_SYSCALL_64_after_hwframe+0x49/0xbe +[ 9520.384439] +[ 9520.384610] The buggy address belongs to the object at ffff8800b7ada500 + which belongs to the cache btrfs_free_space of size 72 +[ 9520.385175] The buggy address is located 0 bytes inside of + 72-byte region [ffff8800b7ada500, ffff8800b7ada548) +[ 9520.385691] The buggy address belongs to the page: +[ 9520.385957] page:ffffea0002deb680 count:1 mapcount:0 mapping:ffff880108a1d700 index:0x0 compound_mapcount: 0 +[ 9520.388030] flags: 0x8100(slab|head) +[ 9520.388281] raw: 0000000000008100 ffffea0002deb608 ffffea0002728808 ffff880108a1d700 +[ 9520.388722] raw: 0000000000000000 0000000000130013 00000001ffffffff 0000000000000000 +[ 9520.389169] page dumped because: kasan: bad access detected +[ 9520.389473] +[ 9520.389658] Memory state around the buggy address: +[ 9520.389943] ffff8800b7ada400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 9520.390368] ffff8800b7ada480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 9520.390796] >ffff8800b7ada500: fb fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc +[ 9520.391223] ^ +[ 9520.391461] ffff8800b7ada580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 9520.391885] ffff8800b7ada600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 9520.392313] ================================================================== +[ 9520.392772] BTRFS critical (device vdc): entry offset 2258497536, bytes 131072, bitmap no +[ 9520.393247] BUG: unable to handle kernel NULL pointer dereference at 0000000000000011 +[ 9520.393705] PGD 800000010dbab067 P4D 800000010dbab067 PUD 107551067 PMD 0 +[ 9520.394059] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI +[ 9520.394378] CPU: 4 PID: 1721 Comm: btrfs-transacti Tainted: G B L 4.19.0-rc8-nbor #555 +[ 9520.394858] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 +[ 9520.395350] RIP: 0010:rb_next+0x3c/0x90 +[ 9520.396461] RSP: 0018:ffff8801074ff780 EFLAGS: 00010292 +[ 9520.396762] RAX: 
0000000000000000 RBX: 0000000000000001 RCX: ffffffff81b5ac4c +[ 9520.397115] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 0000000000000011 +[ 9520.397468] RBP: ffff8801074ff7a0 R08: ffffed0021d64ccc R09: ffffed0021d64ccc +[ 9520.397821] R10: 0000000000000001 R11: ffffed0021d64ccb R12: ffff8800b91e0000 +[ 9520.398188] R13: ffff8800a3ceba48 R14: ffff8800b627bf80 R15: 0000000000020000 +[ 9520.398555] FS: 0000000000000000(0000) GS:ffff88010eb00000(0000) knlGS:0000000000000000 +[ 9520.399007] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 9520.399335] CR2: 0000000000000011 CR3: 0000000106b52000 CR4: 00000000000006a0 +[ 9520.399679] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 9520.400023] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 9520.400400] Call Trace: +[ 9520.400648] btrfs_dump_free_space+0x146/0x160 [btrfs] +[ 9520.400974] dump_space_info+0x2cd/0x310 [btrfs] +[ 9520.401287] btrfs_reserve_extent+0x1ee/0x1f0 [btrfs] +[ 9520.401609] __btrfs_prealloc_file_range+0x1cc/0x620 [btrfs] +[ 9520.401952] ? btrfs_update_time+0x180/0x180 [btrfs] +[ 9520.402232] ? _raw_spin_unlock+0x27/0x40 +[ 9520.402522] ? btrfs_alloc_data_chunk_ondemand+0x2c0/0x5c0 [btrfs] +[ 9520.402882] btrfs_prealloc_file_range_trans+0x23/0x30 [btrfs] +[ 9520.403261] cache_save_setup+0x42e/0x580 [btrfs] +[ 9520.403570] ? btrfs_check_data_free_space+0xd0/0xd0 [btrfs] +[ 9520.403871] ? lock_downgrade+0x2f0/0x2f0 +[ 9520.404161] ? btrfs_write_dirty_block_groups+0x11f/0x6e0 [btrfs] +[ 9520.404481] ? kasan_check_read+0x11/0x20 +[ 9520.404732] ? do_raw_spin_unlock+0xa8/0x140 +[ 9520.405026] btrfs_write_dirty_block_groups+0x2af/0x6e0 [btrfs] +[ 9520.405375] ? btrfs_start_dirty_block_groups+0x870/0x870 [btrfs] +[ 9520.405694] ? do_raw_spin_unlock+0xa8/0x140 +[ 9520.405958] ? _raw_spin_unlock+0x27/0x40 +[ 9520.406243] ? btrfs_run_delayed_refs+0x1b8/0x230 [btrfs] +[ 9520.406574] commit_cowonly_roots+0x4b9/0x610 [btrfs] +[ 9520.406899] ? 
commit_fs_roots+0x350/0x350 [btrfs] +[ 9520.407253] ? btrfs_run_delayed_refs+0x1b8/0x230 [btrfs] +[ 9520.407589] btrfs_commit_transaction+0x5e5/0x10e0 [btrfs] +[ 9520.407925] ? btrfs_apply_pending_changes+0x90/0x90 [btrfs] +[ 9520.408262] ? start_transaction+0x168/0x6c0 [btrfs] +[ 9520.408582] transaction_kthread+0x21c/0x240 [btrfs] +[ 9520.408870] kthread+0x1d2/0x1f0 +[ 9520.409138] ? btrfs_cleanup_transaction+0xb50/0xb50 [btrfs] +[ 9520.409440] ? kthread_park+0xb0/0xb0 +[ 9520.409682] ret_from_fork+0x3a/0x50 +[ 9520.410508] Dumping ftrace buffer: +[ 9520.410764] (ftrace buffer empty) +[ 9520.411007] CR2: 0000000000000011 +[ 9520.411297] ---[ end trace 01a0863445cf360a ]--- +[ 9520.411568] RIP: 0010:rb_next+0x3c/0x90 +[ 9520.412644] RSP: 0018:ffff8801074ff780 EFLAGS: 00010292 +[ 9520.412932] RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffffffff81b5ac4c +[ 9520.413274] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 0000000000000011 +[ 9520.413616] RBP: ffff8801074ff7a0 R08: ffffed0021d64ccc R09: ffffed0021d64ccc +[ 9520.414007] R10: 0000000000000001 R11: ffffed0021d64ccb R12: ffff8800b91e0000 +[ 9520.414349] R13: ffff8800a3ceba48 R14: ffff8800b627bf80 R15: 0000000000020000 +[ 9520.416074] FS: 0000000000000000(0000) GS:ffff88010eb00000(0000) knlGS:0000000000000000 +[ 9520.416536] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 9520.416848] CR2: 0000000000000011 CR3: 0000000106b52000 CR4: 00000000000006a0 +[ 9520.418477] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 9520.418846] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 9520.419204] Kernel panic - not syncing: Fatal exception +[ 9520.419666] Dumping ftrace buffer: +[ 9520.419930] (ftrace buffer empty) +[ 9520.420168] Kernel Offset: disabled +[ 9520.420406] ---[ end Kernel panic - not syncing: Fatal exception ]--- + +Fix this by acquiring the respective lock before iterating the rbtree. 
+ +Reported-by: Nikolay Borisov +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/free-space-cache.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/btrfs/free-space-cache.c ++++ b/fs/btrfs/free-space-cache.c +@@ -2482,6 +2482,7 @@ void btrfs_dump_free_space(struct btrfs_ + struct rb_node *n; + int count = 0; + ++ spin_lock(&ctl->tree_lock); + for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { + info = rb_entry(n, struct btrfs_free_space, offset_index); + if (info->bytes >= bytes && !block_group->ro) +@@ -2490,6 +2491,7 @@ void btrfs_dump_free_space(struct btrfs_ + info->offset, info->bytes, + (info->bitmap) ? "yes" : "no"); + } ++ spin_unlock(&ctl->tree_lock); + btrfs_info(fs_info, "block group has cluster?: %s", + list_empty(&block_group->cluster_list) ? "no" : "yes"); + btrfs_info(fs_info, diff --git a/queue-4.14/btrfs-move-the-dio_sem-higher-up-the-callchain.patch b/queue-4.14/btrfs-move-the-dio_sem-higher-up-the-callchain.patch new file mode 100644 index 00000000000..08c8be35ad7 --- /dev/null +++ b/queue-4.14/btrfs-move-the-dio_sem-higher-up-the-callchain.patch @@ -0,0 +1,239 @@ +From c495144bc6962186feae31d687596d2472000e45 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Fri, 12 Oct 2018 15:32:32 -0400 +Subject: btrfs: move the dio_sem higher up the callchain + +From: Josef Bacik + +commit c495144bc6962186feae31d687596d2472000e45 upstream. + +We're getting a lockdep splat because we take the dio_sem under the +log_mutex. What we really need is to protect fsync() from logging an +extent map for an extent we never waited on higher up, so just guard the +whole thing with dio_sem. 
+ +====================================================== +WARNING: possible circular locking dependency detected +4.18.0-rc4-xfstests-00025-g5de5edbaf1d4 #411 Not tainted +------------------------------------------------------ +aio-dio-invalid/30928 is trying to acquire lock: +0000000092621cfd (&mm->mmap_sem){++++}, at: get_user_pages_unlocked+0x5a/0x1e0 + +but task is already holding lock: +00000000cefe6b35 (&ei->dio_sem){++++}, at: btrfs_direct_IO+0x3be/0x400 + +which lock already depends on the new lock. + +the existing dependency chain (in reverse order) is: + +-> #5 (&ei->dio_sem){++++}: + lock_acquire+0xbd/0x220 + down_write+0x51/0xb0 + btrfs_log_changed_extents+0x80/0xa40 + btrfs_log_inode+0xbaf/0x1000 + btrfs_log_inode_parent+0x26f/0xa80 + btrfs_log_dentry_safe+0x50/0x70 + btrfs_sync_file+0x357/0x540 + do_fsync+0x38/0x60 + __ia32_sys_fdatasync+0x12/0x20 + do_fast_syscall_32+0x9a/0x2f0 + entry_SYSENTER_compat+0x84/0x96 + +-> #4 (&ei->log_mutex){+.+.}: + lock_acquire+0xbd/0x220 + __mutex_lock+0x86/0xa10 + btrfs_record_unlink_dir+0x2a/0xa0 + btrfs_unlink+0x5a/0xc0 + vfs_unlink+0xb1/0x1a0 + do_unlinkat+0x264/0x2b0 + do_fast_syscall_32+0x9a/0x2f0 + entry_SYSENTER_compat+0x84/0x96 + +-> #3 (sb_internal#2){.+.+}: + lock_acquire+0xbd/0x220 + __sb_start_write+0x14d/0x230 + start_transaction+0x3e6/0x590 + btrfs_evict_inode+0x475/0x640 + evict+0xbf/0x1b0 + btrfs_run_delayed_iputs+0x6c/0x90 + cleaner_kthread+0x124/0x1a0 + kthread+0x106/0x140 + ret_from_fork+0x3a/0x50 + +-> #2 (&fs_info->cleaner_delayed_iput_mutex){+.+.}: + lock_acquire+0xbd/0x220 + __mutex_lock+0x86/0xa10 + btrfs_alloc_data_chunk_ondemand+0x197/0x530 + btrfs_check_data_free_space+0x4c/0x90 + btrfs_delalloc_reserve_space+0x20/0x60 + btrfs_page_mkwrite+0x87/0x520 + do_page_mkwrite+0x31/0xa0 + __handle_mm_fault+0x799/0xb00 + handle_mm_fault+0x7c/0xe0 + __do_page_fault+0x1d3/0x4a0 + async_page_fault+0x1e/0x30 + +-> #1 (sb_pagefaults){.+.+}: + lock_acquire+0xbd/0x220 + __sb_start_write+0x14d/0x230 + 
btrfs_page_mkwrite+0x6a/0x520 + do_page_mkwrite+0x31/0xa0 + __handle_mm_fault+0x799/0xb00 + handle_mm_fault+0x7c/0xe0 + __do_page_fault+0x1d3/0x4a0 + async_page_fault+0x1e/0x30 + +-> #0 (&mm->mmap_sem){++++}: + __lock_acquire+0x42e/0x7a0 + lock_acquire+0xbd/0x220 + down_read+0x48/0xb0 + get_user_pages_unlocked+0x5a/0x1e0 + get_user_pages_fast+0xa4/0x150 + iov_iter_get_pages+0xc3/0x340 + do_direct_IO+0xf93/0x1d70 + __blockdev_direct_IO+0x32d/0x1c20 + btrfs_direct_IO+0x227/0x400 + generic_file_direct_write+0xcf/0x180 + btrfs_file_write_iter+0x308/0x58c + aio_write+0xf8/0x1d0 + io_submit_one+0x3a9/0x620 + __ia32_compat_sys_io_submit+0xb2/0x270 + do_int80_syscall_32+0x5b/0x1a0 + entry_INT80_compat+0x88/0xa0 + +other info that might help us debug this: + +Chain exists of: + &mm->mmap_sem --> &ei->log_mutex --> &ei->dio_sem + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(&ei->dio_sem); + lock(&ei->log_mutex); + lock(&ei->dio_sem); + lock(&mm->mmap_sem); + + *** DEADLOCK *** + +1 lock held by aio-dio-invalid/30928: + #0: 00000000cefe6b35 (&ei->dio_sem){++++}, at: btrfs_direct_IO+0x3be/0x400 + +stack backtrace: +CPU: 0 PID: 30928 Comm: aio-dio-invalid Not tainted 4.18.0-rc4-xfstests-00025-g5de5edbaf1d4 #411 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 +Call Trace: + dump_stack+0x7c/0xbb + print_circular_bug.isra.37+0x297/0x2a4 + check_prev_add.constprop.45+0x781/0x7a0 + ? __lock_acquire+0x42e/0x7a0 + validate_chain.isra.41+0x7f0/0xb00 + __lock_acquire+0x42e/0x7a0 + lock_acquire+0xbd/0x220 + ? get_user_pages_unlocked+0x5a/0x1e0 + down_read+0x48/0xb0 + ? get_user_pages_unlocked+0x5a/0x1e0 + get_user_pages_unlocked+0x5a/0x1e0 + get_user_pages_fast+0xa4/0x150 + iov_iter_get_pages+0xc3/0x340 + do_direct_IO+0xf93/0x1d70 + ? __alloc_workqueue_key+0x358/0x490 + ? __blockdev_direct_IO+0x14b/0x1c20 + __blockdev_direct_IO+0x32d/0x1c20 + ? btrfs_run_delalloc_work+0x40/0x40 + ? can_nocow_extent+0x490/0x490 + ? 
kvm_clock_read+0x1f/0x30 + ? can_nocow_extent+0x490/0x490 + ? btrfs_run_delalloc_work+0x40/0x40 + btrfs_direct_IO+0x227/0x400 + ? btrfs_run_delalloc_work+0x40/0x40 + generic_file_direct_write+0xcf/0x180 + btrfs_file_write_iter+0x308/0x58c + aio_write+0xf8/0x1d0 + ? kvm_clock_read+0x1f/0x30 + ? __might_fault+0x3e/0x90 + io_submit_one+0x3a9/0x620 + ? io_submit_one+0xe5/0x620 + __ia32_compat_sys_io_submit+0xb2/0x270 + do_int80_syscall_32+0x5b/0x1a0 + entry_INT80_compat+0x88/0xa0 + +CC: stable@vger.kernel.org # 4.14+ +Reviewed-by: Filipe Manana +Signed-off-by: Josef Bacik +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/file.c | 12 ++++++++++++ + fs/btrfs/tree-log.c | 2 -- + 2 files changed, 12 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -2078,6 +2078,14 @@ int btrfs_sync_file(struct file *file, l + goto out; + + inode_lock(inode); ++ ++ /* ++ * We take the dio_sem here because the tree log stuff can race with ++ * lockless dio writes and get an extent map logged for an extent we ++ * never waited on. We need it this high up for lockdep reasons. ++ */ ++ down_write(&BTRFS_I(inode)->dio_sem); ++ + atomic_inc(&root->log_batch); + full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags); +@@ -2129,6 +2137,7 @@ int btrfs_sync_file(struct file *file, l + ret = start_ordered_ops(inode, start, end); + } + if (ret) { ++ up_write(&BTRFS_I(inode)->dio_sem); + inode_unlock(inode); + goto out; + } +@@ -2184,6 +2193,7 @@ int btrfs_sync_file(struct file *file, l + * checked called fsync. 
+ */ + ret = filemap_check_wb_err(inode->i_mapping, file->f_wb_err); ++ up_write(&BTRFS_I(inode)->dio_sem); + inode_unlock(inode); + goto out; + } +@@ -2208,6 +2218,7 @@ int btrfs_sync_file(struct file *file, l + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); ++ up_write(&BTRFS_I(inode)->dio_sem); + inode_unlock(inode); + goto out; + } +@@ -2229,6 +2240,7 @@ int btrfs_sync_file(struct file *file, l + * file again, but that will end up using the synchronization + * inside btrfs_sync_log to keep things safe. + */ ++ up_write(&BTRFS_I(inode)->dio_sem); + inode_unlock(inode); + + /* +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -4362,7 +4362,6 @@ static int btrfs_log_changed_extents(str + + INIT_LIST_HEAD(&extents); + +- down_write(&inode->dio_sem); + write_lock(&tree->lock); + test_gen = root->fs_info->last_trans_committed; + logged_start = start; +@@ -4443,7 +4442,6 @@ process: + } + WARN_ON(!list_empty(&extents)); + write_unlock(&tree->lock); +- up_write(&inode->dio_sem); + + btrfs_release_path(path); + if (!ret) diff --git a/queue-4.14/btrfs-only-free-reserved-extent-if-we-didn-t-insert-it.patch b/queue-4.14/btrfs-only-free-reserved-extent-if-we-didn-t-insert-it.patch new file mode 100644 index 00000000000..af6ae16a2c0 --- /dev/null +++ b/queue-4.14/btrfs-only-free-reserved-extent-if-we-didn-t-insert-it.patch @@ -0,0 +1,66 @@ +From 49940bdd57779c78462da7aa5a8650b2fea8c2ff Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Thu, 11 Oct 2018 15:54:21 -0400 +Subject: btrfs: only free reserved extent if we didn't insert it + +From: Josef Bacik + +commit 49940bdd57779c78462da7aa5a8650b2fea8c2ff upstream. + +When we insert the file extent once the ordered extent completes we free +the reserved extent reservation as it'll have been migrated to the +bytes_used counter. 
However if we error out after this step we'll still +clear the reserved extent reservation, resulting in a negative +accounting of the reserved bytes for the block group and space info. +Fix this by only doing the free if we didn't successfully insert a file +extent for this extent. + +CC: stable@vger.kernel.org # 4.14+ +Reviewed-by: Omar Sandoval +Reviewed-by: Filipe Manana +Signed-off-by: Josef Bacik +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -2966,6 +2966,7 @@ static int btrfs_finish_ordered_io(struc + bool truncated = false; + bool range_locked = false; + bool clear_new_delalloc_bytes = false; ++ bool clear_reserved_extent = true; + + if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && + !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) && +@@ -3069,10 +3070,12 @@ static int btrfs_finish_ordered_io(struc + logical_len, logical_len, + compress_type, 0, 0, + BTRFS_FILE_EXTENT_REG); +- if (!ret) ++ if (!ret) { ++ clear_reserved_extent = false; + btrfs_release_delalloc_bytes(fs_info, + ordered_extent->start, + ordered_extent->disk_len); ++ } + } + unpin_extent_cache(&BTRFS_I(inode)->extent_tree, + ordered_extent->file_offset, ordered_extent->len, +@@ -3132,8 +3135,13 @@ out: + * wrong we need to return the space for this ordered extent + * back to the allocator. We only free the extent in the + * truncated case if we didn't write out the extent at all. ++ * ++ * If we made it past insert_reserved_file_extent before we ++ * errored out then we don't need to do this as the accounting ++ * has already been done. 
+ */ + if ((ret || !logical_len) && ++ clear_reserved_extent && + !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && + !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) + btrfs_free_reserved_extent(fs_info, diff --git a/queue-4.14/btrfs-set-max_extent_size-properly.patch b/queue-4.14/btrfs-set-max_extent_size-properly.patch new file mode 100644 index 00000000000..4047bf6d04d --- /dev/null +++ b/queue-4.14/btrfs-set-max_extent_size-properly.patch @@ -0,0 +1,97 @@ +From ad22cf6ea47fa20fbe11ac324a0a15c0a9a4a2a9 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Fri, 12 Oct 2018 15:32:33 -0400 +Subject: btrfs: set max_extent_size properly + +From: Josef Bacik + +commit ad22cf6ea47fa20fbe11ac324a0a15c0a9a4a2a9 upstream. + +We can't use entry->bytes if our entry is a bitmap entry, we need to use +entry->max_extent_size in that case. Fix up all the logic to make this +consistent. + +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/free-space-cache.c | 30 ++++++++++++++++++++---------- + 1 file changed, 20 insertions(+), 10 deletions(-) + +--- a/fs/btrfs/free-space-cache.c ++++ b/fs/btrfs/free-space-cache.c +@@ -1795,6 +1795,13 @@ static int search_bitmap(struct btrfs_fr + return -1; + } + ++static inline u64 get_max_extent_size(struct btrfs_free_space *entry) ++{ ++ if (entry->bitmap) ++ return entry->max_extent_size; ++ return entry->bytes; ++} ++ + /* Cache the size of the max extent in bytes */ + static struct btrfs_free_space * + find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, +@@ -1816,8 +1823,8 @@ find_free_space(struct btrfs_free_space_ + for (node = &entry->offset_index; node; node = rb_next(node)) { + entry = rb_entry(node, struct btrfs_free_space, offset_index); + if (entry->bytes < *bytes) { +- if (entry->bytes > *max_extent_size) +- *max_extent_size = entry->bytes; ++ *max_extent_size = 
max(get_max_extent_size(entry), ++ *max_extent_size); + continue; + } + +@@ -1835,8 +1842,8 @@ find_free_space(struct btrfs_free_space_ + } + + if (entry->bytes < *bytes + align_off) { +- if (entry->bytes > *max_extent_size) +- *max_extent_size = entry->bytes; ++ *max_extent_size = max(get_max_extent_size(entry), ++ *max_extent_size); + continue; + } + +@@ -1848,8 +1855,10 @@ find_free_space(struct btrfs_free_space_ + *offset = tmp; + *bytes = size; + return entry; +- } else if (size > *max_extent_size) { +- *max_extent_size = size; ++ } else { ++ *max_extent_size = ++ max(get_max_extent_size(entry), ++ *max_extent_size); + } + continue; + } +@@ -2709,8 +2718,8 @@ static u64 btrfs_alloc_from_bitmap(struc + + err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); + if (err) { +- if (search_bytes > *max_extent_size) +- *max_extent_size = search_bytes; ++ *max_extent_size = max(get_max_extent_size(entry), ++ *max_extent_size); + return 0; + } + +@@ -2747,8 +2756,9 @@ u64 btrfs_alloc_from_cluster(struct btrf + + entry = rb_entry(node, struct btrfs_free_space, offset_index); + while (1) { +- if (entry->bytes < bytes && entry->bytes > *max_extent_size) +- *max_extent_size = entry->bytes; ++ if (entry->bytes < bytes) ++ *max_extent_size = max(get_max_extent_size(entry), ++ *max_extent_size); + + if (entry->bytes < bytes || + (!entry->bitmap && entry->offset < min_start)) { diff --git a/queue-4.14/net-sched-remove-tca_options-from-policy.patch b/queue-4.14/net-sched-remove-tca_options-from-policy.patch new file mode 100644 index 00000000000..d0fc0b57952 --- /dev/null +++ b/queue-4.14/net-sched-remove-tca_options-from-policy.patch @@ -0,0 +1,36 @@ +From e72bde6b66299602087c8c2350d36a525e75d06e Mon Sep 17 00:00:00 2001 +From: David Ahern +Date: Wed, 24 Oct 2018 08:32:49 -0700 +Subject: net: sched: Remove TCA_OPTIONS from policy + +From: David Ahern + +commit e72bde6b66299602087c8c2350d36a525e75d06e upstream. 
+ +Marco reported an error with hfsc: +root@Calimero:~# tc qdisc add dev eth0 root handle 1:0 hfsc default 1 +Error: Attribute failed policy validation. + +Apparently a few implementations pass TCA_OPTIONS as a binary instead +of nested attribute, so drop TCA_OPTIONS from the policy. + +Fixes: 8b4c3cdd9dd8 ("net: sched: Add policy validation for tc attributes") +Reported-by: Marco Berizzi +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + net/sched/sch_api.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -1218,7 +1218,6 @@ check_loop_fn(struct Qdisc *q, unsigned + + const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { + [TCA_KIND] = { .type = NLA_STRING }, +- [TCA_OPTIONS] = { .type = NLA_NESTED }, + [TCA_RATE] = { .type = NLA_BINARY, + .len = sizeof(struct tc_estimator) }, + [TCA_STAB] = { .type = NLA_NESTED }, diff --git a/queue-4.14/series b/queue-4.14/series index 87f41cb5b5b..199922c7c76 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -208,3 +208,15 @@ btrfs-make-sure-we-create-all-new-block-groups.patch btrfs-fix-warning-when-replaying-log-after-fsync-of-a-tmpfile.patch btrfs-fix-wrong-dentries-after-fsync-of-file-that-got-its-parent-replaced.patch btrfs-qgroup-dirty-all-qgroups-before-rescan.patch +btrfs-fix-null-pointer-dereference-on-compressed-write-path-error.patch +btrfs-fix-assertion-on-fsync-of-regular-file-when-using-no-holes-feature.patch +btrfs-set-max_extent_size-properly.patch +btrfs-don-t-use-ctl-free_space-for-max_extent_size.patch +btrfs-only-free-reserved-extent-if-we-didn-t-insert-it.patch +btrfs-don-t-run-delayed_iputs-in-commit.patch +btrfs-move-the-dio_sem-higher-up-the-callchain.patch +btrfs-fix-use-after-free-during-inode-eviction.patch +btrfs-fix-use-after-free-when-dumping-free-space.patch +btrfs-fix-fsync-after-hole-punching-when-using-no-holes-feature.patch +net-sched-remove-tca_options-from-policy.patch 
+bpf-wait-for-running-bpf-programs-when-updating-map-in-map.patch