From fb39a6cbbf702bddfa88957e7b6cbaba863ac9d6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 1 Mar 2016 10:32:30 -0800 Subject: [PATCH] 4.4-stable patches added patches: btrfs-add-missing-brelse-when-superblock-checksum-fails.patch btrfs-fix-number-of-transaction-units-required-to-create-symlink.patch btrfs-fix-transaction-handle-leak-on-failure-to-create-hard-link.patch btrfs-igrab-inode-in-writepage.patch btrfs-initialize-btrfs_root-highest_objectid-when-loading-tree-root-and-subvolume-roots.patch btrfs-initialize-the-seq-counter-in-struct-btrfs_device.patch btrfs-send-don-t-bug_on-when-an-empty-symlink-is-found.patch btrfs-statfs-report-zero-available-if-metadata-are-exhausted.patch --- ...relse-when-superblock-checksum-fails.patch | 32 ++++ ...ion-units-required-to-create-symlink.patch | 36 ++++ ...-leak-on-failure-to-create-hard-link.patch | 54 ++++++ .../btrfs-igrab-inode-in-writepage.patch | 59 ++++++ ...oading-tree-root-and-subvolume-roots.patch | 168 ++++++++++++++++++ ...e-seq-counter-in-struct-btrfs_device.patch | 45 +++++ ...ug_on-when-an-empty-symlink-is-found.patch | 61 +++++++ ...-available-if-metadata-are-exhausted.patch | 103 +++++++++++ queue-4.4/series | 8 + 9 files changed, 566 insertions(+) create mode 100644 queue-4.4/btrfs-add-missing-brelse-when-superblock-checksum-fails.patch create mode 100644 queue-4.4/btrfs-fix-number-of-transaction-units-required-to-create-symlink.patch create mode 100644 queue-4.4/btrfs-fix-transaction-handle-leak-on-failure-to-create-hard-link.patch create mode 100644 queue-4.4/btrfs-igrab-inode-in-writepage.patch create mode 100644 queue-4.4/btrfs-initialize-btrfs_root-highest_objectid-when-loading-tree-root-and-subvolume-roots.patch create mode 100644 queue-4.4/btrfs-initialize-the-seq-counter-in-struct-btrfs_device.patch create mode 100644 queue-4.4/btrfs-send-don-t-bug_on-when-an-empty-symlink-is-found.patch create mode 100644 
queue-4.4/btrfs-statfs-report-zero-available-if-metadata-are-exhausted.patch diff --git a/queue-4.4/btrfs-add-missing-brelse-when-superblock-checksum-fails.patch b/queue-4.4/btrfs-add-missing-brelse-when-superblock-checksum-fails.patch new file mode 100644 index 00000000000..9f97b9e577c --- /dev/null +++ b/queue-4.4/btrfs-add-missing-brelse-when-superblock-checksum-fails.patch @@ -0,0 +1,32 @@ +From b2acdddfad13c38a1e8b927d83c3cf321f63601a Mon Sep 17 00:00:00 2001 +From: Anand Jain +Date: Wed, 7 Oct 2015 17:23:23 +0800 +Subject: Btrfs: add missing brelse when superblock checksum fails + +From: Anand Jain + +commit b2acdddfad13c38a1e8b927d83c3cf321f63601a upstream. + +Looks like oversight, call brelse() when checksum fails. Further down the +code, in the non error path, we do call brelse() and so we don't see +brelse() in the goto error paths. + +Signed-off-by: Anand Jain +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/disk-io.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -2667,6 +2667,7 @@ int open_ctree(struct super_block *sb, + if (btrfs_check_super_csum(bh->b_data)) { + printk(KERN_ERR "BTRFS: superblock checksum mismatch\n"); + err = -EINVAL; ++ brelse(bh); + goto fail_alloc; + } + diff --git a/queue-4.4/btrfs-fix-number-of-transaction-units-required-to-create-symlink.patch b/queue-4.4/btrfs-fix-number-of-transaction-units-required-to-create-symlink.patch new file mode 100644 index 00000000000..3885983dcb1 --- /dev/null +++ b/queue-4.4/btrfs-fix-number-of-transaction-units-required-to-create-symlink.patch @@ -0,0 +1,36 @@ +From 9269d12b2d57d9e3d13036bb750762d1110d425c Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Thu, 31 Dec 2015 18:16:29 +0000 +Subject: Btrfs: fix number of transaction units required to create symlink + +From: Filipe Manana + +commit 9269d12b2d57d9e3d13036bb750762d1110d425c upstream. 
+ +We weren't accounting for the insertion of an inline extent item for the +symlink inode nor that we need to update the parent inode item (through +the call to btrfs_add_nondir()). So fix this by including two more +transaction units. + +Signed-off-by: Filipe Manana +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -9663,9 +9663,11 @@ static int btrfs_symlink(struct inode *d + /* + * 2 items for inode item and ref + * 2 items for dir items ++ * 1 item for updating parent inode item ++ * 1 item for the inline extent item + * 1 item for xattr if selinux is on + */ +- trans = btrfs_start_transaction(root, 5); ++ trans = btrfs_start_transaction(root, 7); + if (IS_ERR(trans)) + return PTR_ERR(trans); + diff --git a/queue-4.4/btrfs-fix-transaction-handle-leak-on-failure-to-create-hard-link.patch b/queue-4.4/btrfs-fix-transaction-handle-leak-on-failure-to-create-hard-link.patch new file mode 100644 index 00000000000..4e469688300 --- /dev/null +++ b/queue-4.4/btrfs-fix-transaction-handle-leak-on-failure-to-create-hard-link.patch @@ -0,0 +1,54 @@ +From 271dba4521aed0c37c063548f876b49f5cd64b2e Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Tue, 5 Jan 2016 16:24:05 +0000 +Subject: Btrfs: fix transaction handle leak on failure to create hard link + +From: Filipe Manana + +commit 271dba4521aed0c37c063548f876b49f5cd64b2e upstream. + +If we failed to create a hard link we were not always releasing the +transaction handle we got before, resulting in a memory leak and +preventing any other tasks from being able to commit the current +transaction. +Fix this by always releasing our transaction handle. 
+ +Signed-off-by: Filipe Manana +Reviewed-by: Liu Bo +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -6493,7 +6493,7 @@ out_unlock_inode: + static int btrfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) + { +- struct btrfs_trans_handle *trans; ++ struct btrfs_trans_handle *trans = NULL; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct inode *inode = d_inode(old_dentry); + u64 index; +@@ -6519,6 +6519,7 @@ static int btrfs_link(struct dentry *old + trans = btrfs_start_transaction(root, 5); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); ++ trans = NULL; + goto fail; + } + +@@ -6552,9 +6553,10 @@ static int btrfs_link(struct dentry *old + btrfs_log_new_name(trans, inode, NULL, parent); + } + +- btrfs_end_transaction(trans, root); + btrfs_balance_delayed_items(root); + fail: ++ if (trans) ++ btrfs_end_transaction(trans, root); + if (drop_inode) { + inode_dec_link_count(inode); + iput(inode); diff --git a/queue-4.4/btrfs-igrab-inode-in-writepage.patch b/queue-4.4/btrfs-igrab-inode-in-writepage.patch new file mode 100644 index 00000000000..19cfa58e289 --- /dev/null +++ b/queue-4.4/btrfs-igrab-inode-in-writepage.patch @@ -0,0 +1,59 @@ +From be7bd730841e69fe8f70120098596f648cd1f3ff Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Thu, 22 Oct 2015 15:05:09 -0400 +Subject: Btrfs: igrab inode in writepage + +From: Josef Bacik + +commit be7bd730841e69fe8f70120098596f648cd1f3ff upstream. + +We hit this panic on a few of our boxes this week where we have an +ordered_extent with a NULL inode. We do an igrab() of the inode in writepages, +but weren't doing it in writepage which can be called directly from the VM on +dirty pages. If the inode has been unlinked then we could have I_FREEING set +which means igrab() would return NULL and we get this panic. 
Fix this by trying +to igrab in btrfs_writepage, and if it returns NULL then just redirty the page +and return AOP_WRITEPAGE_ACTIVATE; so the VM knows it wasn't successful. Thanks, + +Signed-off-by: Josef Bacik +Reviewed-by: Liu Bo +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 17 +++++++++++++++-- + 1 file changed, 15 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -8548,15 +8548,28 @@ int btrfs_readpage(struct file *file, st + static int btrfs_writepage(struct page *page, struct writeback_control *wbc) + { + struct extent_io_tree *tree; +- ++ struct inode *inode = page->mapping->host; ++ int ret; + + if (current->flags & PF_MEMALLOC) { + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } ++ ++ /* ++ * If we are under memory pressure we will call this directly from the ++ * VM, we need to make sure we have the inode referenced for the ordered ++ * extent. If not just return like we didn't do anything. 
++ */ ++ if (!igrab(inode)) { ++ redirty_page_for_writepage(wbc, page); ++ return AOP_WRITEPAGE_ACTIVATE; ++ } + tree = &BTRFS_I(page->mapping->host)->io_tree; +- return extent_write_full_page(tree, page, btrfs_get_extent, wbc); ++ ret = extent_write_full_page(tree, page, btrfs_get_extent, wbc); ++ btrfs_add_delayed_iput(inode); ++ return ret; + } + + static int btrfs_writepages(struct address_space *mapping, diff --git a/queue-4.4/btrfs-initialize-btrfs_root-highest_objectid-when-loading-tree-root-and-subvolume-roots.patch b/queue-4.4/btrfs-initialize-btrfs_root-highest_objectid-when-loading-tree-root-and-subvolume-roots.patch new file mode 100644 index 00000000000..a04695fb117 --- /dev/null +++ b/queue-4.4/btrfs-initialize-btrfs_root-highest_objectid-when-loading-tree-root-and-subvolume-roots.patch @@ -0,0 +1,168 @@ +From f32e48e925964c4f8ab917850788a87e1cef3bad Mon Sep 17 00:00:00 2001 +From: Chandan Rajendra +Date: Thu, 7 Jan 2016 18:56:59 +0530 +Subject: Btrfs: Initialize btrfs_root->highest_objectid when loading tree root and subvolume roots + +From: Chandan Rajendra + +commit f32e48e925964c4f8ab917850788a87e1cef3bad upstream. 
+ +The following call trace is seen when btrfs/031 test is executed in a loop, + +[ 158.661848] ------------[ cut here ]------------ +[ 158.662634] WARNING: CPU: 2 PID: 890 at /home/chandan/repos/linux/fs/btrfs/ioctl.c:558 create_subvol+0x3d1/0x6ea() +[ 158.664102] BTRFS: Transaction aborted (error -2) +[ 158.664774] Modules linked in: +[ 158.665266] CPU: 2 PID: 890 Comm: btrfs Not tainted 4.4.0-rc6-g511711a #2 +[ 158.666251] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 +[ 158.667392] ffffffff81c0a6b0 ffff8806c7c4f8e8 ffffffff81431fc8 ffff8806c7c4f930 +[ 158.668515] ffff8806c7c4f920 ffffffff81051aa1 ffff880c85aff000 ffff8800bb44d000 +[ 158.669647] ffff8808863b5c98 0000000000000000 00000000fffffffe ffff8806c7c4f980 +[ 158.670769] Call Trace: +[ 158.671153] [] dump_stack+0x44/0x5c +[ 158.671884] [] warn_slowpath_common+0x81/0xc0 +[ 158.672769] [] warn_slowpath_fmt+0x47/0x50 +[ 158.673620] [] create_subvol+0x3d1/0x6ea +[ 158.674440] [] btrfs_mksubvol.isra.30+0x369/0x520 +[ 158.675376] [] ? percpu_down_read+0x1a/0x50 +[ 158.676235] [] btrfs_ioctl_snap_create_transid+0x101/0x180 +[ 158.677268] [] btrfs_ioctl_snap_create+0x52/0x70 +[ 158.678183] [] btrfs_ioctl+0x474/0x2f90 +[ 158.678975] [] ? vma_merge+0xee/0x300 +[ 158.679751] [] ? alloc_pages_vma+0x91/0x170 +[ 158.680599] [] ? lru_cache_add_active_or_unevictable+0x22/0x70 +[ 158.681686] [] ? selinux_file_ioctl+0xff/0x1d0 +[ 158.682581] [] do_vfs_ioctl+0x2c1/0x490 +[ 158.683399] [] ? 
security_file_ioctl+0x3e/0x60 +[ 158.684297] [] SyS_ioctl+0x74/0x80 +[ 158.685051] [] entry_SYSCALL_64_fastpath+0x12/0x6a +[ 158.685958] ---[ end trace 4b63312de5a2cb76 ]--- +[ 158.686647] BTRFS: error (device loop0) in create_subvol:558: errno=-2 No such entry +[ 158.709508] BTRFS info (device loop0): forced readonly +[ 158.737113] BTRFS info (device loop0): disk space caching is enabled +[ 158.738096] BTRFS error (device loop0): Remounting read-write after error is not allowed +[ 158.851303] BTRFS error (device loop0): cleaner transaction attach returned -30 + +This occurs because, + +Mount filesystem +Create subvol with ID 257 +Unmount filesystem +Mount filesystem +Delete subvol with ID 257 + btrfs_drop_snapshot() + Add root corresponding to subvol 257 into + btrfs_transaction->dropped_roots list +Create new subvol (i.e. create_subvol()) + 257 is returned as the next free objectid + btrfs_read_fs_root_no_name() + Finds the btrfs_root instance corresponding to the old subvol with ID 257 + in btrfs_fs_info->fs_roots_radix. + Returns error since btrfs_root_item->refs has the value of 0. + +To fix the issue the commit initializes tree root's and subvolume root's +highest_objectid when loading the roots from disk. 
+ +Signed-off-by: Chandan Rajendra +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/disk-io.c | 27 +++++++++++++++++++++++++++ + fs/btrfs/inode-map.c | 9 +-------- + fs/btrfs/inode-map.h | 1 + + fs/btrfs/ioctl.c | 4 ++++ + 4 files changed, 33 insertions(+), 8 deletions(-) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -1582,8 +1582,23 @@ int btrfs_init_fs_root(struct btrfs_root + ret = get_anon_bdev(&root->anon_dev); + if (ret) + goto free_writers; ++ ++ mutex_lock(&root->objectid_mutex); ++ ret = btrfs_find_highest_objectid(root, ++ &root->highest_objectid); ++ if (ret) { ++ mutex_unlock(&root->objectid_mutex); ++ goto free_root_dev; ++ } ++ ++ ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID); ++ ++ mutex_unlock(&root->objectid_mutex); ++ + return 0; + ++free_root_dev: ++ free_anon_bdev(root->anon_dev); + free_writers: + btrfs_free_subvolume_writers(root->subv_writers); + fail: +@@ -2900,6 +2915,18 @@ retry_root_backup: + tree_root->commit_root = btrfs_root_node(tree_root); + btrfs_set_root_refs(&tree_root->root_item, 1); + ++ mutex_lock(&tree_root->objectid_mutex); ++ ret = btrfs_find_highest_objectid(tree_root, ++ &tree_root->highest_objectid); ++ if (ret) { ++ mutex_unlock(&tree_root->objectid_mutex); ++ goto recovery_tree_root; ++ } ++ ++ ASSERT(tree_root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID); ++ ++ mutex_unlock(&tree_root->objectid_mutex); ++ + ret = btrfs_read_roots(fs_info, tree_root); + if (ret) + goto recovery_tree_root; +--- a/fs/btrfs/inode-map.c ++++ b/fs/btrfs/inode-map.c +@@ -515,7 +515,7 @@ out: + return ret; + } + +-static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid) ++int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid) + { + struct btrfs_path *path; + int ret; +@@ -555,13 +555,6 @@ int btrfs_find_free_objectid(struct btrf + int ret; + mutex_lock(&root->objectid_mutex); + +- if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) 
{ +- ret = btrfs_find_highest_objectid(root, +- &root->highest_objectid); +- if (ret) +- goto out; +- } +- + if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) { + ret = -ENOSPC; + goto out; +--- a/fs/btrfs/inode-map.h ++++ b/fs/btrfs/inode-map.h +@@ -9,5 +9,6 @@ int btrfs_save_ino_cache(struct btrfs_ro + struct btrfs_trans_handle *trans); + + int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid); ++int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid); + + #endif +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -568,6 +568,10 @@ static noinline int create_subvol(struct + goto fail; + } + ++ mutex_lock(&new_root->objectid_mutex); ++ new_root->highest_objectid = new_dirid; ++ mutex_unlock(&new_root->objectid_mutex); ++ + /* + * insert the directory item + */ diff --git a/queue-4.4/btrfs-initialize-the-seq-counter-in-struct-btrfs_device.patch b/queue-4.4/btrfs-initialize-the-seq-counter-in-struct-btrfs_device.patch new file mode 100644 index 00000000000..c8d9bc30ea2 --- /dev/null +++ b/queue-4.4/btrfs-initialize-the-seq-counter-in-struct-btrfs_device.patch @@ -0,0 +1,45 @@ +From 546bed631203344611f42b2af1d224d2eedb4e6b Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Fri, 15 Jan 2016 14:37:15 +0100 +Subject: btrfs: initialize the seq counter in struct btrfs_device + +From: Sebastian Andrzej Siewior + +commit 546bed631203344611f42b2af1d224d2eedb4e6b upstream. + +I managed to trigger this: +| INFO: trying to register non-static key. +| the code is fine but needs lockdep annotation. +| turning off the locking correctness validator. 
+| CPU: 1 PID: 781 Comm: systemd-gpt-aut Not tainted 4.4.0-rt2+ #14 +| Hardware name: ARM-Versatile Express +| [<80307cec>] (dump_stack) +| [<80070e98>] (__lock_acquire) +| [<8007184c>] (lock_acquire) +| [<80287800>] (btrfs_ioctl) +| [<8012a8d4>] (do_vfs_ioctl) +| [<8012ac14>] (SyS_ioctl) + +so I think that btrfs_device_data_ordered_init() is not invoked behind +a macro somewhere. + +Fixes: 7cc8e58d53cd ("Btrfs: fix unprotected device's variants on 32bits machine") +Signed-off-by: Sebastian Andrzej Siewior +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/volumes.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -232,6 +232,7 @@ static struct btrfs_device *__alloc_devi + spin_lock_init(&dev->reada_lock); + atomic_set(&dev->reada_in_flight, 0); + atomic_set(&dev->dev_stats_ccnt, 0); ++ btrfs_device_data_ordered_init(dev); + INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); + INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); + diff --git a/queue-4.4/btrfs-send-don-t-bug_on-when-an-empty-symlink-is-found.patch b/queue-4.4/btrfs-send-don-t-bug_on-when-an-empty-symlink-is-found.patch new file mode 100644 index 00000000000..1b0eca0c250 --- /dev/null +++ b/queue-4.4/btrfs-send-don-t-bug_on-when-an-empty-symlink-is-found.patch @@ -0,0 +1,61 @@ +From a879719b8c90e15c9e7fa7266d5e3c0ca962f9df Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Thu, 31 Dec 2015 18:07:59 +0000 +Subject: Btrfs: send, don't BUG_ON() when an empty symlink is found + +From: Filipe Manana + +commit a879719b8c90e15c9e7fa7266d5e3c0ca962f9df upstream. + +When a symlink is successfully created it always has an inline extent +containing the source path. 
However if an error happens when creating +the symlink, we can leave in the subvolume's tree a symlink inode without +any such inline extent item - this happens if after btrfs_symlink() calls +btrfs_end_transaction() and before it calls the inode eviction handler +(through the final iput() call), the transaction gets committed and a +crash happens before the eviction handler gets called, or if a snapshot +of the subvolume is made before the eviction handler gets called. Sadly +we can't just avoid this by making btrfs_symlink() call +btrfs_end_transaction() after it calls the eviction handler, because the +later can commit the current transaction before it removes any items from +the subvolume tree (if it encounters ENOSPC errors while reserving space +for removing all the items). + +So make send fail more gracefully, with an -EIO error, and print a +message to dmesg/syslog informing that there's an empty symlink inode, +so that the user can delete the empty symlink or do something else +about it. + +Reported-by: Stephen R. van den Berg +Signed-off-by: Filipe Manana +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/send.c | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/send.c ++++ b/fs/btrfs/send.c +@@ -1469,7 +1469,21 @@ static int read_symlink(struct btrfs_roo + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; +- BUG_ON(ret); ++ if (ret) { ++ /* ++ * An empty symlink inode. Can happen in rare error paths when ++ * creating a symlink (transaction committed before the inode ++ * eviction handler removed the symlink inode items and a crash ++ * happened in between or the subvol was snapshoted in between). ++ * Print an informative message to dmesg/syslog so that the user ++ * can delete the symlink. 
++ */ ++ btrfs_err(root->fs_info, ++ "Found empty symlink inode %llu at root %llu", ++ ino, root->root_key.objectid); ++ ret = -EIO; ++ goto out; ++ } + + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); diff --git a/queue-4.4/btrfs-statfs-report-zero-available-if-metadata-are-exhausted.patch b/queue-4.4/btrfs-statfs-report-zero-available-if-metadata-are-exhausted.patch new file mode 100644 index 00000000000..987e6d0a8f8 --- /dev/null +++ b/queue-4.4/btrfs-statfs-report-zero-available-if-metadata-are-exhausted.patch @@ -0,0 +1,103 @@ +From ca8a51b3a979d57b082b14eda38602b7f52d81d1 Mon Sep 17 00:00:00 2001 +From: David Sterba +Date: Sat, 10 Oct 2015 17:59:53 +0200 +Subject: btrfs: statfs: report zero available if metadata are exhausted + +From: David Sterba + +commit ca8a51b3a979d57b082b14eda38602b7f52d81d1 upstream. + +There is one ENOSPC case that's very confusing. There's Available +greater than zero but no file operation succeeds (besides removing +files). This happens when the metadata are exhausted and there's no +possibility to allocate another chunk. + +In this scenario it's normal that there's still some space in the data +chunk and the calculation in df reflects that in the Avail value. + +To at least give some clue about the ENOSPC situation, let statfs report +zero value in Avail, even if there's still data space available. + +Current: + /dev/sdb1 4.0G 3.3G 719M 83% /mnt/test + +New: + /dev/sdb1 4.0G 3.3G 0 100% /mnt/test + +We calculate the remaining metadata space minus global reserve. If this +is (supposedly) smaller than zero, there's no space. But this does not +hold in practice, the exhausted state happens where's still some +positive delta. So we apply some guesswork and compare the delta to a 4M +threshold. (Practically observed delta was 2M.) 
+ +We probably cannot calculate the exact threshold value because this +depends on the internal reservations requested by various operations, so +some operations that consume a few metadata will succeed even if the +Avail is zero. But this is better than the other way around. + +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/super.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -1956,6 +1956,8 @@ static int btrfs_calc_avail_data_space(s + * there are other factors that may change the result (like a new metadata + * chunk). + * ++ * If metadata is exhausted, f_bavail will be 0. ++ * + * FIXME: not accurate for mixed block groups, total and free/used are ok, + * available appears slightly larger. + */ +@@ -1967,11 +1969,13 @@ static int btrfs_statfs(struct dentry *d + struct btrfs_space_info *found; + u64 total_used = 0; + u64 total_free_data = 0; ++ u64 total_free_meta = 0; + int bits = dentry->d_sb->s_blocksize_bits; + __be32 *fsid = (__be32 *)fs_info->fsid; + unsigned factor = 1; + struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; + int ret; ++ u64 thresh = 0; + + /* + * holding chunk_muext to avoid allocating new chunks, holding +@@ -1997,6 +2001,8 @@ static int btrfs_statfs(struct dentry *d + } + } + } ++ if (found->flags & BTRFS_BLOCK_GROUP_METADATA) ++ total_free_meta += found->disk_total - found->disk_used; + + total_used += found->disk_used; + } +@@ -2019,6 +2025,24 @@ static int btrfs_statfs(struct dentry *d + buf->f_bavail += div_u64(total_free_data, factor); + buf->f_bavail = buf->f_bavail >> bits; + ++ /* ++ * We calculate the remaining metadata space minus global reserve. If ++ * this is (supposedly) smaller than zero, there's no space. But this ++ * does not hold in practice, the exhausted state happens where's still ++ * some positive delta. So we apply some guesswork and compare the ++ * delta to a 4M threshold. 
(Practically observed delta was ~2M.) ++ * ++ * We probably cannot calculate the exact threshold value because this ++ * depends on the internal reservations requested by various ++ * operations, so some operations that consume a few metadata will ++ * succeed even if the Avail is zero. But this is better than the other ++ * way around. ++ */ ++ thresh = 4 * 1024 * 1024; ++ ++ if (total_free_meta - thresh < block_rsv->size) ++ buf->f_bavail = 0; ++ + buf->f_type = BTRFS_SUPER_MAGIC; + buf->f_bsize = dentry->d_sb->s_blocksize; + buf->f_namelen = BTRFS_NAME_LEN; diff --git a/queue-4.4/series b/queue-4.4/series index de3edf4b7a9..9e3a58a40c5 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -84,3 +84,11 @@ genirq-validate-action-before-dereferencing-it-in-handle_irq_event_percpu.patch clocksource-drivers-vt8500-increase-the-minimum-delta.patch s390-kvm-remove-dependency-on-struct-save_area-definition.patch kvm-s390-fix-memory-overwrites-when-vx-is-disabled.patch +btrfs-add-missing-brelse-when-superblock-checksum-fails.patch +btrfs-igrab-inode-in-writepage.patch +btrfs-statfs-report-zero-available-if-metadata-are-exhausted.patch +btrfs-send-don-t-bug_on-when-an-empty-symlink-is-found.patch +btrfs-fix-number-of-transaction-units-required-to-create-symlink.patch +btrfs-fix-transaction-handle-leak-on-failure-to-create-hard-link.patch +btrfs-initialize-btrfs_root-highest_objectid-when-loading-tree-root-and-subvolume-roots.patch +btrfs-initialize-the-seq-counter-in-struct-btrfs_device.patch -- 2.47.3