--- /dev/null
+From 8d875f95da43c6a8f18f77869f2ef26e9594fecc Mon Sep 17 00:00:00 2001
+From: Chris Mason <clm@fb.com>
+Date: Tue, 12 Aug 2014 10:47:42 -0700
+Subject: btrfs: disable strict file flushes for renames and truncates
+
+From: Chris Mason <clm@fb.com>
+
+commit 8d875f95da43c6a8f18f77869f2ef26e9594fecc upstream.
+
+Truncates and renames are often used to replace old versions of a file
+with new versions. Applications often expect this to be an atomic
+replacement, even if they haven't done anything to make sure the new
+version is fully on disk.
+
+Btrfs has strict flushing in place to make sure that renaming over an
+old file with a new file will fully flush out the new file before
+allowing the transaction commit with the rename to complete.
+
+This ordering means the commit code needs to be able to lock file pages,
+and there are a few paths in the filesystem where we will try to end a
+transaction with the page lock held. It's rare, but these things can
+deadlock.
+
+This patch removes the ordered flushes and switches to a best effort
+filemap_flush like ext4 uses. It's not perfect, but it should fix the
+deadlocks.
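+
+For reference, the replacement pattern in question looks like this from
+userspace (a minimal sketch; it deliberately skips fsync(), which is
+exactly what many applications do and what the strict flushing tried to
+paper over):
+
+    #include <fcntl.h>
+    #include <stdio.h>
+    #include <unistd.h>
+
+    int main(void)
+    {
+        /* write the new version to a temporary file */
+        int fd = open("file.tmp", O_WRONLY | O_CREAT | O_TRUNC, 0644);
+        if (fd < 0)
+            return 1;
+        if (write(fd, "new contents\n", 13) != 13)
+            return 1;
+        /* no fsync(fd) here -- the application assumes the rename
+         * alone makes the replacement atomic and durable */
+        close(fd);
+        /* atomically replace the old version */
+        return rename("file.tmp", "file") ? 1 : 0;
+    }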
+
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/btrfs_inode.h | 6 --
+ fs/btrfs/disk-io.c | 32 ------------
+ fs/btrfs/file.c | 26 ----------
+ fs/btrfs/inode.c | 47 +-----------------
+ fs/btrfs/ordered-data.c | 123 ------------------------------------------------
+ fs/btrfs/ordered-data.h | 5 -
+ fs/btrfs/transaction.c | 33 ------------
+ fs/btrfs/transaction.h | 1
+ 8 files changed, 6 insertions(+), 267 deletions(-)
+
+--- a/fs/btrfs/btrfs_inode.h
++++ b/fs/btrfs/btrfs_inode.h
+@@ -84,12 +84,6 @@ struct btrfs_inode {
+ */
+ struct list_head delalloc_inodes;
+
+- /*
+- * list for tracking inodes that must be sent to disk before a
+- * rename or truncate commit
+- */
+- struct list_head ordered_operations;
+-
+ /* node for the red-black tree that links inodes in subvolume root */
+ struct rb_node rb_node;
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -60,8 +60,6 @@ static void end_workqueue_fn(struct btrf
+ static void free_fs_root(struct btrfs_root *root);
+ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
+ int read_only);
+-static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
+- struct btrfs_root *root);
+ static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
+ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
+ struct btrfs_root *root);
+@@ -3829,34 +3827,6 @@ static void btrfs_error_commit_super(str
+ btrfs_cleanup_transaction(root);
+ }
+
+-static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
+- struct btrfs_root *root)
+-{
+- struct btrfs_inode *btrfs_inode;
+- struct list_head splice;
+-
+- INIT_LIST_HEAD(&splice);
+-
+- mutex_lock(&root->fs_info->ordered_operations_mutex);
+- spin_lock(&root->fs_info->ordered_root_lock);
+-
+- list_splice_init(&t->ordered_operations, &splice);
+- while (!list_empty(&splice)) {
+- btrfs_inode = list_entry(splice.next, struct btrfs_inode,
+- ordered_operations);
+-
+- list_del_init(&btrfs_inode->ordered_operations);
+- spin_unlock(&root->fs_info->ordered_root_lock);
+-
+- btrfs_invalidate_inodes(btrfs_inode->root);
+-
+- spin_lock(&root->fs_info->ordered_root_lock);
+- }
+-
+- spin_unlock(&root->fs_info->ordered_root_lock);
+- mutex_unlock(&root->fs_info->ordered_operations_mutex);
+-}
+-
+ static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
+ {
+ struct btrfs_ordered_extent *ordered;
+@@ -4093,8 +4063,6 @@ again:
+ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
+ struct btrfs_root *root)
+ {
+- btrfs_destroy_ordered_operations(cur_trans, root);
+-
+ btrfs_destroy_delayed_refs(cur_trans, root);
+
+ cur_trans->state = TRANS_STATE_COMMIT_START;
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -1838,33 +1838,9 @@ out:
+
+ int btrfs_release_file(struct inode *inode, struct file *filp)
+ {
+- /*
+- * ordered_data_close is set by settattr when we are about to truncate
+- * a file from a non-zero size to a zero size. This tries to
+- * flush down new bytes that may have been written if the
+- * application were using truncate to replace a file in place.
+- */
+- if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
+- &BTRFS_I(inode)->runtime_flags)) {
+- struct btrfs_trans_handle *trans;
+- struct btrfs_root *root = BTRFS_I(inode)->root;
+-
+- /*
+- * We need to block on a committing transaction to keep us from
+- * throwing a ordered operation on to the list and causing
+- * something like sync to deadlock trying to flush out this
+- * inode.
+- */
+- trans = btrfs_start_transaction(root, 0);
+- if (IS_ERR(trans))
+- return PTR_ERR(trans);
+- btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode);
+- btrfs_end_transaction(trans, root);
+- if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
+- filemap_flush(inode->i_mapping);
+- }
+ if (filp->private_data)
+ btrfs_ioctl_trans_end(filp);
++ filemap_flush(inode->i_mapping);
+ return 0;
+ }
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -7951,27 +7951,6 @@ static int btrfs_truncate(struct inode *
+ BUG_ON(ret);
+
+ /*
+- * setattr is responsible for setting the ordered_data_close flag,
+- * but that is only tested during the last file release. That
+- * could happen well after the next commit, leaving a great big
+- * window where new writes may get lost if someone chooses to write
+- * to this file after truncating to zero
+- *
+- * The inode doesn't have any dirty data here, and so if we commit
+- * this is a noop. If someone immediately starts writing to the inode
+- * it is very likely we'll catch some of their writes in this
+- * transaction, and the commit will find this file on the ordered
+- * data list with good things to send down.
+- *
+- * This is a best effort solution, there is still a window where
+- * using truncate to replace the contents of the file will
+- * end up with a zero length file after a crash.
+- */
+- if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
+- &BTRFS_I(inode)->runtime_flags))
+- btrfs_add_ordered_operation(trans, root, inode);
+-
+- /*
+ * So if we truncate and then write and fsync we normally would just
+ * write the extents that changed, which is a problem if we need to
+ * first truncate that entire inode. So set this flag so we write out
+@@ -8118,7 +8097,6 @@ struct inode *btrfs_alloc_inode(struct s
+ mutex_init(&ei->delalloc_mutex);
+ btrfs_ordered_inode_tree_init(&ei->ordered_tree);
+ INIT_LIST_HEAD(&ei->delalloc_inodes);
+- INIT_LIST_HEAD(&ei->ordered_operations);
+ RB_CLEAR_NODE(&ei->rb_node);
+
+ return inode;
+@@ -8158,17 +8136,6 @@ void btrfs_destroy_inode(struct inode *i
+ if (!root)
+ goto free;
+
+- /*
+- * Make sure we're properly removed from the ordered operation
+- * lists.
+- */
+- smp_mb();
+- if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
+- spin_lock(&root->fs_info->ordered_root_lock);
+- list_del_init(&BTRFS_I(inode)->ordered_operations);
+- spin_unlock(&root->fs_info->ordered_root_lock);
+- }
+-
+ if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+ &BTRFS_I(inode)->runtime_flags)) {
+ btrfs_info(root->fs_info, "inode %llu still on the orphan list",
+@@ -8350,12 +8317,10 @@ static int btrfs_rename(struct inode *ol
+ ret = 0;
+
+ /*
+- * we're using rename to replace one file with another.
+- * and the replacement file is large. Start IO on it now so
+- * we don't add too much work to the end of the transaction
++ * we're using rename to replace one file with another. Start IO on it
++ * now so we don't add too much work to the end of the transaction
+ */
+- if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size &&
+- old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
++ if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
+ filemap_flush(old_inode->i_mapping);
+
+ /* close the racy window with snapshot create/destroy ioctl */
+@@ -8403,12 +8368,6 @@ static int btrfs_rename(struct inode *ol
+ */
+ btrfs_pin_log_trans(root);
+ }
+- /*
+- * make sure the inode gets flushed if it is replacing
+- * something.
+- */
+- if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
+- btrfs_add_ordered_operation(trans, root, old_inode);
+
+ inode_inc_iversion(old_dir);
+ inode_inc_iversion(new_dir);
+--- a/fs/btrfs/ordered-data.c
++++ b/fs/btrfs/ordered-data.c
+@@ -571,18 +571,6 @@ void btrfs_remove_ordered_extent(struct
+
+ trace_btrfs_ordered_extent_remove(inode, entry);
+
+- /*
+- * we have no more ordered extents for this inode and
+- * no dirty pages. We can safely remove it from the
+- * list of ordered extents
+- */
+- if (RB_EMPTY_ROOT(&tree->tree) &&
+- !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
+- spin_lock(&root->fs_info->ordered_root_lock);
+- list_del_init(&BTRFS_I(inode)->ordered_operations);
+- spin_unlock(&root->fs_info->ordered_root_lock);
+- }
+-
+ if (!root->nr_ordered_extents) {
+ spin_lock(&root->fs_info->ordered_root_lock);
+ BUG_ON(list_empty(&root->ordered_root));
+@@ -687,81 +675,6 @@ void btrfs_wait_ordered_roots(struct btr
+ }
+
+ /*
+- * this is used during transaction commit to write all the inodes
+- * added to the ordered operation list. These files must be fully on
+- * disk before the transaction commits.
+- *
+- * we have two modes here, one is to just start the IO via filemap_flush
+- * and the other is to wait for all the io. When we wait, we have an
+- * extra check to make sure the ordered operation list really is empty
+- * before we return
+- */
+-int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root, int wait)
+-{
+- struct btrfs_inode *btrfs_inode;
+- struct inode *inode;
+- struct btrfs_transaction *cur_trans = trans->transaction;
+- struct list_head splice;
+- struct list_head works;
+- struct btrfs_delalloc_work *work, *next;
+- int ret = 0;
+-
+- INIT_LIST_HEAD(&splice);
+- INIT_LIST_HEAD(&works);
+-
+- mutex_lock(&root->fs_info->ordered_extent_flush_mutex);
+- spin_lock(&root->fs_info->ordered_root_lock);
+- list_splice_init(&cur_trans->ordered_operations, &splice);
+- while (!list_empty(&splice)) {
+- btrfs_inode = list_entry(splice.next, struct btrfs_inode,
+- ordered_operations);
+- inode = &btrfs_inode->vfs_inode;
+-
+- list_del_init(&btrfs_inode->ordered_operations);
+-
+- /*
+- * the inode may be getting freed (in sys_unlink path).
+- */
+- inode = igrab(inode);
+- if (!inode)
+- continue;
+-
+- if (!wait)
+- list_add_tail(&BTRFS_I(inode)->ordered_operations,
+- &cur_trans->ordered_operations);
+- spin_unlock(&root->fs_info->ordered_root_lock);
+-
+- work = btrfs_alloc_delalloc_work(inode, wait, 1);
+- if (!work) {
+- spin_lock(&root->fs_info->ordered_root_lock);
+- if (list_empty(&BTRFS_I(inode)->ordered_operations))
+- list_add_tail(&btrfs_inode->ordered_operations,
+- &splice);
+- list_splice_tail(&splice,
+- &cur_trans->ordered_operations);
+- spin_unlock(&root->fs_info->ordered_root_lock);
+- ret = -ENOMEM;
+- goto out;
+- }
+- list_add_tail(&work->list, &works);
+- btrfs_queue_work(root->fs_info->flush_workers,
+- &work->work);
+-
+- cond_resched();
+- spin_lock(&root->fs_info->ordered_root_lock);
+- }
+- spin_unlock(&root->fs_info->ordered_root_lock);
+-out:
+- list_for_each_entry_safe(work, next, &works, list) {
+- list_del_init(&work->list);
+- btrfs_wait_and_free_delalloc_work(work);
+- }
+- mutex_unlock(&root->fs_info->ordered_extent_flush_mutex);
+- return ret;
+-}
+-
+-/*
+ * Used to start IO or wait for a given ordered extent to finish.
+ *
+ * If wait is one, this effectively waits on page writeback for all the pages
+@@ -1120,42 +1033,6 @@ out:
+ return index;
+ }
+
+-
+-/*
+- * add a given inode to the list of inodes that must be fully on
+- * disk before a transaction commit finishes.
+- *
+- * This basically gives us the ext3 style data=ordered mode, and it is mostly
+- * used to make sure renamed files are fully on disk.
+- *
+- * It is a noop if the inode is already fully on disk.
+- *
+- * If trans is not null, we'll do a friendly check for a transaction that
+- * is already flushing things and force the IO down ourselves.
+- */
+-void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root, struct inode *inode)
+-{
+- struct btrfs_transaction *cur_trans = trans->transaction;
+- u64 last_mod;
+-
+- last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
+-
+- /*
+- * if this file hasn't been changed since the last transaction
+- * commit, we can safely return without doing anything
+- */
+- if (last_mod <= root->fs_info->last_trans_committed)
+- return;
+-
+- spin_lock(&root->fs_info->ordered_root_lock);
+- if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
+- list_add_tail(&BTRFS_I(inode)->ordered_operations,
+- &cur_trans->ordered_operations);
+- }
+- spin_unlock(&root->fs_info->ordered_root_lock);
+-}
+-
+ int __init ordered_data_init(void)
+ {
+ btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
+--- a/fs/btrfs/ordered-data.h
++++ b/fs/btrfs/ordered-data.h
+@@ -190,11 +190,6 @@ int btrfs_ordered_update_i_size(struct i
+ struct btrfs_ordered_extent *ordered);
+ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
+ u32 *sum, int len);
+-int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root, int wait);
+-void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root,
+- struct inode *inode);
+ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
+ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
+ void btrfs_get_logged_extents(struct inode *inode,
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -218,7 +218,6 @@ loop:
+ spin_lock_init(&cur_trans->delayed_refs.lock);
+
+ INIT_LIST_HEAD(&cur_trans->pending_snapshots);
+- INIT_LIST_HEAD(&cur_trans->ordered_operations);
+ INIT_LIST_HEAD(&cur_trans->pending_chunks);
+ INIT_LIST_HEAD(&cur_trans->switch_commits);
+ list_add_tail(&cur_trans->list, &fs_info->trans_list);
+@@ -1612,27 +1611,6 @@ static void cleanup_transaction(struct b
+ kmem_cache_free(btrfs_trans_handle_cachep, trans);
+ }
+
+-static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root)
+-{
+- int ret;
+-
+- ret = btrfs_run_delayed_items(trans, root);
+- if (ret)
+- return ret;
+-
+- /*
+- * rename don't use btrfs_join_transaction, so, once we
+- * set the transaction to blocked above, we aren't going
+- * to get any new ordered operations. We can safely run
+- * it here and no for sure that nothing new will be added
+- * to the list
+- */
+- ret = btrfs_run_ordered_operations(trans, root, 1);
+-
+- return ret;
+-}
+-
+ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
+ {
+ if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
+@@ -1653,13 +1631,6 @@ int btrfs_commit_transaction(struct btrf
+ struct btrfs_transaction *prev_trans = NULL;
+ int ret;
+
+- ret = btrfs_run_ordered_operations(trans, root, 0);
+- if (ret) {
+- btrfs_abort_transaction(trans, root, ret);
+- btrfs_end_transaction(trans, root);
+- return ret;
+- }
+-
+ /* Stop the commit early if ->aborted is set */
+ if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
+ ret = cur_trans->aborted;
+@@ -1740,7 +1711,7 @@ int btrfs_commit_transaction(struct btrf
+ if (ret)
+ goto cleanup_transaction;
+
+- ret = btrfs_flush_all_pending_stuffs(trans, root);
++ ret = btrfs_run_delayed_items(trans, root);
+ if (ret)
+ goto cleanup_transaction;
+
+@@ -1748,7 +1719,7 @@ int btrfs_commit_transaction(struct btrf
+ extwriter_counter_read(cur_trans) == 0);
+
+ /* some pending stuffs might be added after the previous flush. */
+- ret = btrfs_flush_all_pending_stuffs(trans, root);
++ ret = btrfs_run_delayed_items(trans, root);
+ if (ret)
+ goto cleanup_transaction;
+
+--- a/fs/btrfs/transaction.h
++++ b/fs/btrfs/transaction.h
+@@ -55,7 +55,6 @@ struct btrfs_transaction {
+ wait_queue_head_t writer_wait;
+ wait_queue_head_t commit_wait;
+ struct list_head pending_snapshots;
+- struct list_head ordered_operations;
+ struct list_head pending_chunks;
+ struct list_head switch_commits;
+ struct btrfs_delayed_ref_root delayed_refs;
--- /dev/null
+From ce62003f690dff38d3164a632ec69efa15c32cbf Mon Sep 17 00:00:00 2001
+From: Liu Bo <bo.li.liu@oracle.com>
+Date: Thu, 24 Jul 2014 22:48:05 +0800
+Subject: Btrfs: fix compressed write corruption on enospc
+
+From: Liu Bo <bo.li.liu@oracle.com>
+
+commit ce62003f690dff38d3164a632ec69efa15c32cbf upstream.
+
+When we fail to allocate space for the whole compressed extent, we
+fall back to uncompressed IO, but we forgot to redirty the pages that
+belong to this compressed extent. These 'clean' pages simply skip the
+'submit' part and go straight to endio, so we end up with data
+corruption because nothing is ever written.
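+
+To see why the 'clean' pages get lost, note how the writeback path picks
+pages; roughly (a sketch of the extent_write_cache_pages() logic, not the
+literal code):
+
+    /* only pages tagged dirty are looked up at all */
+    pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, nr);
+    ...
+    if (PageWriteback(page) || !clear_page_dirty_for_io(page)) {
+        unlock_page(page);
+        continue;    /* clean page: nothing to submit */
+    }
+
+Compression cleaned the pages up front, so without
+extent_range_redirty_for_io() the uncompressed retry finds nothing to
+submit.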
+
+Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
+Tested-By: Martin Steigerwald <martin@lichtvoll.de>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -709,6 +709,18 @@ retry:
+ unlock_extent(io_tree, async_extent->start,
+ async_extent->start +
+ async_extent->ram_size - 1);
++
++ /*
++ * we need to redirty the pages if we decide to
++ * fallback to uncompressed IO, otherwise we
++ * will not submit these pages down to lower
++ * layers.
++ */
++ extent_range_redirty_for_io(inode,
++ async_extent->start,
++ async_extent->start +
++ async_extent->ram_size - 1);
++
+ goto retry;
+ }
+ goto out_free;
--- /dev/null
+From 38c1c2e44bacb37efd68b90b3f70386a8ee370ee Mon Sep 17 00:00:00 2001
+From: Liu Bo <bo.li.liu@oracle.com>
+Date: Tue, 19 Aug 2014 23:33:13 +0800
+Subject: Btrfs: fix crash on endio of reading corrupted block
+
+From: Liu Bo <bo.li.liu@oracle.com>
+
+commit 38c1c2e44bacb37efd68b90b3f70386a8ee370ee upstream.
+
+The crash is
+
+------------[ cut here ]------------
+kernel BUG at fs/btrfs/extent_io.c:2124!
+[...]
+Workqueue: btrfs-endio normal_work_helper [btrfs]
+RIP: 0010:[<ffffffffa02d6055>] [<ffffffffa02d6055>] end_bio_extent_readpage+0xb45/0xcd0 [btrfs]
+
+This is in fact a regression.
+
+It happens because we forgot to advance @offset properly when reading a
+corrupted block, so @offset stays where it was. This leads to checksum
+errors while reading the remaining blocks queued up in the same bio, and
+we end up hitting the above BUG_ON.
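+
+Sketched from the hunk below (the condition name is a placeholder, not
+the literal code), the endio loop walks every block in the bio and must
+keep @offset in step even on the error path:
+
+    bio_for_each_segment_all(bvec, bio, i) {
+        ...
+        if (csum_failed_and_repair_attempted) {
+            if (err)
+                uptodate = 0;
+            offset += len;    /* the missing advance */
+            continue;
+        }
+        ...
+        offset += len;
+    }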
+
+Reported-by: Chris Murphy <lists@colorremedies.com>
+Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent_io.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -2532,6 +2532,7 @@ static void end_bio_extent_readpage(stru
+ test_bit(BIO_UPTODATE, &bio->bi_flags);
+ if (err)
+ uptodate = 0;
++ offset += len;
+ continue;
+ }
+ }
--- /dev/null
+From 27b9a8122ff71a8cadfbffb9c4f0694300464f3b Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Sat, 9 Aug 2014 21:22:27 +0100
+Subject: Btrfs: fix csum tree corruption, duplicate and outdated checksums
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 27b9a8122ff71a8cadfbffb9c4f0694300464f3b upstream.
+
+Under rare circumstances we can end up leaving 2 versions of a checksum
+for the same file extent range.
+
+The reason for this is that after calling btrfs_next_leaf we process
+slot 0 of the leaf it returns, instead of processing the slot set in
+path->slots[0]. Most of the time (by far) path->slots[0] is 0, but after
+btrfs_next_leaf() releases the path and before it searches for the next
+leaf, another task might cause a split of the next leaf, which migrates
+some of its keys to the leaf we were processing before calling
+btrfs_next_leaf(). In this case btrfs_next_leaf() returns again the
+same leaf but with path->slots[0] having a slot number corresponding
+to the first new key it got, that is, a slot number that didn't exist
+before calling btrfs_next_leaf(), as the leaf now has more keys than
+it had before. So we must really process the returned leaf starting at
+path->slots[0] always, as it isn't always 0, and the key at slot 0 can
+have an offset much lower than our search offset/bytenr.
+
+For example, consider the following scenario, where we have:
+
+sums->bytenr: 40157184, sums->len: 16384, sums end: 40173568
+four 4kb file data blocks with offsets 40157184, 40161280, 40165376, 40169472
+
+ Leaf N:
+
+ slot = 0 slot = btrfs_header_nritems() - 1
+ |-------------------------------------------------------------------|
+ | [(CSUM CSUM 39239680), size 8] ... [(CSUM CSUM 40116224), size 4] |
+ |-------------------------------------------------------------------|
+
+ Leaf N + 1:
+
+ slot = 0 slot = btrfs_header_nritems() - 1
+ |--------------------------------------------------------------------|
+ | [(CSUM CSUM 40161280), size 32] ... [(CSUM CSUM 40615936), size 8] |
+ |--------------------------------------------------------------------|
+
+Because we are at the last slot of leaf N, we call btrfs_next_leaf() to
+find the next highest key, which releases the current path and then searches
+for that next key. However after releasing the path and before finding that
+next key, the item at slot 0 of leaf N + 1 gets moved to leaf N, due to a call
+to ctree.c:push_leaf_left() (via ctree.c:split_leaf()), and therefore
+btrfs_next_leaf() will return us a path again with leaf N but with the slot
+pointing to its new last key (CSUM CSUM 40161280). This new version of leaf N
+is then:
+
+ slot = 0 slot = btrfs_header_nritems() - 2 slot = btrfs_header_nritems() - 1
+ |----------------------------------------------------------------------------------------------------|
+ | [(CSUM CSUM 39239680), size 8] ... [(CSUM CSUM 40116224), size 4] [(CSUM CSUM 40161280), size 32] |
+ |----------------------------------------------------------------------------------------------------|
+
+And incorrectly using slot 0 makes us set next_offset to 39239680, and we jump
+into the "insert:" label, which will set tmp to:
+
+ tmp = min((sums->len - total_bytes) >> blocksize_bits,
+ (next_offset - file_key.offset) >> blocksize_bits) =
+ min((16384 - 0) >> 12, (39239680 - 40157184) >> 12) =
+ min(4, (u64)-917504 = 18446744073708634112 >> 12) = 4
+
+and
+
+ ins_size = csum_size * tmp = 4 * 4 = 16 bytes.
+
+In other words, we insert a new csum item in the tree with key
+(CSUM_OBJECTID CSUM_KEY 40157184 = sums->bytenr) that contains the checksums
+for all the data (4 blocks of 4096 bytes each = sums->len). Which is wrong,
+because the item with key (CSUM CSUM 40161280) (the one that was moved from
+leaf N + 1 to the end of leaf N) contains the old checksums of the last 12288
+bytes of our data and won't get those old checksums removed.
+
+So this leaves us 2 different checksums for 3 4kb blocks of data in the tree,
+and breaks the logical rule:
+
+ Key_N+1.offset >= Key_N.offset + length_of_data_its_checksums_cover
+
+An obvious bad effect of this is that a subsequent csum tree lookup to get
+the checksum of any of the blocks with logical offset of 40161280, 40165376
+or 40169472 (the last 3 4kb blocks of file data), will get the old checksums.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/file-item.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/file-item.c
++++ b/fs/btrfs/file-item.c
+@@ -756,7 +756,7 @@ again:
+ found_next = 1;
+ if (ret != 0)
+ goto insert;
+- slot = 0;
++ slot = path->slots[0];
+ }
+ btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
+ if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
--- /dev/null
+From f6dc45c7a93a011dff6eb9b2ffda59c390c7705a Mon Sep 17 00:00:00 2001
+From: Chris Mason <clm@fb.com>
+Date: Wed, 20 Aug 2014 07:15:33 -0700
+Subject: Btrfs: fix filemap_flush call in btrfs_file_release
+
+From: Chris Mason <clm@fb.com>
+
+commit f6dc45c7a93a011dff6eb9b2ffda59c390c7705a upstream.
+
+We should only be flushing on close if the file was flagged as needing
+it during truncate. I broke this with my ordered data vs transaction
+commit deadlock fix.
+
+Thanks to Miao Xie for catching this.
+
+Signed-off-by: Chris Mason <clm@fb.com>
+Reported-by: Miao Xie <miaox@cn.fujitsu.com>
+Reported-by: Fengguang Wu <fengguang.wu@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/file.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -1840,7 +1840,15 @@ int btrfs_release_file(struct inode *ino
+ {
+ if (filp->private_data)
+ btrfs_ioctl_trans_end(filp);
+- filemap_flush(inode->i_mapping);
++ /*
++ * ordered_data_close is set by settattr when we are about to truncate
++ * a file from a non-zero size to a zero size. This tries to
++ * flush down new bytes that may have been written if the
++ * application were using truncate to replace a file in place.
++ */
++ if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
++ &BTRFS_I(inode)->runtime_flags))
++ filemap_flush(inode->i_mapping);
+ return 0;
+ }
+
--- /dev/null
+From 4eb1f66dce6c4dc28dd90a7ffbe6b2b1cb08aa4e Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Mon, 28 Jul 2014 10:57:04 +0200
+Subject: Btrfs: Fix memory corruption by ulist_add_merge() on 32bit arch
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit 4eb1f66dce6c4dc28dd90a7ffbe6b2b1cb08aa4e upstream.
+
+We've got bug reports that btrfs crashes when quota is enabled on
+32bit kernel, typically with the Oops like below:
+ BUG: unable to handle kernel NULL pointer dereference at 00000004
+ IP: [<f9234590>] find_parent_nodes+0x360/0x1380 [btrfs]
+ *pde = 00000000
+ Oops: 0000 [#1] SMP
+ CPU: 0 PID: 151 Comm: kworker/u8:2 Tainted: G S W 3.15.2-1.gd43d97e-default #1
+ Workqueue: btrfs-qgroup-rescan normal_work_helper [btrfs]
+ task: f1478130 ti: f147c000 task.ti: f147c000
+ EIP: 0060:[<f9234590>] EFLAGS: 00010213 CPU: 0
+ EIP is at find_parent_nodes+0x360/0x1380 [btrfs]
+ EAX: f147dda8 EBX: f147ddb0 ECX: 00000011 EDX: 00000000
+ ESI: 00000000 EDI: f147dda4 EBP: f147ddf8 ESP: f147dd38
+ DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
+ CR0: 8005003b CR2: 00000004 CR3: 00bf3000 CR4: 00000690
+ Stack:
+ 00000000 00000000 f147dda4 00000050 00000001 00000000 00000001 00000050
+ 00000001 00000000 d3059000 00000001 00000022 000000a8 00000000 00000000
+ 00000000 000000a1 00000000 00000000 00000001 00000000 00000000 11800000
+ Call Trace:
+ [<f923564d>] __btrfs_find_all_roots+0x9d/0xf0 [btrfs]
+ [<f9237bb1>] btrfs_qgroup_rescan_worker+0x401/0x760 [btrfs]
+ [<f9206148>] normal_work_helper+0xc8/0x270 [btrfs]
+ [<c025e38b>] process_one_work+0x11b/0x390
+ [<c025eea1>] worker_thread+0x101/0x340
+ [<c026432b>] kthread+0x9b/0xb0
+ [<c0712a71>] ret_from_kernel_thread+0x21/0x30
+ [<c0264290>] kthread_create_on_node+0x110/0x110
+
+This indicates a NULL corruption in the prefs_delayed list. Further
+investigation and bisection showed that the call to ulist_add_merge()
+causes the corruption.
+
+ulist_add_merge() takes a u64 as aux and writes a 64bit value into
+old_aux. The callers of this function in backref.c, however, pass a
+pointer to a pointer as old_aux. That is, the function writes a 64bit
+value through a 32bit pointer, and the overflow lands in the adjacent
+variable, in this case prefs_delayed.
+
+Here is a quick attempt to band-aid over this: a new function,
+ulist_add_merge_ptr(), is introduced to properly pass and store a
+pointer value instead of a u64. Ugly void ** casts remain in the
+callers because void ** cannot be converted implicitly, but that is
+still safer than an explicit cast to u64.
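+
+A sketch of how the clobbering happens on a 32bit stack (the layout is
+illustrative; the real victims are locals of find_parent_nodes()):
+
+    struct extent_inode_elem *eie = NULL;    /* 4 bytes on 32bit  */
+    struct list_head prefs_delayed;          /* adjacent variable */
+
+    /* ulist_add_merge() stores 8 bytes through old_aux, so the
+     * upper half spills into prefs_delayed: */
+    ulist_add_merge(refs, ref->parent, (uintptr_t)ref->inode_list,
+                    (u64 *)&eie, GFP_NOFS);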
+
+Bugzilla: https://bugzilla.novell.com/show_bug.cgi?id=887046
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/backref.c | 11 +++++------
+ fs/btrfs/ulist.h | 15 +++++++++++++++
+ 2 files changed, 20 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/backref.c
++++ b/fs/btrfs/backref.c
+@@ -276,9 +276,8 @@ static int add_all_parents(struct btrfs_
+ }
+ if (ret > 0)
+ goto next;
+- ret = ulist_add_merge(parents, eb->start,
+- (uintptr_t)eie,
+- (u64 *)&old, GFP_NOFS);
++ ret = ulist_add_merge_ptr(parents, eb->start,
++ eie, (void **)&old, GFP_NOFS);
+ if (ret < 0)
+ break;
+ if (!ret && extent_item_pos) {
+@@ -1008,9 +1007,9 @@ again:
+ goto out;
+ ref->inode_list = eie;
+ }
+- ret = ulist_add_merge(refs, ref->parent,
+- (uintptr_t)ref->inode_list,
+- (u64 *)&eie, GFP_NOFS);
++ ret = ulist_add_merge_ptr(refs, ref->parent,
++ ref->inode_list,
++ (void **)&eie, GFP_NOFS);
+ if (ret < 0)
+ goto out;
+ if (!ret && extent_item_pos) {
+--- a/fs/btrfs/ulist.h
++++ b/fs/btrfs/ulist.h
+@@ -57,6 +57,21 @@ void ulist_free(struct ulist *ulist);
+ int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask);
+ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
+ u64 *old_aux, gfp_t gfp_mask);
++
++/* just like ulist_add_merge() but take a pointer for the aux data */
++static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux,
++ void **old_aux, gfp_t gfp_mask)
++{
++#if BITS_PER_LONG == 32
++ u64 old64 = (uintptr_t)*old_aux;
++ int ret = ulist_add_merge(ulist, val, (uintptr_t)aux, &old64, gfp_mask);
++ *old_aux = (void *)((uintptr_t)old64);
++ return ret;
++#else
++ return ulist_add_merge(ulist, val, (u64)aux, (u64 *)old_aux, gfp_mask);
++#endif
++}
++
+ struct ulist_node *ulist_next(struct ulist *ulist,
+ struct ulist_iterator *uiter);
+
--- /dev/null
+From 9e0af23764344f7f1b68e4eefbe7dc865018b63d Mon Sep 17 00:00:00 2001
+From: Liu Bo <bo.li.liu@oracle.com>
+Date: Fri, 15 Aug 2014 23:36:53 +0800
+Subject: Btrfs: fix task hang under heavy compressed write
+
+From: Liu Bo <bo.li.liu@oracle.com>
+
+commit 9e0af23764344f7f1b68e4eefbe7dc865018b63d upstream.
+
+This has been reported and discussed for a long time, and the hang occurs
+in both 3.15 and 3.16.
+
+Btrfs has now migrated to the kernel workqueue, but the migration
+introduced this hang.
+
+Btrfs has a kind of work that is queued in an ordered way, which means
+that its ordered_func() callbacks must be processed FIFO, so it usually
+looks like --
+
+normal_work_helper(arg)
+ work = container_of(arg, struct btrfs_work, normal_work);
+
+ work->func() <---- (we name it work X)
+ for ordered_work in wq->ordered_list
+ ordered_work->ordered_func()
+ ordered_work->ordered_free()
+
+The hang is a rare case. First, when we find free space, we get an
+uncached block group; then we go to read its free space cache inode for
+free space information, so it will
+
+file a readahead request
+ btrfs_readpages()
+ for page that is not in page cache
+ __do_readpage()
+ submit_extent_page()
+ btrfs_submit_bio_hook()
+ btrfs_bio_wq_end_io()
+ submit_bio()
+ end_workqueue_bio() <--(ret by the 1st endio)
+ queue a work(named work Y) for the 2nd
+ also the real endio()
+
+So the hang occurs when work Y's work_struct and work X's work_struct happen
+to share the same address.
+
+A bit more explanation,
+
+A,B,C -- struct btrfs_work
+arg -- struct work_struct
+
+kthread:
+worker_thread()
+ pick up a work_struct from @worklist
+ process_one_work(arg)
+ worker->current_work = arg; <-- arg is A->normal_work
+ worker->current_func(arg)
+ normal_work_helper(arg)
+ A = container_of(arg, struct btrfs_work, normal_work);
+
+ A->func()
+ A->ordered_func()
+ A->ordered_free() <-- A gets freed
+
+ B->ordered_func()
+ submit_compressed_extents()
+ find_free_extent()
+ load_free_space_inode()
+ ... <-- (the above readhead stack)
+ end_workqueue_bio()
+ btrfs_queue_work(work C)
+ B->ordered_free()
+
+So if work A has a high priority in wq->ordered_list and there are more
+ordered works queued after it, such as B->ordered_func(), A's memory can
+be freed before normal_work_helper() returns, which means that the kernel
+workqueue code in worker_thread() still has worker->current_work pointing
+to work A->normal_work, i.e. arg's address.
+
+Meanwhile, work C is allocated after work A is freed, and work
+C->normal_work and work A->normal_work are likely to share the same
+address (I confirmed this with ftrace output, so I'm not just guessing;
+it's rare though).
+
+When another kthread picks up work C->normal_work to process and finds
+that our kthread is already processing it (see
+find_worker_executing_work()), it treats work C as a collision and skips
+it, which ends up with nobody processing work C.
+
+So the situation is that our kthread is waiting forever on work C.
+
+Besides this, there are other cases that can lead to deadlock, but the
+real problem is that all btrfs workqueues share one work->func,
+normal_work_helper. This patch therefore gives each workqueue its own
+helper function, which is only a wrapper of normal_work_helper.
+
+With this patch, I no longer hit the above hang.
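+
+The effect, sketched from the hunks below: btrfs_init_work() now takes a
+per-workqueue helper, so INIT_WORK() records a distinct function pointer
+for each queue and find_worker_executing_work() (which matches on both
+the work address and the work function) no longer mistakes a recycled
+btrfs_work address for work that is already running:
+
+    /* before: every queue shared normal_work_helper */
+    INIT_WORK(&work->normal_work, normal_work_helper);
+
+    /* after: e.g. the endio-write queue gets its own wrapper */
+    btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
+                    finish_ordered_fn, NULL, NULL);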
+
+Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/async-thread.c | 44 +++++++++++++++++++++++++++++++--------
+ fs/btrfs/async-thread.h | 28 +++++++++++++++++++++++-
+ fs/btrfs/delayed-inode.c | 4 +--
+ fs/btrfs/disk-io.c | 53 +++++++++++++++++++++++++----------------------
+ fs/btrfs/extent-tree.c | 7 +++---
+ fs/btrfs/inode.c | 35 ++++++++++++++++++++-----------
+ fs/btrfs/ordered-data.c | 1
+ fs/btrfs/qgroup.c | 1
+ fs/btrfs/raid56.c | 9 +++++--
+ fs/btrfs/reada.c | 3 +-
+ fs/btrfs/scrub.c | 14 +++++++-----
+ fs/btrfs/volumes.c | 3 +-
+ 12 files changed, 141 insertions(+), 61 deletions(-)
+
+--- a/fs/btrfs/async-thread.c
++++ b/fs/btrfs/async-thread.c
+@@ -22,7 +22,6 @@
+ #include <linux/list.h>
+ #include <linux/spinlock.h>
+ #include <linux/freezer.h>
+-#include <linux/workqueue.h>
+ #include "async-thread.h"
+ #include "ctree.h"
+
+@@ -55,8 +54,39 @@ struct btrfs_workqueue {
+ struct __btrfs_workqueue *high;
+ };
+
+-static inline struct __btrfs_workqueue
+-*__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
++static void normal_work_helper(struct btrfs_work *work);
++
++#define BTRFS_WORK_HELPER(name) \
++void btrfs_##name(struct work_struct *arg) \
++{ \
++ struct btrfs_work *work = container_of(arg, struct btrfs_work, \
++ normal_work); \
++ normal_work_helper(work); \
++}
++
++BTRFS_WORK_HELPER(worker_helper);
++BTRFS_WORK_HELPER(delalloc_helper);
++BTRFS_WORK_HELPER(flush_delalloc_helper);
++BTRFS_WORK_HELPER(cache_helper);
++BTRFS_WORK_HELPER(submit_helper);
++BTRFS_WORK_HELPER(fixup_helper);
++BTRFS_WORK_HELPER(endio_helper);
++BTRFS_WORK_HELPER(endio_meta_helper);
++BTRFS_WORK_HELPER(endio_meta_write_helper);
++BTRFS_WORK_HELPER(endio_raid56_helper);
++BTRFS_WORK_HELPER(rmw_helper);
++BTRFS_WORK_HELPER(endio_write_helper);
++BTRFS_WORK_HELPER(freespace_write_helper);
++BTRFS_WORK_HELPER(delayed_meta_helper);
++BTRFS_WORK_HELPER(readahead_helper);
++BTRFS_WORK_HELPER(qgroup_rescan_helper);
++BTRFS_WORK_HELPER(extent_refs_helper);
++BTRFS_WORK_HELPER(scrub_helper);
++BTRFS_WORK_HELPER(scrubwrc_helper);
++BTRFS_WORK_HELPER(scrubnc_helper);
++
++static struct __btrfs_workqueue *
++__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
+ int thresh)
+ {
+ struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
+@@ -232,13 +262,11 @@ static void run_ordered_work(struct __bt
+ spin_unlock_irqrestore(lock, flags);
+ }
+
+-static void normal_work_helper(struct work_struct *arg)
++static void normal_work_helper(struct btrfs_work *work)
+ {
+- struct btrfs_work *work;
+ struct __btrfs_workqueue *wq;
+ int need_order = 0;
+
+- work = container_of(arg, struct btrfs_work, normal_work);
+ /*
+ * We should not touch things inside work in the following cases:
+ * 1) after work->func() if it has no ordered_free
+@@ -262,7 +290,7 @@ static void normal_work_helper(struct wo
+ trace_btrfs_all_work_done(work);
+ }
+
+-void btrfs_init_work(struct btrfs_work *work,
++void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
+ btrfs_func_t func,
+ btrfs_func_t ordered_func,
+ btrfs_func_t ordered_free)
+@@ -270,7 +298,7 @@ void btrfs_init_work(struct btrfs_work *
+ work->func = func;
+ work->ordered_func = ordered_func;
+ work->ordered_free = ordered_free;
+- INIT_WORK(&work->normal_work, normal_work_helper);
++ INIT_WORK(&work->normal_work, uniq_func);
+ INIT_LIST_HEAD(&work->ordered_list);
+ work->flags = 0;
+ }
+--- a/fs/btrfs/async-thread.h
++++ b/fs/btrfs/async-thread.h
+@@ -19,12 +19,14 @@
+
+ #ifndef __BTRFS_ASYNC_THREAD_
+ #define __BTRFS_ASYNC_THREAD_
++#include <linux/workqueue.h>
+
+ struct btrfs_workqueue;
+ /* Internal use only */
+ struct __btrfs_workqueue;
+ struct btrfs_work;
+ typedef void (*btrfs_func_t)(struct btrfs_work *arg);
++typedef void (*btrfs_work_func_t)(struct work_struct *arg);
+
+ struct btrfs_work {
+ btrfs_func_t func;
+@@ -38,11 +40,35 @@ struct btrfs_work {
+ unsigned long flags;
+ };
+
++#define BTRFS_WORK_HELPER_PROTO(name) \
++void btrfs_##name(struct work_struct *arg)
++
++BTRFS_WORK_HELPER_PROTO(worker_helper);
++BTRFS_WORK_HELPER_PROTO(delalloc_helper);
++BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper);
++BTRFS_WORK_HELPER_PROTO(cache_helper);
++BTRFS_WORK_HELPER_PROTO(submit_helper);
++BTRFS_WORK_HELPER_PROTO(fixup_helper);
++BTRFS_WORK_HELPER_PROTO(endio_helper);
++BTRFS_WORK_HELPER_PROTO(endio_meta_helper);
++BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper);
++BTRFS_WORK_HELPER_PROTO(endio_raid56_helper);
++BTRFS_WORK_HELPER_PROTO(rmw_helper);
++BTRFS_WORK_HELPER_PROTO(endio_write_helper);
++BTRFS_WORK_HELPER_PROTO(freespace_write_helper);
++BTRFS_WORK_HELPER_PROTO(delayed_meta_helper);
++BTRFS_WORK_HELPER_PROTO(readahead_helper);
++BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper);
++BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
++BTRFS_WORK_HELPER_PROTO(scrub_helper);
++BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
++BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
++
+ struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
+ int flags,
+ int max_active,
+ int thresh);
+-void btrfs_init_work(struct btrfs_work *work,
++void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
+ btrfs_func_t func,
+ btrfs_func_t ordered_func,
+ btrfs_func_t ordered_free);
+--- a/fs/btrfs/delayed-inode.c
++++ b/fs/btrfs/delayed-inode.c
+@@ -1395,8 +1395,8 @@ static int btrfs_wq_run_delayed_node(str
+ return -ENOMEM;
+
+ async_work->delayed_root = delayed_root;
+- btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root,
+- NULL, NULL);
++ btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
++ btrfs_async_run_delayed_root, NULL, NULL);
+ async_work->nr = nr;
+
+ btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work);
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -39,7 +39,6 @@
+ #include "btrfs_inode.h"
+ #include "volumes.h"
+ #include "print-tree.h"
+-#include "async-thread.h"
+ #include "locking.h"
+ #include "tree-log.h"
+ #include "free-space-cache.h"
+@@ -693,35 +692,41 @@ static void end_workqueue_bio(struct bio
+ {
+ struct end_io_wq *end_io_wq = bio->bi_private;
+ struct btrfs_fs_info *fs_info;
++ struct btrfs_workqueue *wq;
++ btrfs_work_func_t func;
+
+ fs_info = end_io_wq->info;
+ end_io_wq->error = err;
+- btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
+
+ if (bio->bi_rw & REQ_WRITE) {
+- if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
+- btrfs_queue_work(fs_info->endio_meta_write_workers,
+- &end_io_wq->work);
+- else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
+- btrfs_queue_work(fs_info->endio_freespace_worker,
+- &end_io_wq->work);
+- else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
+- btrfs_queue_work(fs_info->endio_raid56_workers,
+- &end_io_wq->work);
+- else
+- btrfs_queue_work(fs_info->endio_write_workers,
+- &end_io_wq->work);
++ if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
++ wq = fs_info->endio_meta_write_workers;
++ func = btrfs_endio_meta_write_helper;
++ } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) {
++ wq = fs_info->endio_freespace_worker;
++ func = btrfs_freespace_write_helper;
++ } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
++ wq = fs_info->endio_raid56_workers;
++ func = btrfs_endio_raid56_helper;
++ } else {
++ wq = fs_info->endio_write_workers;
++ func = btrfs_endio_write_helper;
++ }
+ } else {
+- if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
+- btrfs_queue_work(fs_info->endio_raid56_workers,
+- &end_io_wq->work);
+- else if (end_io_wq->metadata)
+- btrfs_queue_work(fs_info->endio_meta_workers,
+- &end_io_wq->work);
+- else
+- btrfs_queue_work(fs_info->endio_workers,
+- &end_io_wq->work);
++ if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
++ wq = fs_info->endio_raid56_workers;
++ func = btrfs_endio_raid56_helper;
++ } else if (end_io_wq->metadata) {
++ wq = fs_info->endio_meta_workers;
++ func = btrfs_endio_meta_helper;
++ } else {
++ wq = fs_info->endio_workers;
++ func = btrfs_endio_helper;
++ }
+ }
++
++ btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL);
++ btrfs_queue_work(wq, &end_io_wq->work);
+ }
+
+ /*
+@@ -828,7 +833,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_
+ async->submit_bio_start = submit_bio_start;
+ async->submit_bio_done = submit_bio_done;
+
+- btrfs_init_work(&async->work, run_one_async_start,
++ btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
+ run_one_async_done, run_one_async_free);
+
+ async->bio_flags = bio_flags;
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -552,7 +552,8 @@ static int cache_block_group(struct btrf
+ caching_ctl->block_group = cache;
+ caching_ctl->progress = cache->key.objectid;
+ atomic_set(&caching_ctl->count, 1);
+- btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
++ btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
++ caching_thread, NULL, NULL);
+
+ spin_lock(&cache->lock);
+ /*
+@@ -2749,8 +2750,8 @@ int btrfs_async_run_delayed_refs(struct
+ async->sync = 0;
+ init_completion(&async->wait);
+
+- btrfs_init_work(&async->work, delayed_ref_async_start,
+- NULL, NULL);
++ btrfs_init_work(&async->work, btrfs_extent_refs_helper,
++ delayed_ref_async_start, NULL, NULL);
+
+ btrfs_queue_work(root->fs_info->extent_workers, &async->work);
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -1096,8 +1096,10 @@ static int cow_file_range_async(struct i
+ async_cow->end = cur_end;
+ INIT_LIST_HEAD(&async_cow->extents);
+
+- btrfs_init_work(&async_cow->work, async_cow_start,
+- async_cow_submit, async_cow_free);
++ btrfs_init_work(&async_cow->work,
++ btrfs_delalloc_helper,
++ async_cow_start, async_cow_submit,
++ async_cow_free);
+
+ nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
+ PAGE_CACHE_SHIFT;
+@@ -1881,7 +1883,8 @@ static int btrfs_writepage_start_hook(st
+
+ SetPageChecked(page);
+ page_cache_get(page);
+- btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
++ btrfs_init_work(&fixup->work, btrfs_fixup_helper,
++ btrfs_writepage_fixup_worker, NULL, NULL);
+ fixup->page = page;
+ btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work);
+ return -EBUSY;
+@@ -2822,7 +2825,8 @@ static int btrfs_writepage_end_io_hook(s
+ struct inode *inode = page->mapping->host;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_ordered_extent *ordered_extent = NULL;
+- struct btrfs_workqueue *workers;
++ struct btrfs_workqueue *wq;
++ btrfs_work_func_t func;
+
+ trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
+
+@@ -2831,13 +2835,17 @@ static int btrfs_writepage_end_io_hook(s
+ end - start + 1, uptodate))
+ return 0;
+
+- btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL);
++ if (btrfs_is_free_space_inode(inode)) {
++ wq = root->fs_info->endio_freespace_worker;
++ func = btrfs_freespace_write_helper;
++ } else {
++ wq = root->fs_info->endio_write_workers;
++ func = btrfs_endio_write_helper;
++ }
+
+- if (btrfs_is_free_space_inode(inode))
+- workers = root->fs_info->endio_freespace_worker;
+- else
+- workers = root->fs_info->endio_write_workers;
+- btrfs_queue_work(workers, &ordered_extent->work);
++ btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
++ NULL);
++ btrfs_queue_work(wq, &ordered_extent->work);
+
+ return 0;
+ }
+@@ -7158,7 +7166,8 @@ again:
+ if (!ret)
+ goto out_test;
+
+- btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL);
++ btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
++ finish_ordered_fn, NULL, NULL);
+ btrfs_queue_work(root->fs_info->endio_write_workers,
+ &ordered->work);
+ out_test:
+@@ -8485,7 +8494,9 @@ struct btrfs_delalloc_work *btrfs_alloc_
+ work->inode = inode;
+ work->wait = wait;
+ work->delay_iput = delay_iput;
+- btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL);
++ WARN_ON_ONCE(!inode);
++ btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
++ btrfs_run_delalloc_work, NULL, NULL);
+
+ return work;
+ }
+--- a/fs/btrfs/ordered-data.c
++++ b/fs/btrfs/ordered-data.c
+@@ -615,6 +615,7 @@ int btrfs_wait_ordered_extents(struct bt
+ spin_unlock(&root->ordered_extent_lock);
+
+ btrfs_init_work(&ordered->flush_work,
++ btrfs_flush_delalloc_helper,
+ btrfs_run_ordered_extent_work, NULL, NULL);
+ list_add_tail(&ordered->work_list, &works);
+ btrfs_queue_work(root->fs_info->flush_workers,
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -2551,6 +2551,7 @@ qgroup_rescan_init(struct btrfs_fs_info
+ memset(&fs_info->qgroup_rescan_work, 0,
+ sizeof(fs_info->qgroup_rescan_work));
+ btrfs_init_work(&fs_info->qgroup_rescan_work,
++ btrfs_qgroup_rescan_helper,
+ btrfs_qgroup_rescan_worker, NULL, NULL);
+
+ if (ret) {
+--- a/fs/btrfs/raid56.c
++++ b/fs/btrfs/raid56.c
+@@ -1416,7 +1416,8 @@ cleanup:
+
+ static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
+ {
+- btrfs_init_work(&rbio->work, rmw_work, NULL, NULL);
++ btrfs_init_work(&rbio->work, btrfs_rmw_helper,
++ rmw_work, NULL, NULL);
+
+ btrfs_queue_work(rbio->fs_info->rmw_workers,
+ &rbio->work);
+@@ -1424,7 +1425,8 @@ static void async_rmw_stripe(struct btrf
+
+ static void async_read_rebuild(struct btrfs_raid_bio *rbio)
+ {
+- btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL);
++ btrfs_init_work(&rbio->work, btrfs_rmw_helper,
++ read_rebuild_work, NULL, NULL);
+
+ btrfs_queue_work(rbio->fs_info->rmw_workers,
+ &rbio->work);
+@@ -1665,7 +1667,8 @@ static void btrfs_raid_unplug(struct blk
+ plug = container_of(cb, struct btrfs_plug_cb, cb);
+
+ if (from_schedule) {
+- btrfs_init_work(&plug->work, unplug_work, NULL, NULL);
++ btrfs_init_work(&plug->work, btrfs_rmw_helper,
++ unplug_work, NULL, NULL);
+ btrfs_queue_work(plug->info->rmw_workers,
+ &plug->work);
+ return;
+--- a/fs/btrfs/reada.c
++++ b/fs/btrfs/reada.c
+@@ -798,7 +798,8 @@ static void reada_start_machine(struct b
+ /* FIXME we cannot handle this properly right now */
+ BUG();
+ }
+- btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL);
++ btrfs_init_work(&rmw->work, btrfs_readahead_helper,
++ reada_start_machine_worker, NULL, NULL);
+ rmw->fs_info = fs_info;
+
+ btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -428,8 +428,8 @@ struct scrub_ctx *scrub_setup_ctx(struct
+ sbio->index = i;
+ sbio->sctx = sctx;
+ sbio->page_count = 0;
+- btrfs_init_work(&sbio->work, scrub_bio_end_io_worker,
+- NULL, NULL);
++ btrfs_init_work(&sbio->work, btrfs_scrub_helper,
++ scrub_bio_end_io_worker, NULL, NULL);
+
+ if (i != SCRUB_BIOS_PER_SCTX - 1)
+ sctx->bios[i]->next_free = i + 1;
+@@ -999,8 +999,8 @@ nodatasum_case:
+ fixup_nodatasum->root = fs_info->extent_root;
+ fixup_nodatasum->mirror_num = failed_mirror_index + 1;
+ scrub_pending_trans_workers_inc(sctx);
+- btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum,
+- NULL, NULL);
++ btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
++ scrub_fixup_nodatasum, NULL, NULL);
+ btrfs_queue_work(fs_info->scrub_workers,
+ &fixup_nodatasum->work);
+ goto out;
+@@ -1616,7 +1616,8 @@ static void scrub_wr_bio_end_io(struct b
+ sbio->err = err;
+ sbio->bio = bio;
+
+- btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL);
++ btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
++ scrub_wr_bio_end_io_worker, NULL, NULL);
+ btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
+ }
+
+@@ -3203,7 +3204,8 @@ static int copy_nocow_pages(struct scrub
+ nocow_ctx->len = len;
+ nocow_ctx->mirror_num = mirror_num;
+ nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
+- btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL);
++ btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
++ copy_nocow_pages_worker, NULL, NULL);
+ INIT_LIST_HEAD(&nocow_ctx->inodes);
+ btrfs_queue_work(fs_info->scrub_nocow_workers,
+ &nocow_ctx->work);
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -5800,7 +5800,8 @@ struct btrfs_device *btrfs_alloc_device(
+ else
+ generate_random_uuid(dev->uuid);
+
+- btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL);
++ btrfs_init_work(&dev->work, btrfs_submit_helper,
++ pending_bios_fn, NULL, NULL);
+
+ return dev;
+ }
--- /dev/null
+From 6f7ff6d7832c6be13e8c95598884dbc40ad69fb7 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 2 Jul 2014 20:07:54 +0100
+Subject: Btrfs: read lock extent buffer while walking backrefs
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 6f7ff6d7832c6be13e8c95598884dbc40ad69fb7 upstream.
+
+Before processing the extent buffer, acquire a read lock on it, so
+that we're safe against concurrent updates on the extent buffer.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/backref.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/backref.c
++++ b/fs/btrfs/backref.c
+@@ -1000,8 +1000,11 @@ again:
+ ret = -EIO;
+ goto out;
+ }
++ btrfs_tree_read_lock(eb);
++ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+ ret = find_extent_in_eb(eb, bytenr,
+ *extent_item_pos, &eie);
++ btrfs_tree_read_unlock_blocking(eb);
+ free_extent_buffer(eb);
+ if (ret < 0)
+ goto out;
--- /dev/null
+From 4631dbf677ded0419fee35ca7408285dabfaef1a Mon Sep 17 00:00:00 2001
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+Date: Sat, 23 Aug 2014 17:48:28 -0400
+Subject: ext4: move i_size,i_disksize update routines to helper function
+
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+
+commit 4631dbf677ded0419fee35ca7408285dabfaef1a upstream.
+
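+The new helper folds the duplicated i_size/i_disksize checks into one
+place and reports what changed through its return value (bit 0 set when
+i_size was raised, bit 1 when i_disksize was), so a caller such as
+ext4_fallocate() can test, per the hunks below:
+
+    if (ext4_update_inode_size(inode, new_size) & 0x1)
+        inode->i_mtime = inode->i_ctime;    /* i_size grew */
+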
+Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/ext4.h | 16 ++++++++++++++++
+ fs/ext4/extents.c | 17 ++++-------------
+ fs/ext4/inode.c | 34 ++++++++--------------------------
+ 3 files changed, 28 insertions(+), 39 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -2453,6 +2453,22 @@ static inline void ext4_update_i_disksiz
+ up_write(&EXT4_I(inode)->i_data_sem);
+ }
+
++/* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */
++static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
++{
++ int changed = 0;
++
++ if (newsize > inode->i_size) {
++ i_size_write(inode, newsize);
++ changed = 1;
++ }
++ if (newsize > EXT4_I(inode)->i_disksize) {
++ ext4_update_i_disksize(inode, newsize);
++ changed |= 2;
++ }
++ return changed;
++}
++
+ struct ext4_group_info {
+ unsigned long bb_state;
+ struct rb_root bb_free_root;
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -4838,12 +4838,8 @@ static long ext4_zero_range(struct file
+ }
+
+ inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+-
+ if (new_size) {
+- if (new_size > i_size_read(inode))
+- i_size_write(inode, new_size);
+- if (new_size > EXT4_I(inode)->i_disksize)
+- ext4_update_i_disksize(inode, new_size);
++ ext4_update_inode_size(inode, new_size);
+ } else {
+ /*
+ * Mark that we allocate beyond EOF so the subsequent truncate
+@@ -4885,7 +4881,6 @@ long ext4_fallocate(struct file *file, i
+ int ret = 0;
+ int flags;
+ ext4_lblk_t lblk;
+- struct timespec tv;
+ unsigned int blkbits = inode->i_blkbits;
+
+ /* Return error if mode is not supported */
+@@ -4944,15 +4939,11 @@ long ext4_fallocate(struct file *file, i
+ if (IS_ERR(handle))
+ goto out;
+
+- tv = inode->i_ctime = ext4_current_time(inode);
++ inode->i_ctime = ext4_current_time(inode);
+
+ if (new_size) {
+- if (new_size > i_size_read(inode)) {
+- i_size_write(inode, new_size);
+- inode->i_mtime = tv;
+- }
+- if (new_size > EXT4_I(inode)->i_disksize)
+- ext4_update_i_disksize(inode, new_size);
++ if (ext4_update_inode_size(inode, new_size) & 0x1)
++ inode->i_mtime = inode->i_ctime;
+ } else {
+ /*
+ * Mark that we allocate beyond EOF so the subsequent truncate
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1092,27 +1092,11 @@ static int ext4_write_end(struct file *f
+ } else
+ copied = block_write_end(file, mapping, pos,
+ len, copied, page, fsdata);
+-
+ /*
+- * No need to use i_size_read() here, the i_size
+- * cannot change under us because we hole i_mutex.
+- *
+- * But it's important to update i_size while still holding page lock:
++ * it's important to update i_size while still holding page lock:
+ * page writeout could otherwise come in and zero beyond i_size.
+ */
+- if (pos + copied > inode->i_size) {
+- i_size_write(inode, pos + copied);
+- i_size_changed = 1;
+- }
+-
+- if (pos + copied > EXT4_I(inode)->i_disksize) {
+- /* We need to mark inode dirty even if
+- * new_i_size is less that inode->i_size
+- * but greater than i_disksize. (hint delalloc)
+- */
+- ext4_update_i_disksize(inode, (pos + copied));
+- i_size_changed = 1;
+- }
++ i_size_changed = ext4_update_inode_size(inode, pos + copied);
+ unlock_page(page);
+ page_cache_release(page);
+
+@@ -1160,7 +1144,7 @@ static int ext4_journalled_write_end(str
+ int ret = 0, ret2;
+ int partial = 0;
+ unsigned from, to;
+- loff_t new_i_size;
++ int size_changed = 0;
+
+ trace_ext4_journalled_write_end(inode, pos, len, copied);
+ from = pos & (PAGE_CACHE_SIZE - 1);
+@@ -1183,20 +1167,18 @@ static int ext4_journalled_write_end(str
+ if (!partial)
+ SetPageUptodate(page);
+ }
+- new_i_size = pos + copied;
+- if (new_i_size > inode->i_size)
+- i_size_write(inode, pos+copied);
++ size_changed = ext4_update_inode_size(inode, pos + copied);
+ ext4_set_inode_state(inode, EXT4_STATE_JDATA);
+ EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
+- if (new_i_size > EXT4_I(inode)->i_disksize) {
+- ext4_update_i_disksize(inode, new_i_size);
++ unlock_page(page);
++ page_cache_release(page);
++
++ if (size_changed) {
+ ret2 = ext4_mark_inode_dirty(handle, inode);
+ if (!ret)
+ ret = ret2;
+ }
+
+- unlock_page(page);
+- page_cache_release(page);
+ if (pos + len > inode->i_size && ext4_can_truncate(inode))
+ /* if we have allocated more blocks and copied
+ * less. We will have blocks allocated outside
--- /dev/null
+From 36de928641ee48b2078d3fe9514242aaa2f92013 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Sat, 23 Aug 2014 17:47:19 -0400
+Subject: ext4: propagate errors up to ext4_find_entry()'s callers
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 36de928641ee48b2078d3fe9514242aaa2f92013 upstream.
+
+If we run into some kind of error, such as ENOMEM, while calling
+ext4_getblk() or ext4_dx_find_entry(), we need to make sure this error
+gets propagated up to ext4_find_entry() and then to its callers. This
+way, transient errors such as ENOMEM can get propagated to the VFS.
+This is important so that the system calls return the appropriate
+error, and also so that in the case of ext4_lookup(), we return an
+error instead of a NULL inode, since that will result in a negative
+dentry cache entry that will stick around long past the OOM condition
+which caused a transient ENOMEM error.
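+
+After this change callers have three outcomes to distinguish instead of
+two; sketched from the hunks below (the label name is illustrative):
+
+    bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+    if (IS_ERR(bh))         /* transient failure, e.g. -ENOMEM */
+        return PTR_ERR(bh);
+    if (!bh)                /* entry genuinely not found */
+        goto not_found;
+    /* entry found: bh holds the directory block */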
+
+Google-Bug-Id: #17142205
+
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/ext4.h | 2 +-
+ fs/ext4/namei.c | 35 +++++++++++++++++++++++++++++++++--
+ 2 files changed, 34 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1826,7 +1826,7 @@ ext4_group_first_block_no(struct super_b
+ /*
+ * Special error return code only used by dx_probe() and its callers.
+ */
+-#define ERR_BAD_DX_DIR -75000
++#define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1))
+
+ /*
+ * Timeout and state flag for lazy initialization inode thread.
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -1227,7 +1227,7 @@ static struct buffer_head * ext4_find_en
+ buffer */
+ int num = 0;
+ ext4_lblk_t nblocks;
+- int i, err;
++ int i, err = 0;
+ int namelen;
+
+ *res_dir = NULL;
+@@ -1264,7 +1264,11 @@ static struct buffer_head * ext4_find_en
+ * return. Otherwise, fall back to doing a search the
+ * old fashioned way.
+ */
+- if (bh || (err != ERR_BAD_DX_DIR))
++ if (err == -ENOENT)
++ return NULL;
++ if (err && err != ERR_BAD_DX_DIR)
++ return ERR_PTR(err);
++ if (bh)
+ return bh;
+ dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
+ "falling back\n"));
+@@ -1295,6 +1299,11 @@ restart:
+ }
+ num++;
+ bh = ext4_getblk(NULL, dir, b++, 0, &err);
++ if (unlikely(err)) {
++ if (ra_max == 0)
++ return ERR_PTR(err);
++ break;
++ }
+ bh_use[ra_max] = bh;
+ if (bh)
+ ll_rw_block(READ | REQ_META | REQ_PRIO,
+@@ -1417,6 +1426,8 @@ static struct dentry *ext4_lookup(struct
+ return ERR_PTR(-ENAMETOOLONG);
+
+ bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
++ if (IS_ERR(bh))
++ return (struct dentry *) bh;
+ inode = NULL;
+ if (bh) {
+ __u32 ino = le32_to_cpu(de->inode);
+@@ -1450,6 +1461,8 @@ struct dentry *ext4_get_parent(struct de
+ struct buffer_head *bh;
+
+ bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL);
++ if (IS_ERR(bh))
++ return (struct dentry *) bh;
+ if (!bh)
+ return ERR_PTR(-ENOENT);
+ ino = le32_to_cpu(de->inode);
+@@ -2727,6 +2740,8 @@ static int ext4_rmdir(struct inode *dir,
+
+ retval = -ENOENT;
+ bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
++ if (IS_ERR(bh))
++ return PTR_ERR(bh);
+ if (!bh)
+ goto end_rmdir;
+
+@@ -2794,6 +2809,8 @@ static int ext4_unlink(struct inode *dir
+
+ retval = -ENOENT;
+ bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
++ if (IS_ERR(bh))
++ return PTR_ERR(bh);
+ if (!bh)
+ goto end_unlink;
+
+@@ -3121,6 +3138,8 @@ static int ext4_find_delete_entry(handle
+ struct ext4_dir_entry_2 *de;
+
+ bh = ext4_find_entry(dir, d_name, &de, NULL);
++ if (IS_ERR(bh))
++ return PTR_ERR(bh);
+ if (bh) {
+ retval = ext4_delete_entry(handle, dir, de, bh);
+ brelse(bh);
+@@ -3202,6 +3221,8 @@ static int ext4_rename(struct inode *old
+ dquot_initialize(new.inode);
+
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
++ if (IS_ERR(old.bh))
++ return PTR_ERR(old.bh);
+ /*
+ * Check for inode number is _not_ due to possible IO errors.
+ * We might rmdir the source, keep it as pwd of some process
+@@ -3214,6 +3235,10 @@ static int ext4_rename(struct inode *old
+
+ new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
+ &new.de, &new.inlined);
++ if (IS_ERR(new.bh)) {
++ retval = PTR_ERR(new.bh);
++ goto end_rename;
++ }
+ if (new.bh) {
+ if (!new.inode) {
+ brelse(new.bh);
+@@ -3330,6 +3355,8 @@ static int ext4_cross_rename(struct inod
+
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
+ &old.de, &old.inlined);
++ if (IS_ERR(old.bh))
++ return PTR_ERR(old.bh);
+ /*
+ * Check for inode number is _not_ due to possible IO errors.
+ * We might rmdir the source, keep it as pwd of some process
+@@ -3342,6 +3369,10 @@ static int ext4_cross_rename(struct inod
+
+ new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
+ &new.de, &new.inlined);
++ if (IS_ERR(new.bh)) {
++ retval = PTR_ERR(new.bh);
++ goto end_rename;
++ }
+
+ /* RENAME_EXCHANGE case: old *and* new must both exist */
+ if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino)
--- /dev/null
+From 8e8248b1369c97c7bb6f8bcaee1f05deeabab8ef Mon Sep 17 00:00:00 2001
+From: Alexander Usyskin <alexander.usyskin@intel.com>
+Date: Tue, 12 Aug 2014 18:07:57 +0300
+Subject: mei: nfc: fix memory leak in error path
+
+From: Alexander Usyskin <alexander.usyskin@intel.com>
+
+commit 8e8248b1369c97c7bb6f8bcaee1f05deeabab8ef upstream.
+
+The NFC code will leak the send buffer if the send fails.
+Use a single exit point that does the freeing.
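+
+As a hedged illustration, here is a minimal, self-contained C sketch
+of the single-exit-point pattern the fix adopts; send_with_cleanup()
+and the header size are hypothetical, and free(NULL) being a no-op
+plays the same role as kfree(NULL) in the driver:
+
+    #include <errno.h>
+    #include <stdlib.h>
+    #include <string.h>
+
+    /* hypothetical send path showing the single-exit cleanup pattern */
+    static int send_with_cleanup(const void *payload, size_t len)
+    {
+            char *buf;
+            int err = -ENOMEM;          /* default error before alloc */
+
+            buf = malloc(len + 16);     /* 16 stands in for a header */
+            if (!buf)
+                    goto out;           /* buf is NULL; free(NULL) is safe */
+
+            memcpy(buf + 16, payload, len);
+            /* ... transmit buf, set err from the result ... */
+            err = 0;
+    out:
+            free(buf);                  /* one place frees on all paths */
+            return err;
+    }
+
+    int main(void)
+    {
+            return send_with_cleanup("hi", 2) ? 1 : 0;
+    }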
+
+Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
+Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/misc/mei/nfc.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/drivers/misc/mei/nfc.c
++++ b/drivers/misc/mei/nfc.c
+@@ -342,9 +342,10 @@ static int mei_nfc_send(struct mei_cl_de
+ ndev = (struct mei_nfc_dev *) cldev->priv_data;
+ dev = ndev->cl->dev;
+
++ err = -ENOMEM;
+ mei_buf = kzalloc(length + MEI_NFC_HEADER_SIZE, GFP_KERNEL);
+ if (!mei_buf)
+- return -ENOMEM;
++ goto out;
+
+ hdr = (struct mei_nfc_hci_hdr *) mei_buf;
+ hdr->cmd = MEI_NFC_CMD_HCI_SEND;
+@@ -354,12 +355,9 @@ static int mei_nfc_send(struct mei_cl_de
+ hdr->data_size = length;
+
+ memcpy(mei_buf + MEI_NFC_HEADER_SIZE, buf, length);
+-
+ err = __mei_cl_send(ndev->cl, mei_buf, length + MEI_NFC_HEADER_SIZE);
+ if (err < 0)
+- return err;
+-
+- kfree(mei_buf);
++ goto out;
+
+ if (!wait_event_interruptible_timeout(ndev->send_wq,
+ ndev->recv_req_id == ndev->req_id, HZ)) {
+@@ -368,7 +366,8 @@ static int mei_nfc_send(struct mei_cl_de
+ } else {
+ ndev->req_id++;
+ }
+-
++out:
++ kfree(mei_buf);
+ return err;
+ }
+
--- /dev/null
+From 73ab4232388b7a08f17c8d08141ff2099fa0b161 Mon Sep 17 00:00:00 2001
+From: Alexander Usyskin <alexander.usyskin@intel.com>
+Date: Tue, 12 Aug 2014 18:07:56 +0300
+Subject: mei: reset client state on queued connect request
+
+From: Alexander Usyskin <alexander.usyskin@intel.com>
+
+commit 73ab4232388b7a08f17c8d08141ff2099fa0b161 upstream.
+
+If a connect request is queued (e.g. the device is in power gating),
+set the client state to initializing, thus avoiding a premature exit
+from the wait if the current state is disconnected.
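+
+A hedged sketch of the early exit (state names simplified, and
+connect_done() is a hypothetical stand-in for the driver's wait
+predicate): the connect wait completes once the client reaches a
+terminal state, so a client still marked disconnected from before
+satisfies the predicate immediately, before the firmware has
+answered; resetting it to initializing keeps the wait armed:
+
+    #include <stdio.h>
+
+    /* simplified client states, loosely modeled on the driver's enum */
+    enum file_state { INITIALIZING, CONNECTING, CONNECTED, DISCONNECTED };
+
+    /* hypothetical wait predicate: done once a terminal state is hit */
+    static int connect_done(enum file_state s)
+    {
+            return s == CONNECTED || s == DISCONNECTED;
+    }
+
+    int main(void)
+    {
+            /* stale state on a queued request: wait would end at once */
+            printf("DISCONNECTED: done=%d\n", connect_done(DISCONNECTED));
+            /* with the fix the state is reset first: wait really waits */
+            printf("INITIALIZING: done=%d\n", connect_done(INITIALIZING));
+            return 0;
+    }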
+
+This is a regression from:
+
+commit e4d8270e604c3202131bac607969605ac397b893
+Author: Alexander Usyskin <alexander.usyskin@intel.com>
+mei: set connecting state just upon connection request is sent to the fw
+
+Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
+Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/misc/mei/client.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/misc/mei/client.c
++++ b/drivers/misc/mei/client.c
+@@ -601,6 +601,7 @@ int mei_cl_connect(struct mei_cl *cl, st
+ cl->timer_count = MEI_CONNECT_TIMEOUT;
+ list_add_tail(&cb->list, &dev->ctrl_rd_list.list);
+ } else {
++ cl->state = MEI_FILE_INITIALIZING;
+ list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+ }
+
x86-xen-resume-timer-irqs-early.patch
x86-mm-fix-pte_special-versus-pte_numa.patch
hpsa-fix-bad-enomem-return-value-in-hpsa_big_passthru_ioctl.patch
+btrfs-fix-memory-corruption-by-ulist_add_merge-on-32bit-arch.patch
+btrfs-fix-csum-tree-corruption-duplicate-and-outdated-checksums.patch
+btrfs-read-lock-extent-buffer-while-walking-backrefs.patch
+btrfs-fix-compressed-write-corruption-on-enospc.patch
+btrfs-disable-strict-file-flushes-for-renames-and-truncates.patch
+btrfs-fix-crash-on-endio-of-reading-corrupted-block.patch
+btrfs-fix-filemap_flush-call-in-btrfs_file_release.patch
+btrfs-fix-task-hang-under-heavy-compressed-write.patch
+mei-reset-client-state-on-queued-connect-request.patch
+mei-nfc-fix-memory-leak-in-error-path.patch
+ext4-propagate-errors-up-to-ext4_find_entry-s-callers.patch
+ext4-move-i_size-i_disksize-update-routines-to-helper-function.patch