--- /dev/null
+From 8d875f95da43c6a8f18f77869f2ef26e9594fecc Mon Sep 17 00:00:00 2001
+From: Chris Mason <clm@fb.com>
+Date: Tue, 12 Aug 2014 10:47:42 -0700
+Subject: btrfs: disable strict file flushes for renames and truncates
+
+From: Chris Mason <clm@fb.com>
+
+commit 8d875f95da43c6a8f18f77869f2ef26e9594fecc upstream.
+
+Truncates and renames are often used to replace old versions of a file
+with new versions. Applications often expect this to be an atomic
+replacement, even if they haven't done anything to make sure the new
+version is fully on disk.
+
+Btrfs has strict flushing in place to make sure that renaming over an
+old file with a new file will fully flush out the new file before
+allowing the transaction commit with the rename to complete.
+
+This ordering means the commit code needs to be able to lock file pages,
+and there are a few paths in the filesystem where we will try to end a
+transaction with the page lock held. It's rare, but these things can
+deadlock.
+
+This patch removes the ordered flushes and switches to a best effort
+filemap_flush like ext4 uses. It's not perfect, but it should fix the
+deadlocks.
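+
+For reference, the replacement pattern in question looks like this from
+userspace (a minimal sketch; it deliberately skips fsync(), which is
+exactly what many applications do and what the strict flushing tried to
+paper over):
+
+    #include <fcntl.h>
+    #include <stdio.h>
+    #include <unistd.h>
+
+    int main(void)
+    {
+        /* write the new version to a temporary file */
+        int fd = open("file.tmp", O_WRONLY | O_CREAT | O_TRUNC, 0644);
+        if (fd < 0)
+            return 1;
+        if (write(fd, "new contents\n", 13) != 13)
+            return 1;
+        /* no fsync(fd) here -- the application assumes the rename
+         * alone makes the replacement atomic and durable */
+        close(fd);
+        /* atomically replace the old version */
+        return rename("file.tmp", "file") ? 1 : 0;
+    }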
+
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/btrfs_inode.h | 6 --
+ fs/btrfs/disk-io.c | 32 ------------
+ fs/btrfs/file.c | 26 ----------
+ fs/btrfs/inode.c | 47 +-----------------
+ fs/btrfs/ordered-data.c | 123 ------------------------------------------------
+ fs/btrfs/ordered-data.h | 5 -
+ fs/btrfs/transaction.c | 33 ------------
+ fs/btrfs/transaction.h | 1
+ 8 files changed, 6 insertions(+), 267 deletions(-)
+
+--- a/fs/btrfs/btrfs_inode.h
++++ b/fs/btrfs/btrfs_inode.h
+@@ -84,12 +84,6 @@ struct btrfs_inode {
+ */
+ struct list_head delalloc_inodes;
+
+- /*
+- * list for tracking inodes that must be sent to disk before a
+- * rename or truncate commit
+- */
+- struct list_head ordered_operations;
+-
+ /* node for the red-black tree that links inodes in subvolume root */
+ struct rb_node rb_node;
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -60,8 +60,6 @@ static void end_workqueue_fn(struct btrf
+ static void free_fs_root(struct btrfs_root *root);
+ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
+ int read_only);
+-static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
+- struct btrfs_root *root);
+ static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
+ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
+ struct btrfs_root *root);
+@@ -3829,34 +3827,6 @@ static void btrfs_error_commit_super(str
+ btrfs_cleanup_transaction(root);
+ }
+
+-static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
+- struct btrfs_root *root)
+-{
+- struct btrfs_inode *btrfs_inode;
+- struct list_head splice;
+-
+- INIT_LIST_HEAD(&splice);
+-
+- mutex_lock(&root->fs_info->ordered_operations_mutex);
+- spin_lock(&root->fs_info->ordered_root_lock);
+-
+- list_splice_init(&t->ordered_operations, &splice);
+- while (!list_empty(&splice)) {
+- btrfs_inode = list_entry(splice.next, struct btrfs_inode,
+- ordered_operations);
+-
+- list_del_init(&btrfs_inode->ordered_operations);
+- spin_unlock(&root->fs_info->ordered_root_lock);
+-
+- btrfs_invalidate_inodes(btrfs_inode->root);
+-
+- spin_lock(&root->fs_info->ordered_root_lock);
+- }
+-
+- spin_unlock(&root->fs_info->ordered_root_lock);
+- mutex_unlock(&root->fs_info->ordered_operations_mutex);
+-}
+-
+ static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
+ {
+ struct btrfs_ordered_extent *ordered;
+@@ -4093,8 +4063,6 @@ again:
+ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
+ struct btrfs_root *root)
+ {
+- btrfs_destroy_ordered_operations(cur_trans, root);
+-
+ btrfs_destroy_delayed_refs(cur_trans, root);
+
+ cur_trans->state = TRANS_STATE_COMMIT_START;
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -1838,33 +1838,9 @@ out:
+
+ int btrfs_release_file(struct inode *inode, struct file *filp)
+ {
+- /*
+- * ordered_data_close is set by settattr when we are about to truncate
+- * a file from a non-zero size to a zero size. This tries to
+- * flush down new bytes that may have been written if the
+- * application were using truncate to replace a file in place.
+- */
+- if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
+- &BTRFS_I(inode)->runtime_flags)) {
+- struct btrfs_trans_handle *trans;
+- struct btrfs_root *root = BTRFS_I(inode)->root;
+-
+- /*
+- * We need to block on a committing transaction to keep us from
+- * throwing a ordered operation on to the list and causing
+- * something like sync to deadlock trying to flush out this
+- * inode.
+- */
+- trans = btrfs_start_transaction(root, 0);
+- if (IS_ERR(trans))
+- return PTR_ERR(trans);
+- btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode);
+- btrfs_end_transaction(trans, root);
+- if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
+- filemap_flush(inode->i_mapping);
+- }
+ if (filp->private_data)
+ btrfs_ioctl_trans_end(filp);
++ filemap_flush(inode->i_mapping);
+ return 0;
+ }
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -7951,27 +7951,6 @@ static int btrfs_truncate(struct inode *
+ BUG_ON(ret);
+
+ /*
+- * setattr is responsible for setting the ordered_data_close flag,
+- * but that is only tested during the last file release. That
+- * could happen well after the next commit, leaving a great big
+- * window where new writes may get lost if someone chooses to write
+- * to this file after truncating to zero
+- *
+- * The inode doesn't have any dirty data here, and so if we commit
+- * this is a noop. If someone immediately starts writing to the inode
+- * it is very likely we'll catch some of their writes in this
+- * transaction, and the commit will find this file on the ordered
+- * data list with good things to send down.
+- *
+- * This is a best effort solution, there is still a window where
+- * using truncate to replace the contents of the file will
+- * end up with a zero length file after a crash.
+- */
+- if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
+- &BTRFS_I(inode)->runtime_flags))
+- btrfs_add_ordered_operation(trans, root, inode);
+-
+- /*
+ * So if we truncate and then write and fsync we normally would just
+ * write the extents that changed, which is a problem if we need to
+ * first truncate that entire inode. So set this flag so we write out
+@@ -8118,7 +8097,6 @@ struct inode *btrfs_alloc_inode(struct s
+ mutex_init(&ei->delalloc_mutex);
+ btrfs_ordered_inode_tree_init(&ei->ordered_tree);
+ INIT_LIST_HEAD(&ei->delalloc_inodes);
+- INIT_LIST_HEAD(&ei->ordered_operations);
+ RB_CLEAR_NODE(&ei->rb_node);
+
+ return inode;
+@@ -8158,17 +8136,6 @@ void btrfs_destroy_inode(struct inode *i
+ if (!root)
+ goto free;
+
+- /*
+- * Make sure we're properly removed from the ordered operation
+- * lists.
+- */
+- smp_mb();
+- if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
+- spin_lock(&root->fs_info->ordered_root_lock);
+- list_del_init(&BTRFS_I(inode)->ordered_operations);
+- spin_unlock(&root->fs_info->ordered_root_lock);
+- }
+-
+ if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+ &BTRFS_I(inode)->runtime_flags)) {
+ btrfs_info(root->fs_info, "inode %llu still on the orphan list",
+@@ -8350,12 +8317,10 @@ static int btrfs_rename(struct inode *ol
+ ret = 0;
+
+ /*
+- * we're using rename to replace one file with another.
+- * and the replacement file is large. Start IO on it now so
+- * we don't add too much work to the end of the transaction
++ * we're using rename to replace one file with another. Start IO on it
++ * now so we don't add too much work to the end of the transaction
+ */
+- if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size &&
+- old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
++ if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
+ filemap_flush(old_inode->i_mapping);
+
+ /* close the racy window with snapshot create/destroy ioctl */
+@@ -8403,12 +8368,6 @@ static int btrfs_rename(struct inode *ol
+ */
+ btrfs_pin_log_trans(root);
+ }
+- /*
+- * make sure the inode gets flushed if it is replacing
+- * something.
+- */
+- if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
+- btrfs_add_ordered_operation(trans, root, old_inode);
+
+ inode_inc_iversion(old_dir);
+ inode_inc_iversion(new_dir);
+--- a/fs/btrfs/ordered-data.c
++++ b/fs/btrfs/ordered-data.c
+@@ -571,18 +571,6 @@ void btrfs_remove_ordered_extent(struct
+
+ trace_btrfs_ordered_extent_remove(inode, entry);
+
+- /*
+- * we have no more ordered extents for this inode and
+- * no dirty pages. We can safely remove it from the
+- * list of ordered extents
+- */
+- if (RB_EMPTY_ROOT(&tree->tree) &&
+- !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
+- spin_lock(&root->fs_info->ordered_root_lock);
+- list_del_init(&BTRFS_I(inode)->ordered_operations);
+- spin_unlock(&root->fs_info->ordered_root_lock);
+- }
+-
+ if (!root->nr_ordered_extents) {
+ spin_lock(&root->fs_info->ordered_root_lock);
+ BUG_ON(list_empty(&root->ordered_root));
+@@ -687,81 +675,6 @@ void btrfs_wait_ordered_roots(struct btr
+ }
+
+ /*
+- * this is used during transaction commit to write all the inodes
+- * added to the ordered operation list. These files must be fully on
+- * disk before the transaction commits.
+- *
+- * we have two modes here, one is to just start the IO via filemap_flush
+- * and the other is to wait for all the io. When we wait, we have an
+- * extra check to make sure the ordered operation list really is empty
+- * before we return
+- */
+-int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root, int wait)
+-{
+- struct btrfs_inode *btrfs_inode;
+- struct inode *inode;
+- struct btrfs_transaction *cur_trans = trans->transaction;
+- struct list_head splice;
+- struct list_head works;
+- struct btrfs_delalloc_work *work, *next;
+- int ret = 0;
+-
+- INIT_LIST_HEAD(&splice);
+- INIT_LIST_HEAD(&works);
+-
+- mutex_lock(&root->fs_info->ordered_extent_flush_mutex);
+- spin_lock(&root->fs_info->ordered_root_lock);
+- list_splice_init(&cur_trans->ordered_operations, &splice);
+- while (!list_empty(&splice)) {
+- btrfs_inode = list_entry(splice.next, struct btrfs_inode,
+- ordered_operations);
+- inode = &btrfs_inode->vfs_inode;
+-
+- list_del_init(&btrfs_inode->ordered_operations);
+-
+- /*
+- * the inode may be getting freed (in sys_unlink path).
+- */
+- inode = igrab(inode);
+- if (!inode)
+- continue;
+-
+- if (!wait)
+- list_add_tail(&BTRFS_I(inode)->ordered_operations,
+- &cur_trans->ordered_operations);
+- spin_unlock(&root->fs_info->ordered_root_lock);
+-
+- work = btrfs_alloc_delalloc_work(inode, wait, 1);
+- if (!work) {
+- spin_lock(&root->fs_info->ordered_root_lock);
+- if (list_empty(&BTRFS_I(inode)->ordered_operations))
+- list_add_tail(&btrfs_inode->ordered_operations,
+- &splice);
+- list_splice_tail(&splice,
+- &cur_trans->ordered_operations);
+- spin_unlock(&root->fs_info->ordered_root_lock);
+- ret = -ENOMEM;
+- goto out;
+- }
+- list_add_tail(&work->list, &works);
+- btrfs_queue_work(root->fs_info->flush_workers,
+- &work->work);
+-
+- cond_resched();
+- spin_lock(&root->fs_info->ordered_root_lock);
+- }
+- spin_unlock(&root->fs_info->ordered_root_lock);
+-out:
+- list_for_each_entry_safe(work, next, &works, list) {
+- list_del_init(&work->list);
+- btrfs_wait_and_free_delalloc_work(work);
+- }
+- mutex_unlock(&root->fs_info->ordered_extent_flush_mutex);
+- return ret;
+-}
+-
+-/*
+ * Used to start IO or wait for a given ordered extent to finish.
+ *
+ * If wait is one, this effectively waits on page writeback for all the pages
+@@ -1120,42 +1033,6 @@ out:
+ return index;
+ }
+
+-
+-/*
+- * add a given inode to the list of inodes that must be fully on
+- * disk before a transaction commit finishes.
+- *
+- * This basically gives us the ext3 style data=ordered mode, and it is mostly
+- * used to make sure renamed files are fully on disk.
+- *
+- * It is a noop if the inode is already fully on disk.
+- *
+- * If trans is not null, we'll do a friendly check for a transaction that
+- * is already flushing things and force the IO down ourselves.
+- */
+-void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root, struct inode *inode)
+-{
+- struct btrfs_transaction *cur_trans = trans->transaction;
+- u64 last_mod;
+-
+- last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
+-
+- /*
+- * if this file hasn't been changed since the last transaction
+- * commit, we can safely return without doing anything
+- */
+- if (last_mod <= root->fs_info->last_trans_committed)
+- return;
+-
+- spin_lock(&root->fs_info->ordered_root_lock);
+- if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
+- list_add_tail(&BTRFS_I(inode)->ordered_operations,
+- &cur_trans->ordered_operations);
+- }
+- spin_unlock(&root->fs_info->ordered_root_lock);
+-}
+-
+ int __init ordered_data_init(void)
+ {
+ btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
+--- a/fs/btrfs/ordered-data.h
++++ b/fs/btrfs/ordered-data.h
+@@ -190,11 +190,6 @@ int btrfs_ordered_update_i_size(struct i
+ struct btrfs_ordered_extent *ordered);
+ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
+ u32 *sum, int len);
+-int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root, int wait);
+-void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root,
+- struct inode *inode);
+ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
+ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
+ void btrfs_get_logged_extents(struct inode *inode,
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -218,7 +218,6 @@ loop:
+ spin_lock_init(&cur_trans->delayed_refs.lock);
+
+ INIT_LIST_HEAD(&cur_trans->pending_snapshots);
+- INIT_LIST_HEAD(&cur_trans->ordered_operations);
+ INIT_LIST_HEAD(&cur_trans->pending_chunks);
+ INIT_LIST_HEAD(&cur_trans->switch_commits);
+ list_add_tail(&cur_trans->list, &fs_info->trans_list);
+@@ -1612,27 +1611,6 @@ static void cleanup_transaction(struct b
+ kmem_cache_free(btrfs_trans_handle_cachep, trans);
+ }
+
+-static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root)
+-{
+- int ret;
+-
+- ret = btrfs_run_delayed_items(trans, root);
+- if (ret)
+- return ret;
+-
+- /*
+- * rename don't use btrfs_join_transaction, so, once we
+- * set the transaction to blocked above, we aren't going
+- * to get any new ordered operations. We can safely run
+- * it here and no for sure that nothing new will be added
+- * to the list
+- */
+- ret = btrfs_run_ordered_operations(trans, root, 1);
+-
+- return ret;
+-}
+-
+ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
+ {
+ if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
+@@ -1653,13 +1631,6 @@ int btrfs_commit_transaction(struct btrf
+ struct btrfs_transaction *prev_trans = NULL;
+ int ret;
+
+- ret = btrfs_run_ordered_operations(trans, root, 0);
+- if (ret) {
+- btrfs_abort_transaction(trans, root, ret);
+- btrfs_end_transaction(trans, root);
+- return ret;
+- }
+-
+ /* Stop the commit early if ->aborted is set */
+ if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
+ ret = cur_trans->aborted;
+@@ -1740,7 +1711,7 @@ int btrfs_commit_transaction(struct btrf
+ if (ret)
+ goto cleanup_transaction;
+
+- ret = btrfs_flush_all_pending_stuffs(trans, root);
++ ret = btrfs_run_delayed_items(trans, root);
+ if (ret)
+ goto cleanup_transaction;
+
+@@ -1748,7 +1719,7 @@ int btrfs_commit_transaction(struct btrf
+ extwriter_counter_read(cur_trans) == 0);
+
+ /* some pending stuffs might be added after the previous flush. */
+- ret = btrfs_flush_all_pending_stuffs(trans, root);
++ ret = btrfs_run_delayed_items(trans, root);
+ if (ret)
+ goto cleanup_transaction;
+
+--- a/fs/btrfs/transaction.h
++++ b/fs/btrfs/transaction.h
+@@ -55,7 +55,6 @@ struct btrfs_transaction {
+ wait_queue_head_t writer_wait;
+ wait_queue_head_t commit_wait;
+ struct list_head pending_snapshots;
+- struct list_head ordered_operations;
+ struct list_head pending_chunks;
+ struct list_head switch_commits;
+ struct btrfs_delayed_ref_root delayed_refs;
--- /dev/null
+From ce62003f690dff38d3164a632ec69efa15c32cbf Mon Sep 17 00:00:00 2001
+From: Liu Bo <bo.li.liu@oracle.com>
+Date: Thu, 24 Jul 2014 22:48:05 +0800
+Subject: Btrfs: fix compressed write corruption on enospc
+
+From: Liu Bo <bo.li.liu@oracle.com>
+
+commit ce62003f690dff38d3164a632ec69efa15c32cbf upstream.
+
+When we fail to allocate space for the whole compressed extent, we
+fall back to uncompressed IO, but we forgot to redirty the pages that
+belong to this compressed extent. These 'clean' pages simply skip the
+'submit' part and go straight to endio, so we end up with data
+corruption because nothing is ever written.
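+
+To see why the 'clean' pages get lost, note how the writeback path picks
+pages; roughly (a sketch of the extent_write_cache_pages() logic, not the
+literal code):
+
+    /* only pages tagged dirty are looked up at all */
+    pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, nr);
+    ...
+    if (PageWriteback(page) || !clear_page_dirty_for_io(page)) {
+        unlock_page(page);
+        continue;    /* clean page: nothing to submit */
+    }
+
+Compression cleaned the pages up front, so without
+extent_range_redirty_for_io() the uncompressed retry finds nothing to
+submit.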
+
+Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
+Tested-By: Martin Steigerwald <martin@lichtvoll.de>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -709,6 +709,18 @@ retry:
+ unlock_extent(io_tree, async_extent->start,
+ async_extent->start +
+ async_extent->ram_size - 1);
++
++ /*
++ * we need to redirty the pages if we decide to
++ * fallback to uncompressed IO, otherwise we
++ * will not submit these pages down to lower
++ * layers.
++ */
++ extent_range_redirty_for_io(inode,
++ async_extent->start,
++ async_extent->start +
++ async_extent->ram_size - 1);
++
+ goto retry;
+ }
+ goto out_free;
--- /dev/null
+From 38c1c2e44bacb37efd68b90b3f70386a8ee370ee Mon Sep 17 00:00:00 2001
+From: Liu Bo <bo.li.liu@oracle.com>
+Date: Tue, 19 Aug 2014 23:33:13 +0800
+Subject: Btrfs: fix crash on endio of reading corrupted block
+
+From: Liu Bo <bo.li.liu@oracle.com>
+
+commit 38c1c2e44bacb37efd68b90b3f70386a8ee370ee upstream.
+
+The crash is
+
+------------[ cut here ]------------
+kernel BUG at fs/btrfs/extent_io.c:2124!
+[...]
+Workqueue: btrfs-endio normal_work_helper [btrfs]
+RIP: 0010:[<ffffffffa02d6055>] [<ffffffffa02d6055>] end_bio_extent_readpage+0xb45/0xcd0 [btrfs]
+
+This is in fact a regression.
+
+It happens because we forgot to advance @offset properly when reading a
+corrupted block, so @offset stays where it was. This leads to checksum
+errors while reading the remaining blocks queued up in the same bio, and
+we end up hitting the above BUG_ON.
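+
+Sketched from the hunk below (the condition name is a placeholder, not
+the literal code), the endio loop walks every block in the bio and must
+keep @offset in step even on the error path:
+
+    bio_for_each_segment_all(bvec, bio, i) {
+        ...
+        if (csum_failed_and_repair_attempted) {
+            if (err)
+                uptodate = 0;
+            offset += len;    /* the missing advance */
+            continue;
+        }
+        ...
+        offset += len;
+    }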
+
+Reported-by: Chris Murphy <lists@colorremedies.com>
+Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent_io.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -2532,6 +2532,7 @@ static void end_bio_extent_readpage(stru
+ test_bit(BIO_UPTODATE, &bio->bi_flags);
+ if (err)
+ uptodate = 0;
++ offset += len;
+ continue;
+ }
+ }
--- /dev/null
+From 27b9a8122ff71a8cadfbffb9c4f0694300464f3b Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Sat, 9 Aug 2014 21:22:27 +0100
+Subject: Btrfs: fix csum tree corruption, duplicate and outdated checksums
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 27b9a8122ff71a8cadfbffb9c4f0694300464f3b upstream.
+
+Under rare circumstances we can end up leaving 2 versions of a checksum
+for the same file extent range.
+
+The reason for this is that after calling btrfs_next_leaf we process
+slot 0 of the leaf it returns, instead of processing the slot set in
+path->slots[0]. Most of the time (by far) path->slots[0] is 0, but after
+btrfs_next_leaf() releases the path and before it searches for the next
+leaf, another task might cause a split of the next leaf, which migrates
+some of its keys to the leaf we were processing before calling
+btrfs_next_leaf(). In this case btrfs_next_leaf() returns again the
+same leaf but with path->slots[0] having a slot number corresponding
+to the first new key it got, that is, a slot number that didn't exist
+before calling btrfs_next_leaf(), as the leaf now has more keys than
+it had before. So we must really process the returned leaf starting at
+path->slots[0] always, as it isn't always 0, and the key at slot 0 can
+have an offset much lower than our search offset/bytenr.
+
+For example, consider the following scenario, where we have:
+
+sums->bytenr: 40157184, sums->len: 16384, sums end: 40173568
+four 4kb file data blocks with offsets 40157184, 40161280, 40165376, 40169472
+
+ Leaf N:
+
+ slot = 0 slot = btrfs_header_nritems() - 1
+ |-------------------------------------------------------------------|
+ | [(CSUM CSUM 39239680), size 8] ... [(CSUM CSUM 40116224), size 4] |
+ |-------------------------------------------------------------------|
+
+ Leaf N + 1:
+
+ slot = 0 slot = btrfs_header_nritems() - 1
+ |--------------------------------------------------------------------|
+ | [(CSUM CSUM 40161280), size 32] ... [(CSUM CSUM 40615936), size 8] |
+ |--------------------------------------------------------------------|
+
+Because we are at the last slot of leaf N, we call btrfs_next_leaf() to
+find the next highest key, which releases the current path and then searches
+for that next key. However after releasing the path and before finding that
+next key, the item at slot 0 of leaf N + 1 gets moved to leaf N, due to a call
+to ctree.c:push_leaf_left() (via ctree.c:split_leaf()), and therefore
+btrfs_next_leaf() will return us a path again with leaf N but with the slot
+pointing to its new last key (CSUM CSUM 40161280). This new version of leaf N
+is then:
+
+ slot = 0 slot = btrfs_header_nritems() - 2 slot = btrfs_header_nritems() - 1
+ |----------------------------------------------------------------------------------------------------|
+ | [(CSUM CSUM 39239680), size 8] ... [(CSUM CSUM 40116224), size 4] [(CSUM CSUM 40161280), size 32] |
+ |----------------------------------------------------------------------------------------------------|
+
+And incorrectly using slot 0 makes us set next_offset to 39239680, and we jump
+into the "insert:" label, which will set tmp to:
+
+ tmp = min((sums->len - total_bytes) >> blocksize_bits,
+ (next_offset - file_key.offset) >> blocksize_bits) =
+ min((16384 - 0) >> 12, (39239680 - 40157184) >> 12) =
+ min(4, (u64)-917504 = 18446744073708634112 >> 12) = 4
+
+and
+
+ ins_size = csum_size * tmp = 4 * 4 = 16 bytes.
+
+In other words, we insert a new csum item in the tree with key
+(CSUM_OBJECTID CSUM_KEY 40157184 = sums->bytenr) that contains the checksums
+for all the data (4 blocks of 4096 bytes each = sums->len). Which is wrong,
+because the item with key (CSUM CSUM 40161280) (the one that was moved from
+leaf N + 1 to the end of leaf N) contains the old checksums of the last 12288
+bytes of our data and won't get those old checksums removed.
+
+So this leaves us 2 different checksums for 3 4kb blocks of data in the tree,
+and breaks the logical rule:
+
+ Key_N+1.offset >= Key_N.offset + length_of_data_its_checksums_cover
+
+An obvious bad effect of this is that a subsequent csum tree lookup to get
+the checksum of any of the blocks with logical offset of 40161280, 40165376
+or 40169472 (the last 3 4kb blocks of file data), will get the old checksums.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/file-item.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/file-item.c
++++ b/fs/btrfs/file-item.c
+@@ -756,7 +756,7 @@ again:
+ found_next = 1;
+ if (ret != 0)
+ goto insert;
+- slot = 0;
++ slot = path->slots[0];
+ }
+ btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
+ if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
--- /dev/null
+From f6dc45c7a93a011dff6eb9b2ffda59c390c7705a Mon Sep 17 00:00:00 2001
+From: Chris Mason <clm@fb.com>
+Date: Wed, 20 Aug 2014 07:15:33 -0700
+Subject: Btrfs: fix filemap_flush call in btrfs_file_release
+
+From: Chris Mason <clm@fb.com>
+
+commit f6dc45c7a93a011dff6eb9b2ffda59c390c7705a upstream.
+
+We should only be flushing on close if the file was flagged as needing
+it during truncate. I broke this with my ordered data vs transaction
+commit deadlock fix.
+
+Thanks to Miao Xie for catching this.
+
+Signed-off-by: Chris Mason <clm@fb.com>
+Reported-by: Miao Xie <miaox@cn.fujitsu.com>
+Reported-by: Fengguang Wu <fengguang.wu@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/file.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -1840,7 +1840,15 @@ int btrfs_release_file(struct inode *ino
+ {
+ if (filp->private_data)
+ btrfs_ioctl_trans_end(filp);
+- filemap_flush(inode->i_mapping);
++ /*
++ * ordered_data_close is set by settattr when we are about to truncate
++ * a file from a non-zero size to a zero size. This tries to
++ * flush down new bytes that may have been written if the
++ * application were using truncate to replace a file in place.
++ */
++ if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
++ &BTRFS_I(inode)->runtime_flags))
++ filemap_flush(inode->i_mapping);
+ return 0;
+ }
+
--- /dev/null
+From 4eb1f66dce6c4dc28dd90a7ffbe6b2b1cb08aa4e Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Mon, 28 Jul 2014 10:57:04 +0200
+Subject: Btrfs: Fix memory corruption by ulist_add_merge() on 32bit arch
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit 4eb1f66dce6c4dc28dd90a7ffbe6b2b1cb08aa4e upstream.
+
+We've got bug reports that btrfs crashes when quota is enabled on
+32bit kernel, typically with the Oops like below:
+ BUG: unable to handle kernel NULL pointer dereference at 00000004
+ IP: [<f9234590>] find_parent_nodes+0x360/0x1380 [btrfs]
+ *pde = 00000000
+ Oops: 0000 [#1] SMP
+ CPU: 0 PID: 151 Comm: kworker/u8:2 Tainted: G S W 3.15.2-1.gd43d97e-default #1
+ Workqueue: btrfs-qgroup-rescan normal_work_helper [btrfs]
+ task: f1478130 ti: f147c000 task.ti: f147c000
+ EIP: 0060:[<f9234590>] EFLAGS: 00010213 CPU: 0
+ EIP is at find_parent_nodes+0x360/0x1380 [btrfs]
+ EAX: f147dda8 EBX: f147ddb0 ECX: 00000011 EDX: 00000000
+ ESI: 00000000 EDI: f147dda4 EBP: f147ddf8 ESP: f147dd38
+ DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
+ CR0: 8005003b CR2: 00000004 CR3: 00bf3000 CR4: 00000690
+ Stack:
+ 00000000 00000000 f147dda4 00000050 00000001 00000000 00000001 00000050
+ 00000001 00000000 d3059000 00000001 00000022 000000a8 00000000 00000000
+ 00000000 000000a1 00000000 00000000 00000001 00000000 00000000 11800000
+ Call Trace:
+ [<f923564d>] __btrfs_find_all_roots+0x9d/0xf0 [btrfs]
+ [<f9237bb1>] btrfs_qgroup_rescan_worker+0x401/0x760 [btrfs]
+ [<f9206148>] normal_work_helper+0xc8/0x270 [btrfs]
+ [<c025e38b>] process_one_work+0x11b/0x390
+ [<c025eea1>] worker_thread+0x101/0x340
+ [<c026432b>] kthread+0x9b/0xb0
+ [<c0712a71>] ret_from_kernel_thread+0x21/0x30
+ [<c0264290>] kthread_create_on_node+0x110/0x110
+
+This indicates a NULL corruption in the prefs_delayed list. Further
+investigation and bisection showed that the call to ulist_add_merge()
+causes the corruption.
+
+ulist_add_merge() takes a u64 as aux and writes a 64bit value into
+old_aux. The callers of this function in backref.c, however, pass a
+pointer to a pointer as old_aux. That is, the function writes a 64bit
+value through a 32bit pointer, and the overflow lands in the adjacent
+variable, in this case prefs_delayed.
+
+Here is a quick attempt to band-aid over this: a new function,
+ulist_add_merge_ptr(), is introduced to properly pass and store a
+pointer value instead of a u64. Ugly void ** casts remain in the
+callers because void ** cannot be converted implicitly, but that is
+still safer than an explicit cast to u64.
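+
+A sketch of how the clobbering happens on a 32bit stack (the layout is
+illustrative; the real victims are locals of find_parent_nodes()):
+
+    struct extent_inode_elem *eie = NULL;    /* 4 bytes on 32bit  */
+    struct list_head prefs_delayed;          /* adjacent variable */
+
+    /* ulist_add_merge() stores 8 bytes through old_aux, so the
+     * upper half spills into prefs_delayed: */
+    ulist_add_merge(refs, ref->parent, (uintptr_t)ref->inode_list,
+                    (u64 *)&eie, GFP_NOFS);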
+
+Bugzilla: https://bugzilla.novell.com/show_bug.cgi?id=887046
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/backref.c | 11 +++++------
+ fs/btrfs/ulist.h | 15 +++++++++++++++
+ 2 files changed, 20 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/backref.c
++++ b/fs/btrfs/backref.c
+@@ -276,9 +276,8 @@ static int add_all_parents(struct btrfs_
+ }
+ if (ret > 0)
+ goto next;
+- ret = ulist_add_merge(parents, eb->start,
+- (uintptr_t)eie,
+- (u64 *)&old, GFP_NOFS);
++ ret = ulist_add_merge_ptr(parents, eb->start,
++ eie, (void **)&old, GFP_NOFS);
+ if (ret < 0)
+ break;
+ if (!ret && extent_item_pos) {
+@@ -1008,9 +1007,9 @@ again:
+ goto out;
+ ref->inode_list = eie;
+ }
+- ret = ulist_add_merge(refs, ref->parent,
+- (uintptr_t)ref->inode_list,
+- (u64 *)&eie, GFP_NOFS);
++ ret = ulist_add_merge_ptr(refs, ref->parent,
++ ref->inode_list,
++ (void **)&eie, GFP_NOFS);
+ if (ret < 0)
+ goto out;
+ if (!ret && extent_item_pos) {
+--- a/fs/btrfs/ulist.h
++++ b/fs/btrfs/ulist.h
+@@ -57,6 +57,21 @@ void ulist_free(struct ulist *ulist);
+ int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask);
+ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
+ u64 *old_aux, gfp_t gfp_mask);
++
++/* just like ulist_add_merge() but take a pointer for the aux data */
++static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux,
++ void **old_aux, gfp_t gfp_mask)
++{
++#if BITS_PER_LONG == 32
++ u64 old64 = (uintptr_t)*old_aux;
++ int ret = ulist_add_merge(ulist, val, (uintptr_t)aux, &old64, gfp_mask);
++ *old_aux = (void *)((uintptr_t)old64);
++ return ret;
++#else
++ return ulist_add_merge(ulist, val, (u64)aux, (u64 *)old_aux, gfp_mask);
++#endif
++}
++
+ struct ulist_node *ulist_next(struct ulist *ulist,
+ struct ulist_iterator *uiter);
+
--- /dev/null
+From 9e0af23764344f7f1b68e4eefbe7dc865018b63d Mon Sep 17 00:00:00 2001
+From: Liu Bo <bo.li.liu@oracle.com>
+Date: Fri, 15 Aug 2014 23:36:53 +0800
+Subject: Btrfs: fix task hang under heavy compressed write
+
+From: Liu Bo <bo.li.liu@oracle.com>
+
+commit 9e0af23764344f7f1b68e4eefbe7dc865018b63d upstream.
+
+This has been reported and discussed for a long time, and the hang occurs
+in both 3.15 and 3.16.
+
+Btrfs has now migrated to the kernel workqueue, but the migration
+introduced this hang.
+
+Btrfs has a kind of work that is queued in an ordered way, which means
+that its ordered_func() callbacks must be processed FIFO, so it usually
+looks like --
+
+normal_work_helper(arg)
+ work = container_of(arg, struct btrfs_work, normal_work);
+
+ work->func() <---- (we name it work X)
+ for ordered_work in wq->ordered_list
+ ordered_work->ordered_func()
+ ordered_work->ordered_free()
+
+The hang is a rare case. First, when we find free space, we get an
+uncached block group; then we go to read its free space cache inode for
+free space information, so it will
+
+file a readahead request
+ btrfs_readpages()
+ for page that is not in page cache
+ __do_readpage()
+ submit_extent_page()
+ btrfs_submit_bio_hook()
+ btrfs_bio_wq_end_io()
+ submit_bio()
+ end_workqueue_bio() <--(ret by the 1st endio)
+ queue a work(named work Y) for the 2nd
+ also the real endio()
+
+So the hang occurs when work Y's work_struct and work X's work_struct happen
+to share the same address.
+
+A bit more explanation,
+
+A,B,C -- struct btrfs_work
+arg -- struct work_struct
+
+kthread:
+worker_thread()
+ pick up a work_struct from @worklist
+ process_one_work(arg)
+ worker->current_work = arg; <-- arg is A->normal_work
+ worker->current_func(arg)
+ normal_work_helper(arg)
+ A = container_of(arg, struct btrfs_work, normal_work);
+
+ A->func()
+ A->ordered_func()
+ A->ordered_free() <-- A gets freed
+
+ B->ordered_func()
+ submit_compressed_extents()
+ find_free_extent()
+ load_free_space_inode()
+ ... <-- (the above readhead stack)
+ end_workqueue_bio()
+ btrfs_queue_work(work C)
+ B->ordered_free()
+
+So if work A has a high priority in wq->ordered_list and there are more
+ordered works queued after it, such as B->ordered_func(), A's memory can
+be freed before normal_work_helper() returns, which means that the kernel
+workqueue code in worker_thread() still has worker->current_work pointing
+to work A->normal_work, i.e. arg's address.
+
+Meanwhile, work C is allocated after work A is freed, and work
+C->normal_work and work A->normal_work are likely to share the same
+address (I confirmed this with ftrace output, so I'm not just guessing;
+it's rare though).
+
+When another kthread picks up work C->normal_work to process and finds
+that our kthread is already processing it (see
+find_worker_executing_work()), it treats work C as a collision and skips
+it, which ends up with nobody processing work C.
+
+So the situation is that our kthread is waiting forever on work C.
+
+Besides this, there are other cases that can lead to deadlock, but the
+real problem is that all btrfs workqueues share one work->func,
+normal_work_helper. This patch therefore gives each workqueue its own
+helper function, which is only a wrapper of normal_work_helper.
+
+With this patch, I no longer hit the above hang.
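+
+The effect, sketched from the hunks below: btrfs_init_work() now takes a
+per-workqueue helper, so INIT_WORK() records a distinct function pointer
+for each queue and find_worker_executing_work() (which matches on both
+the work address and the work function) no longer mistakes a recycled
+btrfs_work address for work that is already running:
+
+    /* before: every queue shared normal_work_helper */
+    INIT_WORK(&work->normal_work, normal_work_helper);
+
+    /* after: e.g. the endio-write queue gets its own wrapper */
+    btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
+                    finish_ordered_fn, NULL, NULL);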
+
+Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/async-thread.c | 44 +++++++++++++++++++++++++++++++--------
+ fs/btrfs/async-thread.h | 28 +++++++++++++++++++++++-
+ fs/btrfs/delayed-inode.c | 4 +--
+ fs/btrfs/disk-io.c | 53 +++++++++++++++++++++++++----------------------
+ fs/btrfs/extent-tree.c | 7 +++---
+ fs/btrfs/inode.c | 35 ++++++++++++++++++++-----------
+ fs/btrfs/ordered-data.c | 1
+ fs/btrfs/qgroup.c | 1
+ fs/btrfs/raid56.c | 9 +++++--
+ fs/btrfs/reada.c | 3 +-
+ fs/btrfs/scrub.c | 14 +++++++-----
+ fs/btrfs/volumes.c | 3 +-
+ 12 files changed, 141 insertions(+), 61 deletions(-)
+
+--- a/fs/btrfs/async-thread.c
++++ b/fs/btrfs/async-thread.c
+@@ -22,7 +22,6 @@
+ #include <linux/list.h>
+ #include <linux/spinlock.h>
+ #include <linux/freezer.h>
+-#include <linux/workqueue.h>
+ #include "async-thread.h"
+ #include "ctree.h"
+
+@@ -55,8 +54,39 @@ struct btrfs_workqueue {
+ struct __btrfs_workqueue *high;
+ };
+
+-static inline struct __btrfs_workqueue
+-*__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
++static void normal_work_helper(struct btrfs_work *work);
++
++#define BTRFS_WORK_HELPER(name) \
++void btrfs_##name(struct work_struct *arg) \
++{ \
++ struct btrfs_work *work = container_of(arg, struct btrfs_work, \
++ normal_work); \
++ normal_work_helper(work); \
++}
++
++BTRFS_WORK_HELPER(worker_helper);
++BTRFS_WORK_HELPER(delalloc_helper);
++BTRFS_WORK_HELPER(flush_delalloc_helper);
++BTRFS_WORK_HELPER(cache_helper);
++BTRFS_WORK_HELPER(submit_helper);
++BTRFS_WORK_HELPER(fixup_helper);
++BTRFS_WORK_HELPER(endio_helper);
++BTRFS_WORK_HELPER(endio_meta_helper);
++BTRFS_WORK_HELPER(endio_meta_write_helper);
++BTRFS_WORK_HELPER(endio_raid56_helper);
++BTRFS_WORK_HELPER(rmw_helper);
++BTRFS_WORK_HELPER(endio_write_helper);
++BTRFS_WORK_HELPER(freespace_write_helper);
++BTRFS_WORK_HELPER(delayed_meta_helper);
++BTRFS_WORK_HELPER(readahead_helper);
++BTRFS_WORK_HELPER(qgroup_rescan_helper);
++BTRFS_WORK_HELPER(extent_refs_helper);
++BTRFS_WORK_HELPER(scrub_helper);
++BTRFS_WORK_HELPER(scrubwrc_helper);
++BTRFS_WORK_HELPER(scrubnc_helper);
++
++static struct __btrfs_workqueue *
++__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
+ int thresh)
+ {
+ struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
+@@ -232,13 +262,11 @@ static void run_ordered_work(struct __bt
+ spin_unlock_irqrestore(lock, flags);
+ }
+
+-static void normal_work_helper(struct work_struct *arg)
++static void normal_work_helper(struct btrfs_work *work)
+ {
+- struct btrfs_work *work;
+ struct __btrfs_workqueue *wq;
+ int need_order = 0;
+
+- work = container_of(arg, struct btrfs_work, normal_work);
+ /*
+ * We should not touch things inside work in the following cases:
+ * 1) after work->func() if it has no ordered_free
+@@ -262,7 +290,7 @@ static void normal_work_helper(struct wo
+ trace_btrfs_all_work_done(work);
+ }
+
+-void btrfs_init_work(struct btrfs_work *work,
++void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
+ btrfs_func_t func,
+ btrfs_func_t ordered_func,
+ btrfs_func_t ordered_free)
+@@ -270,7 +298,7 @@ void btrfs_init_work(struct btrfs_work *
+ work->func = func;
+ work->ordered_func = ordered_func;
+ work->ordered_free = ordered_free;
+- INIT_WORK(&work->normal_work, normal_work_helper);
++ INIT_WORK(&work->normal_work, uniq_func);
+ INIT_LIST_HEAD(&work->ordered_list);
+ work->flags = 0;
+ }
+--- a/fs/btrfs/async-thread.h
++++ b/fs/btrfs/async-thread.h
+@@ -19,12 +19,14 @@
+
+ #ifndef __BTRFS_ASYNC_THREAD_
+ #define __BTRFS_ASYNC_THREAD_
++#include <linux/workqueue.h>
+
+ struct btrfs_workqueue;
+ /* Internal use only */
+ struct __btrfs_workqueue;
+ struct btrfs_work;
+ typedef void (*btrfs_func_t)(struct btrfs_work *arg);
++typedef void (*btrfs_work_func_t)(struct work_struct *arg);
+
+ struct btrfs_work {
+ btrfs_func_t func;
+@@ -38,11 +40,35 @@ struct btrfs_work {
+ unsigned long flags;
+ };
+
++#define BTRFS_WORK_HELPER_PROTO(name) \
++void btrfs_##name(struct work_struct *arg)
++
++BTRFS_WORK_HELPER_PROTO(worker_helper);
++BTRFS_WORK_HELPER_PROTO(delalloc_helper);
++BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper);
++BTRFS_WORK_HELPER_PROTO(cache_helper);
++BTRFS_WORK_HELPER_PROTO(submit_helper);
++BTRFS_WORK_HELPER_PROTO(fixup_helper);
++BTRFS_WORK_HELPER_PROTO(endio_helper);
++BTRFS_WORK_HELPER_PROTO(endio_meta_helper);
++BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper);
++BTRFS_WORK_HELPER_PROTO(endio_raid56_helper);
++BTRFS_WORK_HELPER_PROTO(rmw_helper);
++BTRFS_WORK_HELPER_PROTO(endio_write_helper);
++BTRFS_WORK_HELPER_PROTO(freespace_write_helper);
++BTRFS_WORK_HELPER_PROTO(delayed_meta_helper);
++BTRFS_WORK_HELPER_PROTO(readahead_helper);
++BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper);
++BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
++BTRFS_WORK_HELPER_PROTO(scrub_helper);
++BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
++BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
++
+ struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
+ int flags,
+ int max_active,
+ int thresh);
+-void btrfs_init_work(struct btrfs_work *work,
++void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
+ btrfs_func_t func,
+ btrfs_func_t ordered_func,
+ btrfs_func_t ordered_free);
+--- a/fs/btrfs/delayed-inode.c
++++ b/fs/btrfs/delayed-inode.c
+@@ -1395,8 +1395,8 @@ static int btrfs_wq_run_delayed_node(str
+ return -ENOMEM;
+
+ async_work->delayed_root = delayed_root;
+- btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root,
+- NULL, NULL);
++ btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
++ btrfs_async_run_delayed_root, NULL, NULL);
+ async_work->nr = nr;
+
+ btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work);
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -39,7 +39,6 @@
+ #include "btrfs_inode.h"
+ #include "volumes.h"
+ #include "print-tree.h"
+-#include "async-thread.h"
+ #include "locking.h"
+ #include "tree-log.h"
+ #include "free-space-cache.h"
+@@ -693,35 +692,41 @@ static void end_workqueue_bio(struct bio
+ {
+ struct end_io_wq *end_io_wq = bio->bi_private;
+ struct btrfs_fs_info *fs_info;
++ struct btrfs_workqueue *wq;
++ btrfs_work_func_t func;
+
+ fs_info = end_io_wq->info;
+ end_io_wq->error = err;
+- btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
+
+ if (bio->bi_rw & REQ_WRITE) {
+- if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
+- btrfs_queue_work(fs_info->endio_meta_write_workers,
+- &end_io_wq->work);
+- else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
+- btrfs_queue_work(fs_info->endio_freespace_worker,
+- &end_io_wq->work);
+- else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
+- btrfs_queue_work(fs_info->endio_raid56_workers,
+- &end_io_wq->work);
+- else
+- btrfs_queue_work(fs_info->endio_write_workers,
+- &end_io_wq->work);
++ if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
++ wq = fs_info->endio_meta_write_workers;
++ func = btrfs_endio_meta_write_helper;
++ } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) {
++ wq = fs_info->endio_freespace_worker;
++ func = btrfs_freespace_write_helper;
++ } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
++ wq = fs_info->endio_raid56_workers;
++ func = btrfs_endio_raid56_helper;
++ } else {
++ wq = fs_info->endio_write_workers;
++ func = btrfs_endio_write_helper;
++ }
+ } else {
+- if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
+- btrfs_queue_work(fs_info->endio_raid56_workers,
+- &end_io_wq->work);
+- else if (end_io_wq->metadata)
+- btrfs_queue_work(fs_info->endio_meta_workers,
+- &end_io_wq->work);
+- else
+- btrfs_queue_work(fs_info->endio_workers,
+- &end_io_wq->work);
++ if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
++ wq = fs_info->endio_raid56_workers;
++ func = btrfs_endio_raid56_helper;
++ } else if (end_io_wq->metadata) {
++ wq = fs_info->endio_meta_workers;
++ func = btrfs_endio_meta_helper;
++ } else {
++ wq = fs_info->endio_workers;
++ func = btrfs_endio_helper;
++ }
+ }
++
++ btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL);
++ btrfs_queue_work(wq, &end_io_wq->work);
+ }
+
+ /*
+@@ -828,7 +833,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_
+ async->submit_bio_start = submit_bio_start;
+ async->submit_bio_done = submit_bio_done;
+
+- btrfs_init_work(&async->work, run_one_async_start,
++ btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
+ run_one_async_done, run_one_async_free);
+
+ async->bio_flags = bio_flags;
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -552,7 +552,8 @@ static int cache_block_group(struct btrf
+ caching_ctl->block_group = cache;
+ caching_ctl->progress = cache->key.objectid;
+ atomic_set(&caching_ctl->count, 1);
+- btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
++ btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
++ caching_thread, NULL, NULL);
+
+ spin_lock(&cache->lock);
+ /*
+@@ -2749,8 +2750,8 @@ int btrfs_async_run_delayed_refs(struct
+ async->sync = 0;
+ init_completion(&async->wait);
+
+- btrfs_init_work(&async->work, delayed_ref_async_start,
+- NULL, NULL);
++ btrfs_init_work(&async->work, btrfs_extent_refs_helper,
++ delayed_ref_async_start, NULL, NULL);
+
+ btrfs_queue_work(root->fs_info->extent_workers, &async->work);
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -1096,8 +1096,10 @@ static int cow_file_range_async(struct i
+ async_cow->end = cur_end;
+ INIT_LIST_HEAD(&async_cow->extents);
+
+- btrfs_init_work(&async_cow->work, async_cow_start,
+- async_cow_submit, async_cow_free);
++ btrfs_init_work(&async_cow->work,
++ btrfs_delalloc_helper,
++ async_cow_start, async_cow_submit,
++ async_cow_free);
+
+ nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
+ PAGE_CACHE_SHIFT;
+@@ -1881,7 +1883,8 @@ static int btrfs_writepage_start_hook(st
+
+ SetPageChecked(page);
+ page_cache_get(page);
+- btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
++ btrfs_init_work(&fixup->work, btrfs_fixup_helper,
++ btrfs_writepage_fixup_worker, NULL, NULL);
+ fixup->page = page;
+ btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work);
+ return -EBUSY;
+@@ -2822,7 +2825,8 @@ static int btrfs_writepage_end_io_hook(s
+ struct inode *inode = page->mapping->host;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_ordered_extent *ordered_extent = NULL;
+- struct btrfs_workqueue *workers;
++ struct btrfs_workqueue *wq;
++ btrfs_work_func_t func;
+
+ trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
+
+@@ -2831,13 +2835,17 @@ static int btrfs_writepage_end_io_hook(s
+ end - start + 1, uptodate))
+ return 0;
+
+- btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL);
++ if (btrfs_is_free_space_inode(inode)) {
++ wq = root->fs_info->endio_freespace_worker;
++ func = btrfs_freespace_write_helper;
++ } else {
++ wq = root->fs_info->endio_write_workers;
++ func = btrfs_endio_write_helper;
++ }
+
+- if (btrfs_is_free_space_inode(inode))
+- workers = root->fs_info->endio_freespace_worker;
+- else
+- workers = root->fs_info->endio_write_workers;
+- btrfs_queue_work(workers, &ordered_extent->work);
++ btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
++ NULL);
++ btrfs_queue_work(wq, &ordered_extent->work);
+
+ return 0;
+ }
+@@ -7158,7 +7166,8 @@ again:
+ if (!ret)
+ goto out_test;
+
+- btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL);
++ btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
++ finish_ordered_fn, NULL, NULL);
+ btrfs_queue_work(root->fs_info->endio_write_workers,
+ &ordered->work);
+ out_test:
+@@ -8485,7 +8494,9 @@ struct btrfs_delalloc_work *btrfs_alloc_
+ work->inode = inode;
+ work->wait = wait;
+ work->delay_iput = delay_iput;
+- btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL);
++ WARN_ON_ONCE(!inode);
++ btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
++ btrfs_run_delalloc_work, NULL, NULL);
+
+ return work;
+ }
+--- a/fs/btrfs/ordered-data.c
++++ b/fs/btrfs/ordered-data.c
+@@ -615,6 +615,7 @@ int btrfs_wait_ordered_extents(struct bt
+ spin_unlock(&root->ordered_extent_lock);
+
+ btrfs_init_work(&ordered->flush_work,
++ btrfs_flush_delalloc_helper,
+ btrfs_run_ordered_extent_work, NULL, NULL);
+ list_add_tail(&ordered->work_list, &works);
+ btrfs_queue_work(root->fs_info->flush_workers,
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -2551,6 +2551,7 @@ qgroup_rescan_init(struct btrfs_fs_info
+ memset(&fs_info->qgroup_rescan_work, 0,
+ sizeof(fs_info->qgroup_rescan_work));
+ btrfs_init_work(&fs_info->qgroup_rescan_work,
++ btrfs_qgroup_rescan_helper,
+ btrfs_qgroup_rescan_worker, NULL, NULL);
+
+ if (ret) {
+--- a/fs/btrfs/raid56.c
++++ b/fs/btrfs/raid56.c
+@@ -1416,7 +1416,8 @@ cleanup:
+
+ static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
+ {
+- btrfs_init_work(&rbio->work, rmw_work, NULL, NULL);
++ btrfs_init_work(&rbio->work, btrfs_rmw_helper,
++ rmw_work, NULL, NULL);
+
+ btrfs_queue_work(rbio->fs_info->rmw_workers,
+ &rbio->work);
+@@ -1424,7 +1425,8 @@ static void async_rmw_stripe(struct btrf
+
+ static void async_read_rebuild(struct btrfs_raid_bio *rbio)
+ {
+- btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL);
++ btrfs_init_work(&rbio->work, btrfs_rmw_helper,
++ read_rebuild_work, NULL, NULL);
+
+ btrfs_queue_work(rbio->fs_info->rmw_workers,
+ &rbio->work);
+@@ -1665,7 +1667,8 @@ static void btrfs_raid_unplug(struct blk
+ plug = container_of(cb, struct btrfs_plug_cb, cb);
+
+ if (from_schedule) {
+- btrfs_init_work(&plug->work, unplug_work, NULL, NULL);
++ btrfs_init_work(&plug->work, btrfs_rmw_helper,
++ unplug_work, NULL, NULL);
+ btrfs_queue_work(plug->info->rmw_workers,
+ &plug->work);
+ return;
+--- a/fs/btrfs/reada.c
++++ b/fs/btrfs/reada.c
+@@ -798,7 +798,8 @@ static void reada_start_machine(struct b
+ /* FIXME we cannot handle this properly right now */
+ BUG();
+ }
+- btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL);
++ btrfs_init_work(&rmw->work, btrfs_readahead_helper,
++ reada_start_machine_worker, NULL, NULL);
+ rmw->fs_info = fs_info;
+
+ btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -428,8 +428,8 @@ struct scrub_ctx *scrub_setup_ctx(struct
+ sbio->index = i;
+ sbio->sctx = sctx;
+ sbio->page_count = 0;
+- btrfs_init_work(&sbio->work, scrub_bio_end_io_worker,
+- NULL, NULL);
++ btrfs_init_work(&sbio->work, btrfs_scrub_helper,
++ scrub_bio_end_io_worker, NULL, NULL);
+
+ if (i != SCRUB_BIOS_PER_SCTX - 1)
+ sctx->bios[i]->next_free = i + 1;
+@@ -999,8 +999,8 @@ nodatasum_case:
+ fixup_nodatasum->root = fs_info->extent_root;
+ fixup_nodatasum->mirror_num = failed_mirror_index + 1;
+ scrub_pending_trans_workers_inc(sctx);
+- btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum,
+- NULL, NULL);
++ btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
++ scrub_fixup_nodatasum, NULL, NULL);
+ btrfs_queue_work(fs_info->scrub_workers,
+ &fixup_nodatasum->work);
+ goto out;
+@@ -1616,7 +1616,8 @@ static void scrub_wr_bio_end_io(struct b
+ sbio->err = err;
+ sbio->bio = bio;
+
+- btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL);
++ btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
++ scrub_wr_bio_end_io_worker, NULL, NULL);
+ btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
+ }
+
+@@ -3203,7 +3204,8 @@ static int copy_nocow_pages(struct scrub
+ nocow_ctx->len = len;
+ nocow_ctx->mirror_num = mirror_num;
+ nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
+- btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL);
++ btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
++ copy_nocow_pages_worker, NULL, NULL);
+ INIT_LIST_HEAD(&nocow_ctx->inodes);
+ btrfs_queue_work(fs_info->scrub_nocow_workers,
+ &nocow_ctx->work);
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -5800,7 +5800,8 @@ struct btrfs_device *btrfs_alloc_device(
+ else
+ generate_random_uuid(dev->uuid);
+
+- btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL);
++ btrfs_init_work(&dev->work, btrfs_submit_helper,
++ pending_bios_fn, NULL, NULL);
+
+ return dev;
+ }
--- /dev/null
+From 6f7ff6d7832c6be13e8c95598884dbc40ad69fb7 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 2 Jul 2014 20:07:54 +0100
+Subject: Btrfs: read lock extent buffer while walking backrefs
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 6f7ff6d7832c6be13e8c95598884dbc40ad69fb7 upstream.
+
+Before processing the extent buffer, acquire a read lock on it, so
+that we're safe against concurrent updates on the extent buffer.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/backref.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/backref.c
++++ b/fs/btrfs/backref.c
+@@ -1000,8 +1000,11 @@ again:
+ ret = -EIO;
+ goto out;
+ }
++ btrfs_tree_read_lock(eb);
++ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+ ret = find_extent_in_eb(eb, bytenr,
+ *extent_item_pos, &eie);
++ btrfs_tree_read_unlock_blocking(eb);
+ free_extent_buffer(eb);
+ if (ret < 0)
+ goto out;
--- /dev/null
+From 4631dbf677ded0419fee35ca7408285dabfaef1a Mon Sep 17 00:00:00 2001
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+Date: Sat, 23 Aug 2014 17:48:28 -0400
+Subject: ext4: move i_size,i_disksize update routines to helper function
+
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+
+commit 4631dbf677ded0419fee35ca7408285dabfaef1a upstream.
+
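+The new helper folds the duplicated i_size/i_disksize checks into one
+place and reports what changed through its return value (bit 0 set when
+i_size was raised, bit 1 when i_disksize was), so a caller such as
+ext4_fallocate() can test, per the hunks below:
+
+    if (ext4_update_inode_size(inode, new_size) & 0x1)
+        inode->i_mtime = inode->i_ctime;    /* i_size grew */
+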
+Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/ext4.h | 16 ++++++++++++++++
+ fs/ext4/extents.c | 17 ++++-------------
+ fs/ext4/inode.c | 34 ++++++++--------------------------
+ 3 files changed, 28 insertions(+), 39 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -2453,6 +2453,22 @@ static inline void ext4_update_i_disksiz
+ up_write(&EXT4_I(inode)->i_data_sem);
+ }
+
++/* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */
++static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
++{
++ int changed = 0;
++
++ if (newsize > inode->i_size) {
++ i_size_write(inode, newsize);
++ changed = 1;
++ }
++ if (newsize > EXT4_I(inode)->i_disksize) {
++ ext4_update_i_disksize(inode, newsize);
++ changed |= 2;
++ }
++ return changed;
++}
++
+ struct ext4_group_info {
+ unsigned long bb_state;
+ struct rb_root bb_free_root;
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -4838,12 +4838,8 @@ static long ext4_zero_range(struct file
+ }
+
+ inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+-
+ if (new_size) {
+- if (new_size > i_size_read(inode))
+- i_size_write(inode, new_size);
+- if (new_size > EXT4_I(inode)->i_disksize)
+- ext4_update_i_disksize(inode, new_size);
++ ext4_update_inode_size(inode, new_size);
+ } else {
+ /*
+ * Mark that we allocate beyond EOF so the subsequent truncate
+@@ -4885,7 +4881,6 @@ long ext4_fallocate(struct file *file, i
+ int ret = 0;
+ int flags;
+ ext4_lblk_t lblk;
+- struct timespec tv;
+ unsigned int blkbits = inode->i_blkbits;
+
+ /* Return error if mode is not supported */
+@@ -4944,15 +4939,11 @@ long ext4_fallocate(struct file *file, i
+ if (IS_ERR(handle))
+ goto out;
+
+- tv = inode->i_ctime = ext4_current_time(inode);
++ inode->i_ctime = ext4_current_time(inode);
+
+ if (new_size) {
+- if (new_size > i_size_read(inode)) {
+- i_size_write(inode, new_size);
+- inode->i_mtime = tv;
+- }
+- if (new_size > EXT4_I(inode)->i_disksize)
+- ext4_update_i_disksize(inode, new_size);
++ if (ext4_update_inode_size(inode, new_size) & 0x1)
++ inode->i_mtime = inode->i_ctime;
+ } else {
+ /*
+ * Mark that we allocate beyond EOF so the subsequent truncate
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1092,27 +1092,11 @@ static int ext4_write_end(struct file *f
+ } else
+ copied = block_write_end(file, mapping, pos,
+ len, copied, page, fsdata);
+-
+ /*
+- * No need to use i_size_read() here, the i_size
+- * cannot change under us because we hole i_mutex.
+- *
+- * But it's important to update i_size while still holding page lock:
++ * it's important to update i_size while still holding page lock:
+ * page writeout could otherwise come in and zero beyond i_size.
+ */
+- if (pos + copied > inode->i_size) {
+- i_size_write(inode, pos + copied);
+- i_size_changed = 1;
+- }
+-
+- if (pos + copied > EXT4_I(inode)->i_disksize) {
+- /* We need to mark inode dirty even if
+- * new_i_size is less that inode->i_size
+- * but greater than i_disksize. (hint delalloc)
+- */
+- ext4_update_i_disksize(inode, (pos + copied));
+- i_size_changed = 1;
+- }
++ i_size_changed = ext4_update_inode_size(inode, pos + copied);
+ unlock_page(page);
+ page_cache_release(page);
+
+@@ -1160,7 +1144,7 @@ static int ext4_journalled_write_end(str
+ int ret = 0, ret2;
+ int partial = 0;
+ unsigned from, to;
+- loff_t new_i_size;
++ int size_changed = 0;
+
+ trace_ext4_journalled_write_end(inode, pos, len, copied);
+ from = pos & (PAGE_CACHE_SIZE - 1);
+@@ -1183,20 +1167,18 @@ static int ext4_journalled_write_end(str
+ if (!partial)
+ SetPageUptodate(page);
+ }
+- new_i_size = pos + copied;
+- if (new_i_size > inode->i_size)
+- i_size_write(inode, pos+copied);
++ size_changed = ext4_update_inode_size(inode, pos + copied);
+ ext4_set_inode_state(inode, EXT4_STATE_JDATA);
+ EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
+- if (new_i_size > EXT4_I(inode)->i_disksize) {
+- ext4_update_i_disksize(inode, new_i_size);
++ unlock_page(page);
++ page_cache_release(page);
++
++ if (size_changed) {
+ ret2 = ext4_mark_inode_dirty(handle, inode);
+ if (!ret)
+ ret = ret2;
+ }
+
+- unlock_page(page);
+- page_cache_release(page);
+ if (pos + len > inode->i_size && ext4_can_truncate(inode))
+ /* if we have allocated more blocks and copied
+ * less. We will have blocks allocated outside
--- /dev/null
+From 36de928641ee48b2078d3fe9514242aaa2f92013 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Sat, 23 Aug 2014 17:47:19 -0400
+Subject: ext4: propagate errors up to ext4_find_entry()'s callers
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 36de928641ee48b2078d3fe9514242aaa2f92013 upstream.
+
+If we run into some kind of error, such as ENOMEM, while calling
+ext4_getblk() or ext4_dx_find_entry(), we need to make sure this error
+gets propagated up to ext4_find_entry() and then to its callers. This
+way, transient errors such as ENOMEM can get propagated to the VFS.
+This is important so that the system calls return the appropriate
+error, and also so that in the case of ext4_lookup(), we return an
+error instead of a NULL inode, since that will result in a negative
+dentry cache entry that will stick around long past the OOM condition
+which caused a transient ENOMEM error.
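+
+After this change callers have three outcomes to distinguish instead of
+two; sketched from the hunks below (the label name is illustrative):
+
+    bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+    if (IS_ERR(bh))         /* transient failure, e.g. -ENOMEM */
+        return PTR_ERR(bh);
+    if (!bh)                /* entry genuinely not found */
+        goto not_found;
+    /* entry found: bh holds the directory block */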
+
+Google-Bug-Id: #17142205
+
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/ext4.h | 2 +-
+ fs/ext4/namei.c | 35 +++++++++++++++++++++++++++++++++--
+ 2 files changed, 34 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1826,7 +1826,7 @@ ext4_group_first_block_no(struct super_b
+ /*
+ * Special error return code only used by dx_probe() and its callers.
+ */
+-#define ERR_BAD_DX_DIR -75000
++#define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1))
+
+ /*
+ * Timeout and state flag for lazy initialization inode thread.
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -1227,7 +1227,7 @@ static struct buffer_head * ext4_find_en
+ buffer */
+ int num = 0;
+ ext4_lblk_t nblocks;
+- int i, err;
++ int i, err = 0;
+ int namelen;
+
+ *res_dir = NULL;
+@@ -1264,7 +1264,11 @@ static struct buffer_head * ext4_find_en
+ * return. Otherwise, fall back to doing a search the
+ * old fashioned way.
+ */
+- if (bh || (err != ERR_BAD_DX_DIR))
++ if (err == -ENOENT)
++ return NULL;
++ if (err && err != ERR_BAD_DX_DIR)
++ return ERR_PTR(err);
++ if (bh)
+ return bh;
+ dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
+ "falling back\n"));
+@@ -1295,6 +1299,11 @@ restart:
+ }
+ num++;
+ bh = ext4_getblk(NULL, dir, b++, 0, &err);
++ if (unlikely(err)) {
++ if (ra_max == 0)
++ return ERR_PTR(err);
++ break;
++ }
+ bh_use[ra_max] = bh;
+ if (bh)
+ ll_rw_block(READ | REQ_META | REQ_PRIO,
+@@ -1417,6 +1426,8 @@ static struct dentry *ext4_lookup(struct
+ return ERR_PTR(-ENAMETOOLONG);
+
+ bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
++ if (IS_ERR(bh))
++ return (struct dentry *) bh;
+ inode = NULL;
+ if (bh) {
+ __u32 ino = le32_to_cpu(de->inode);
+@@ -1450,6 +1461,8 @@ struct dentry *ext4_get_parent(struct de
+ struct buffer_head *bh;
+
+ bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL);
++ if (IS_ERR(bh))
++ return (struct dentry *) bh;
+ if (!bh)
+ return ERR_PTR(-ENOENT);
+ ino = le32_to_cpu(de->inode);
+@@ -2727,6 +2740,8 @@ static int ext4_rmdir(struct inode *dir,
+
+ retval = -ENOENT;
+ bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
++ if (IS_ERR(bh))
++ return PTR_ERR(bh);
+ if (!bh)
+ goto end_rmdir;
+
+@@ -2794,6 +2809,8 @@ static int ext4_unlink(struct inode *dir
+
+ retval = -ENOENT;
+ bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
++ if (IS_ERR(bh))
++ return PTR_ERR(bh);
+ if (!bh)
+ goto end_unlink;
+
+@@ -3121,6 +3138,8 @@ static int ext4_find_delete_entry(handle
+ struct ext4_dir_entry_2 *de;
+
+ bh = ext4_find_entry(dir, d_name, &de, NULL);
++ if (IS_ERR(bh))
++ return PTR_ERR(bh);
+ if (bh) {
+ retval = ext4_delete_entry(handle, dir, de, bh);
+ brelse(bh);
+@@ -3202,6 +3221,8 @@ static int ext4_rename(struct inode *old
+ dquot_initialize(new.inode);
+
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
++ if (IS_ERR(old.bh))
++ return PTR_ERR(old.bh);
+ /*
+ * Check for inode number is _not_ due to possible IO errors.
+ * We might rmdir the source, keep it as pwd of some process
+@@ -3214,6 +3235,10 @@ static int ext4_rename(struct inode *old
+
+ new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
+ &new.de, &new.inlined);
++ if (IS_ERR(new.bh)) {
++ retval = PTR_ERR(new.bh);
++ goto end_rename;
++ }
+ if (new.bh) {
+ if (!new.inode) {
+ brelse(new.bh);
+@@ -3330,6 +3355,8 @@ static int ext4_cross_rename(struct inod
+
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
+ &old.de, &old.inlined);
++ if (IS_ERR(old.bh))
++ return PTR_ERR(old.bh);
+ /*
+ * Check for inode number is _not_ due to possible IO errors.
+ * We might rmdir the source, keep it as pwd of some process
+@@ -3342,6 +3369,10 @@ static int ext4_cross_rename(struct inod
+
+ new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
+ &new.de, &new.inlined);
++ if (IS_ERR(new.bh)) {
++ retval = PTR_ERR(new.bh);
++ goto end_rename;
++ }
+
+ /* RENAME_EXCHANGE case: old *and* new must both exist */
+ if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino)
--- /dev/null
+From 8e8248b1369c97c7bb6f8bcaee1f05deeabab8ef Mon Sep 17 00:00:00 2001
+From: Alexander Usyskin <alexander.usyskin@intel.com>
+Date: Tue, 12 Aug 2014 18:07:57 +0300
+Subject: mei: nfc: fix memory leak in error path
+
+From: Alexander Usyskin <alexander.usyskin@intel.com>
+
+commit 8e8248b1369c97c7bb6f8bcaee1f05deeabab8ef upstream.
+
+The NFC code will leak the send buffer if the send fails.
+Use a single exit point that does the freeing.
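+
+As a hedged illustration, here is a minimal, self-contained C sketch
+of the single-exit-point pattern the fix adopts; send_with_cleanup()
+and the header size are hypothetical, and free(NULL) being a no-op
+plays the same role as kfree(NULL) in the driver:
+
+    #include <errno.h>
+    #include <stdlib.h>
+    #include <string.h>
+
+    /* hypothetical send path showing the single-exit cleanup pattern */
+    static int send_with_cleanup(const void *payload, size_t len)
+    {
+            char *buf;
+            int err = -ENOMEM;          /* default error before alloc */
+
+            buf = malloc(len + 16);     /* 16 stands in for a header */
+            if (!buf)
+                    goto out;           /* buf is NULL; free(NULL) is safe */
+
+            memcpy(buf + 16, payload, len);
+            /* ... transmit buf, set err from the result ... */
+            err = 0;
+    out:
+            free(buf);                  /* one place frees on all paths */
+            return err;
+    }
+
+    int main(void)
+    {
+            return send_with_cleanup("hi", 2) ? 1 : 0;
+    }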
+
+Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
+Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/misc/mei/nfc.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/drivers/misc/mei/nfc.c
++++ b/drivers/misc/mei/nfc.c
+@@ -342,9 +342,10 @@ static int mei_nfc_send(struct mei_cl_de
+ ndev = (struct mei_nfc_dev *) cldev->priv_data;
+ dev = ndev->cl->dev;
+
++ err = -ENOMEM;
+ mei_buf = kzalloc(length + MEI_NFC_HEADER_SIZE, GFP_KERNEL);
+ if (!mei_buf)
+- return -ENOMEM;
++ goto out;
+
+ hdr = (struct mei_nfc_hci_hdr *) mei_buf;
+ hdr->cmd = MEI_NFC_CMD_HCI_SEND;
+@@ -354,12 +355,9 @@ static int mei_nfc_send(struct mei_cl_de
+ hdr->data_size = length;
+
+ memcpy(mei_buf + MEI_NFC_HEADER_SIZE, buf, length);
+-
+ err = __mei_cl_send(ndev->cl, mei_buf, length + MEI_NFC_HEADER_SIZE);
+ if (err < 0)
+- return err;
+-
+- kfree(mei_buf);
++ goto out;
+
+ if (!wait_event_interruptible_timeout(ndev->send_wq,
+ ndev->recv_req_id == ndev->req_id, HZ)) {
+@@ -368,7 +366,8 @@ static int mei_nfc_send(struct mei_cl_de
+ } else {
+ ndev->req_id++;
+ }
+-
++out:
++ kfree(mei_buf);
+ return err;
+ }
+
--- /dev/null
+From 73ab4232388b7a08f17c8d08141ff2099fa0b161 Mon Sep 17 00:00:00 2001
+From: Alexander Usyskin <alexander.usyskin@intel.com>
+Date: Tue, 12 Aug 2014 18:07:56 +0300
+Subject: mei: reset client state on queued connect request
+
+From: Alexander Usyskin <alexander.usyskin@intel.com>
+
+commit 73ab4232388b7a08f17c8d08141ff2099fa0b161 upstream.
+
+If a connect request is queued (e.g. the device is in power gating),
+set the client state to initializing, thus avoiding a premature exit
+from the wait if the current state is disconnected.
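+
+A hedged sketch of the early exit (state names simplified, and
+connect_done() is a hypothetical stand-in for the driver's wait
+predicate): the connect wait completes once the client reaches a
+terminal state, so a client still marked disconnected from before
+satisfies the predicate immediately, before the firmware has
+answered; resetting it to initializing keeps the wait armed:
+
+    #include <stdio.h>
+
+    /* simplified client states, loosely modeled on the driver's enum */
+    enum file_state { INITIALIZING, CONNECTING, CONNECTED, DISCONNECTED };
+
+    /* hypothetical wait predicate: done once a terminal state is hit */
+    static int connect_done(enum file_state s)
+    {
+            return s == CONNECTED || s == DISCONNECTED;
+    }
+
+    int main(void)
+    {
+            /* stale state on a queued request: wait would end at once */
+            printf("DISCONNECTED: done=%d\n", connect_done(DISCONNECTED));
+            /* with the fix the state is reset first: wait really waits */
+            printf("INITIALIZING: done=%d\n", connect_done(INITIALIZING));
+            return 0;
+    }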
+
+This is a regression from:
+
+commit e4d8270e604c3202131bac607969605ac397b893
+Author: Alexander Usyskin <alexander.usyskin@intel.com>
+mei: set connecting state just upon connection request is sent to the fw
+
+Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
+Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/misc/mei/client.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/misc/mei/client.c
++++ b/drivers/misc/mei/client.c
+@@ -601,6 +601,7 @@ int mei_cl_connect(struct mei_cl *cl, st
+ cl->timer_count = MEI_CONNECT_TIMEOUT;
+ list_add_tail(&cb->list, &dev->ctrl_rd_list.list);
+ } else {
++ cl->state = MEI_FILE_INITIALIZING;
+ list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+ }
+
x86-xen-resume-timer-irqs-early.patch
x86-mm-fix-pte_special-versus-pte_numa.patch
hpsa-fix-bad-enomem-return-value-in-hpsa_big_passthru_ioctl.patch
+btrfs-fix-memory-corruption-by-ulist_add_merge-on-32bit-arch.patch
+btrfs-fix-csum-tree-corruption-duplicate-and-outdated-checksums.patch
+btrfs-read-lock-extent-buffer-while-walking-backrefs.patch
+btrfs-fix-compressed-write-corruption-on-enospc.patch
+btrfs-disable-strict-file-flushes-for-renames-and-truncates.patch
+btrfs-fix-crash-on-endio-of-reading-corrupted-block.patch
+btrfs-fix-filemap_flush-call-in-btrfs_file_release.patch
+btrfs-fix-task-hang-under-heavy-compressed-write.patch
+mei-reset-client-state-on-queued-connect-request.patch
+mei-nfc-fix-memory-leak-in-error-path.patch
+ext4-propagate-errors-up-to-ext4_find_entry-s-callers.patch
+ext4-move-i_size-i_disksize-update-routines-to-helper-function.patch