3.13-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 20 Mar 2014 23:23:47 +0000 (16:23 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 20 Mar 2014 23:23:47 +0000 (16:23 -0700)
added patches:
btrfs-fix-data-corruption-when-reading-updating-compressed-extents.patch
btrfs-fix-tree-mod-logging.patch
btrfs-return-immediately-if-tree-log-mod-is-not-necessary.patch
x86-fpu-check-tsk_used_math-in-kernel_fpu_end-for-eager-fpu.patch

queue-3.13/btrfs-fix-data-corruption-when-reading-updating-compressed-extents.patch [new file with mode: 0644]
queue-3.13/btrfs-fix-tree-mod-logging.patch [new file with mode: 0644]
queue-3.13/btrfs-return-immediately-if-tree-log-mod-is-not-necessary.patch [new file with mode: 0644]
queue-3.13/series
queue-3.13/x86-fpu-check-tsk_used_math-in-kernel_fpu_end-for-eager-fpu.patch [new file with mode: 0644]

diff --git a/queue-3.13/btrfs-fix-data-corruption-when-reading-updating-compressed-extents.patch b/queue-3.13/btrfs-fix-data-corruption-when-reading-updating-compressed-extents.patch
new file mode 100644 (file)
index 0000000..5e5194b
--- /dev/null
@@ -0,0 +1,83 @@
+From a2aa75e18a21b21952dc6daa9bac7c9f4426f81f Mon Sep 17 00:00:00 2001
+From: Filipe David Borba Manana <fdmanana@gmail.com>
+Date: Sat, 8 Feb 2014 15:47:46 +0000
+Subject: Btrfs: fix data corruption when reading/updating compressed extents
+
+From: Filipe David Borba Manana <fdmanana@gmail.com>
+
+commit a2aa75e18a21b21952dc6daa9bac7c9f4426f81f upstream.
+
+When using a mix of compressed file extents and prealloc extents, it
+is possible to fill a page of a file with random, garbage data from
+some unrelated previous use of the page, instead of a sequence of zeroes.
+
+A simple sequence of steps to get into such a case, taken from the test
+case I made for xfstests, is:
+
+   _scratch_mkfs
+   _scratch_mount "-o compress-force=lzo"
+   $XFS_IO_PROG -f -c "pwrite -S 0x06 -b 18670 266978 18670" $SCRATCH_MNT/foobar
+   $XFS_IO_PROG -c "falloc 26450 665194" $SCRATCH_MNT/foobar
+   $XFS_IO_PROG -c "truncate 542872" $SCRATCH_MNT/foobar
+   $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar
+
+This results in the following file items in the fs tree:
+
+   item 4 key (257 INODE_ITEM 0) itemoff 15879 itemsize 160
+       inode generation 6 transid 6 size 542872 block group 0 mode 100600
+   item 5 key (257 INODE_REF 256) itemoff 15863 itemsize 16
+       inode ref index 2 namelen 6 name: foobar
+   item 6 key (257 EXTENT_DATA 0) itemoff 15810 itemsize 53
+       extent data disk byte 0 nr 0 gen 6
+       extent data offset 0 nr 24576 ram 266240
+       extent compression 0
+   item 7 key (257 EXTENT_DATA 24576) itemoff 15757 itemsize 53
+       prealloc data disk byte 12849152 nr 241664 gen 6
+       prealloc data offset 0 nr 241664
+   item 8 key (257 EXTENT_DATA 266240) itemoff 15704 itemsize 53
+       extent data disk byte 12845056 nr 4096 gen 6
+       extent data offset 0 nr 20480 ram 20480
+       extent compression 2
+   item 9 key (257 EXTENT_DATA 286720) itemoff 15651 itemsize 53
+       prealloc data disk byte 13090816 nr 405504 gen 6
+       prealloc data offset 0 nr 258048
+
+The on-disk extent at offset 266240 (which corresponds to a single disk
+block) contains 5 compressed chunks of file data. Each of the first 4
+compresses 4096 bytes of file data, while the last one compresses only
+3024 bytes. Therefore a read into the file region [285648 ; 286720[
+(length = 4096 - 3024 = 1072 bytes) should always return zeroes (our
+next extent is a prealloc one).
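+
+To make that arithmetic explicit (4096 byte pages, taken from the item
+dump above):
+
+   end of valid data = 266240 + 4 * 4096 + 3024 = 285648
+   end of extent     = 266240 + 20480           = 286720
+   tail to zero      = 286720 - 285648          = 1072 bytes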
+
+The solution here is for the compression code path to zero the remaining
+(untouched) bytes of the last page it uncompressed data into, as the
+information about how much space the file data consumes in that last page
+is not known to the upper layer, fs/btrfs/extent_io.c:__do_readpage(). In
+__do_readpage() we were correctly zeroing the remainder of the page, but
+only if it corresponds to the last page of the inode and the inode's size
+is not a multiple of the page size.
+
+This would not only return random data on reads, but also permanently
+store random data on disk when updating parts of the region that should
+be zeroed. For the example above, it means that updating a single byte
+in the region [285648 ; 286720[ would store that byte correctly but
+also write random data to disk for the rest of the region.
+
+A test case for xfstests follows soon.
+
+Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/compression.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/btrfs/compression.c
++++ b/fs/btrfs/compression.c
+@@ -1011,6 +1011,8 @@ int btrfs_decompress_buf2page(char *buf,
+               bytes = min(bytes, working_bytes);
+               kaddr = kmap_atomic(page_out);
+               memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
++              if (*pg_index == (vcnt - 1) && *pg_offset == 0)
++                      memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
+               kunmap_atomic(kaddr);
+               flush_dcache_page(page_out);
diff --git a/queue-3.13/btrfs-fix-tree-mod-logging.patch b/queue-3.13/btrfs-fix-tree-mod-logging.patch
new file mode 100644 (file)
index 0000000..6a14e79
--- /dev/null
@@ -0,0 +1,671 @@
+From 5de865eebb8330eee19c37b31fb6f315a09d4273 Mon Sep 17 00:00:00 2001
+From: Filipe David Borba Manana <fdmanana@gmail.com>
+Date: Fri, 20 Dec 2013 15:17:46 +0000
+Subject: Btrfs: fix tree mod logging
+
+From: Filipe David Borba Manana <fdmanana@gmail.com>
+
+commit 5de865eebb8330eee19c37b31fb6f315a09d4273 upstream.
+
+While running the test btrfs/004 from xfstests in a loop, it failed
+about 1 time out of every 20 runs on my desktop. The failure happened
+in the backref walking part of the test, and the test's error message
+looked like this:
+
+#  btrfs/004 93s ... [failed, exit status 1] - output mismatch (see /home/fdmanana/git/hub/xfstests_2/results//btrfs/004.out.bad)
+#      --- tests/btrfs/004.out 2013-11-26 18:25:29.263333714 +0000
+#      +++ /home/fdmanana/git/hub/xfstests_2/results//btrfs/004.out.bad        2013-12-10 15:25:10.327518516 +0000
+#      @@ -1,3 +1,8 @@
+#       QA output created by 004
+#       *** test backref walking
+#      -*** done
+#      +unexpected output from
+#      +       /home/fdmanana/git/hub/btrfs-progs/btrfs inspect-internal logical-resolve -P 141512704 /home/fdmanana/btrfs-tests/scratch_1
+#      +expected inum: 405, expected address: 454656, file: /home/fdmanana/btrfs-tests/scratch_1/snap1/p0/d6/d3d/d156/fce, got:
+#      +
+       ...
+       (Run 'diff -u tests/btrfs/004.out /home/fdmanana/git/hub/xfstests_2/results//btrfs/004.out.bad' to see the entire diff)
+  Ran: btrfs/004
+  Failures: btrfs/004
+  Failed 1 of 1 tests
+
+But immediately after the test finished, the btrfs inspect-internal command
+returned the expected output:
+
+  $ btrfs inspect-internal logical-resolve -P 141512704 /home/fdmanana/btrfs-tests/scratch_1
+  inode 405 offset 454656 root 258
+  inode 405 offset 454656 root 5
+
+It turned out this was because the btrfs_search_old_slot() calls performed
+during backref walking (backref.c:__resolve_indirect_ref) were not finding
+anything. The reason was that the tree mod logging code was not logging
+some multi-step node operations atomically, so btrfs_search_old_slot()
+callers often iterated over an incomplete tree that wasn't fully consistent
+with any tree state from the past. Besides missing items, this often (but
+not always) resulted in -EIO errors during old slot searches, reported in
+dmesg like this:
+
+[ 4299.933936] ------------[ cut here ]------------
+[ 4299.933949] WARNING: CPU: 0 PID: 23190 at fs/btrfs/ctree.c:1343 btrfs_search_old_slot+0x57b/0xab0 [btrfs]()
+[ 4299.933950] Modules linked in: btrfs raid6_pq xor pci_stub vboxpci(O) vboxnetadp(O) vboxnetflt(O) vboxdrv(O) bnep rfcomm bluetooth parport_pc ppdev binfmt_misc joydev snd_hda_codec_h
+[ 4299.933977] CPU: 0 PID: 23190 Comm: btrfs Tainted: G        W  O 3.12.0-fdm-btrfs-next-16+ #70
+[ 4299.933978] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./Z77 Pro4, BIOS P1.50 09/04/2012
+[ 4299.933979]  000000000000053f ffff8806f3fd98f8 ffffffff8176d284 0000000000000007
+[ 4299.933982]  0000000000000000 ffff8806f3fd9938 ffffffff8104a81c ffff880659c64b70
+[ 4299.933984]  ffff880659c643d0 ffff8806599233d8 ffff880701e2e938 0000160000000000
+[ 4299.933987] Call Trace:
+[ 4299.933991]  [<ffffffff8176d284>] dump_stack+0x55/0x76
+[ 4299.933994]  [<ffffffff8104a81c>] warn_slowpath_common+0x8c/0xc0
+[ 4299.933997]  [<ffffffff8104a86a>] warn_slowpath_null+0x1a/0x20
+[ 4299.934003]  [<ffffffffa065d3bb>] btrfs_search_old_slot+0x57b/0xab0 [btrfs]
+[ 4299.934005]  [<ffffffff81775f3b>] ? _raw_read_unlock+0x2b/0x50
+[ 4299.934010]  [<ffffffffa0655001>] ? __tree_mod_log_search+0x81/0xc0 [btrfs]
+[ 4299.934019]  [<ffffffffa06dd9b0>] __resolve_indirect_refs+0x130/0x5f0 [btrfs]
+[ 4299.934027]  [<ffffffffa06a21f1>] ? free_extent_buffer+0x61/0xc0 [btrfs]
+[ 4299.934034]  [<ffffffffa06de39c>] find_parent_nodes+0x1fc/0xe40 [btrfs]
+[ 4299.934042]  [<ffffffffa06b13e0>] ? defrag_lookup_extent+0xe0/0xe0 [btrfs]
+[ 4299.934048]  [<ffffffffa06b13e0>] ? defrag_lookup_extent+0xe0/0xe0 [btrfs]
+[ 4299.934056]  [<ffffffffa06df980>] iterate_extent_inodes+0xe0/0x250 [btrfs]
+[ 4299.934058]  [<ffffffff817762db>] ? _raw_spin_unlock+0x2b/0x50
+[ 4299.934065]  [<ffffffffa06dfb82>] iterate_inodes_from_logical+0x92/0xb0 [btrfs]
+[ 4299.934071]  [<ffffffffa06b13e0>] ? defrag_lookup_extent+0xe0/0xe0 [btrfs]
+[ 4299.934078]  [<ffffffffa06b7015>] btrfs_ioctl+0xf65/0x1f60 [btrfs]
+[ 4299.934080]  [<ffffffff811658b8>] ? handle_mm_fault+0x278/0xb00
+[ 4299.934083]  [<ffffffff81075563>] ? up_read+0x23/0x40
+[ 4299.934085]  [<ffffffff8177a41c>] ? __do_page_fault+0x20c/0x5a0
+[ 4299.934088]  [<ffffffff811b2946>] do_vfs_ioctl+0x96/0x570
+[ 4299.934090]  [<ffffffff81776e23>] ? error_sti+0x5/0x6
+[ 4299.934093]  [<ffffffff810b71e8>] ? trace_hardirqs_off_caller+0x28/0xd0
+[ 4299.934096]  [<ffffffff81776a09>] ? retint_swapgs+0xe/0x13
+[ 4299.934098]  [<ffffffff811b2eb1>] SyS_ioctl+0x91/0xb0
+[ 4299.934100]  [<ffffffff813eecde>] ? trace_hardirqs_on_thunk+0x3a/0x3f
+[ 4299.934102]  [<ffffffff8177ef12>] system_call_fastpath+0x16/0x1b
+[ 4299.934104] ---[ end trace 48f0cfc902491414 ]---
+[ 4299.934378] btrfs bad fsid on block 0
+
+The tree mod log operations that must be performed atomically, namely
+tree_mod_log_free_eb, tree_mod_log_eb_copy, tree_mod_log_insert_root and
+tree_mod_log_insert_move, were indeed atomic before the following commit:
+
+  c8cc6341653721b54760480b0d0d9b5f09b46741
+  (Btrfs: stop using GFP_ATOMIC for the tree mod log allocations)
+
+That change removed the atomicity of these operations. This patch restores
+it while still avoiding GFP_ATOMIC allocations of the tree_mod_elem
+structures: it performs the allocations with GFP_NOFS before acquiring
+the mod log lock.
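+
+The resulting pattern, common to all these operations, is roughly the
+following (a simplified sketch; the real code in this patch also tracks
+whether the lock is held so the error path knows whether to unlock, and
+handles the extra move/root-replace element):
+
+   /* 1) Allocate everything up front, outside the lock. */
+   for (i = 0; i < nr_items; i++) {
+           tm_list[i] = alloc_tree_mod_elem(eb, i, op, GFP_NOFS);
+           if (!tm_list[i])
+                   goto free_tms;  /* nothing was logged yet */
+   }
+
+   /*
+    * 2) tree_mod_dont_log() now takes tree_mod_log_write_lock() and
+    * returns nonzero, with the lock released, if logging turns out
+    * to be unnecessary.
+    */
+   if (tree_mod_dont_log(fs_info, eb))
+           goto free_tms;
+
+   /*
+    * 3) Insert all elements under the single write lock, so readers
+    * never observe a half-logged multi-step operation; on failure,
+    * rb_erase() whatever was already inserted before freeing it.
+    */
+   for (i = 0; i < nr_items; i++) {
+           ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+           if (ret)
+                   goto free_tms;
+   }
+   tree_mod_log_write_unlock(fs_info);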
+
+This issue has been experienced by several users recently, for example:
+
+  http://www.spinics.net/lists/linux-btrfs/msg28574.html
+
+After running the btrfs/004 test for 679 consecutive iterations with this
+patch applied, I didn't run into the issue anymore.
+
+Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com>
+Signed-off-by: Josef Bacik <jbacik@fb.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.c |  385 ++++++++++++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 296 insertions(+), 89 deletions(-)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -39,7 +39,7 @@ static int balance_node_right(struct btr
+                             struct extent_buffer *src_buf);
+ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
+                   int level, int slot);
+-static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
++static int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+                                struct extent_buffer *eb);
+ static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
+@@ -475,6 +475,8 @@ void btrfs_put_tree_mod_seq(struct btrfs
+  * the index is the shifted logical of the *new* root node for root replace
+  * operations, or the shifted logical of the affected block for all other
+  * operations.
++ *
++ * Note: must be called with write lock (tree_mod_log_write_lock).
+  */
+ static noinline int
+ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
+@@ -483,24 +485,9 @@ __tree_mod_log_insert(struct btrfs_fs_in
+       struct rb_node **new;
+       struct rb_node *parent = NULL;
+       struct tree_mod_elem *cur;
+-      int ret = 0;
+       BUG_ON(!tm);
+-      tree_mod_log_write_lock(fs_info);
+-      if (list_empty(&fs_info->tree_mod_seq_list)) {
+-              tree_mod_log_write_unlock(fs_info);
+-              /*
+-               * Ok we no longer care about logging modifications, free up tm
+-               * and return 0.  Any callers shouldn't be using tm after
+-               * calling tree_mod_log_insert, but if they do we can just
+-               * change this to return a special error code to let the callers
+-               * do their own thing.
+-               */
+-              kfree(tm);
+-              return 0;
+-      }
+-
+       spin_lock(&fs_info->tree_mod_seq_lock);
+       tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
+       spin_unlock(&fs_info->tree_mod_seq_lock);
+@@ -518,18 +505,13 @@ __tree_mod_log_insert(struct btrfs_fs_in
+                       new = &((*new)->rb_left);
+               else if (cur->seq > tm->seq)
+                       new = &((*new)->rb_right);
+-              else {
+-                      ret = -EEXIST;
+-                      kfree(tm);
+-                      goto out;
+-              }
++              else
++                      return -EEXIST;
+       }
+       rb_link_node(&tm->node, parent, new);
+       rb_insert_color(&tm->node, tm_root);
+-out:
+-      tree_mod_log_write_unlock(fs_info);
+-      return ret;
++      return 0;
+ }
+ /*
+@@ -545,19 +527,38 @@ static inline int tree_mod_dont_log(stru
+               return 1;
+       if (eb && btrfs_header_level(eb) == 0)
+               return 1;
++
++      tree_mod_log_write_lock(fs_info);
++      if (list_empty(&(fs_info)->tree_mod_seq_list)) {
++              tree_mod_log_write_unlock(fs_info);
++              return 1;
++      }
++
+       return 0;
+ }
+-static inline int
+-__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
+-                        struct extent_buffer *eb, int slot,
+-                        enum mod_log_op op, gfp_t flags)
++/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
++static inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info,
++                                  struct extent_buffer *eb)
++{
++      smp_mb();
++      if (list_empty(&(fs_info)->tree_mod_seq_list))
++              return 0;
++      if (eb && btrfs_header_level(eb) == 0)
++              return 0;
++
++      return 1;
++}
++
++static struct tree_mod_elem *
++alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
++                  enum mod_log_op op, gfp_t flags)
+ {
+       struct tree_mod_elem *tm;
+       tm = kzalloc(sizeof(*tm), flags);
+       if (!tm)
+-              return -ENOMEM;
++              return NULL;
+       tm->index = eb->start >> PAGE_CACHE_SHIFT;
+       if (op != MOD_LOG_KEY_ADD) {
+@@ -567,8 +568,9 @@ __tree_mod_log_insert_key(struct btrfs_f
+       tm->op = op;
+       tm->slot = slot;
+       tm->generation = btrfs_node_ptr_generation(eb, slot);
++      RB_CLEAR_NODE(&tm->node);
+-      return __tree_mod_log_insert(fs_info, tm);
++      return tm;
+ }
+ static noinline int
+@@ -576,10 +578,27 @@ tree_mod_log_insert_key(struct btrfs_fs_
+                       struct extent_buffer *eb, int slot,
+                       enum mod_log_op op, gfp_t flags)
+ {
+-      if (tree_mod_dont_log(fs_info, eb))
++      struct tree_mod_elem *tm;
++      int ret;
++
++      if (!tree_mod_need_log(fs_info, eb))
++              return 0;
++
++      tm = alloc_tree_mod_elem(eb, slot, op, flags);
++      if (!tm)
++              return -ENOMEM;
++
++      if (tree_mod_dont_log(fs_info, eb)) {
++              kfree(tm);
+               return 0;
++      }
+-      return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
++      ret = __tree_mod_log_insert(fs_info, tm);
++      tree_mod_log_write_unlock(fs_info);
++      if (ret)
++              kfree(tm);
++
++      return ret;
+ }
+ static noinline int
+@@ -587,53 +606,95 @@ tree_mod_log_insert_move(struct btrfs_fs
+                        struct extent_buffer *eb, int dst_slot, int src_slot,
+                        int nr_items, gfp_t flags)
+ {
+-      struct tree_mod_elem *tm;
+-      int ret;
++      struct tree_mod_elem *tm = NULL;
++      struct tree_mod_elem **tm_list = NULL;
++      int ret = 0;
+       int i;
++      int locked = 0;
+-      if (tree_mod_dont_log(fs_info, eb))
++      if (!tree_mod_need_log(fs_info, eb))
+               return 0;
++      tm_list = kzalloc(nr_items * sizeof(struct tree_mod_elem *), flags);
++      if (!tm_list)
++              return -ENOMEM;
++
++      tm = kzalloc(sizeof(*tm), flags);
++      if (!tm) {
++              ret = -ENOMEM;
++              goto free_tms;
++      }
++
++      tm->index = eb->start >> PAGE_CACHE_SHIFT;
++      tm->slot = src_slot;
++      tm->move.dst_slot = dst_slot;
++      tm->move.nr_items = nr_items;
++      tm->op = MOD_LOG_MOVE_KEYS;
++
++      for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
++              tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
++                  MOD_LOG_KEY_REMOVE_WHILE_MOVING, flags);
++              if (!tm_list[i]) {
++                      ret = -ENOMEM;
++                      goto free_tms;
++              }
++      }
++
++      if (tree_mod_dont_log(fs_info, eb))
++              goto free_tms;
++      locked = 1;
++
+       /*
+        * When we override something during the move, we log these removals.
+        * This can only happen when we move towards the beginning of the
+        * buffer, i.e. dst_slot < src_slot.
+        */
+       for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
+-              ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
+-                              MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
+-              BUG_ON(ret < 0);
++              ret = __tree_mod_log_insert(fs_info, tm_list[i]);
++              if (ret)
++                      goto free_tms;
+       }
+-      tm = kzalloc(sizeof(*tm), flags);
+-      if (!tm)
+-              return -ENOMEM;
++      ret = __tree_mod_log_insert(fs_info, tm);
++      if (ret)
++              goto free_tms;
++      tree_mod_log_write_unlock(fs_info);
++      kfree(tm_list);
+-      tm->index = eb->start >> PAGE_CACHE_SHIFT;
+-      tm->slot = src_slot;
+-      tm->move.dst_slot = dst_slot;
+-      tm->move.nr_items = nr_items;
+-      tm->op = MOD_LOG_MOVE_KEYS;
++      return 0;
++free_tms:
++      for (i = 0; i < nr_items; i++) {
++              if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
++                      rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
++              kfree(tm_list[i]);
++      }
++      if (locked)
++              tree_mod_log_write_unlock(fs_info);
++      kfree(tm_list);
++      kfree(tm);
+-      return __tree_mod_log_insert(fs_info, tm);
++      return ret;
+ }
+-static inline void
+-__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
++static inline int
++__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
++                     struct tree_mod_elem **tm_list,
++                     int nritems)
+ {
+-      int i;
+-      u32 nritems;
++      int i, j;
+       int ret;
+-      if (btrfs_header_level(eb) == 0)
+-              return;
+-
+-      nritems = btrfs_header_nritems(eb);
+       for (i = nritems - 1; i >= 0; i--) {
+-              ret = __tree_mod_log_insert_key(fs_info, eb, i,
+-                              MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
+-              BUG_ON(ret < 0);
++              ret = __tree_mod_log_insert(fs_info, tm_list[i]);
++              if (ret) {
++                      for (j = nritems - 1; j > i; j--)
++                              rb_erase(&tm_list[j]->node,
++                                       &fs_info->tree_mod_log);
++                      return ret;
++              }
+       }
++
++      return 0;
+ }
+ static noinline int
+@@ -642,17 +703,38 @@ tree_mod_log_insert_root(struct btrfs_fs
+                        struct extent_buffer *new_root, gfp_t flags,
+                        int log_removal)
+ {
+-      struct tree_mod_elem *tm;
++      struct tree_mod_elem *tm = NULL;
++      struct tree_mod_elem **tm_list = NULL;
++      int nritems = 0;
++      int ret = 0;
++      int i;
+-      if (tree_mod_dont_log(fs_info, NULL))
++      if (!tree_mod_need_log(fs_info, NULL))
+               return 0;
+-      if (log_removal)
+-              __tree_mod_log_free_eb(fs_info, old_root);
++      if (log_removal && btrfs_header_level(old_root) > 0) {
++              nritems = btrfs_header_nritems(old_root);
++              tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
++                                flags);
++              if (!tm_list) {
++                      ret = -ENOMEM;
++                      goto free_tms;
++              }
++              for (i = 0; i < nritems; i++) {
++                      tm_list[i] = alloc_tree_mod_elem(old_root, i,
++                          MOD_LOG_KEY_REMOVE_WHILE_FREEING, flags);
++                      if (!tm_list[i]) {
++                              ret = -ENOMEM;
++                              goto free_tms;
++                      }
++              }
++      }
+       tm = kzalloc(sizeof(*tm), flags);
+-      if (!tm)
+-              return -ENOMEM;
++      if (!tm) {
++              ret = -ENOMEM;
++              goto free_tms;
++      }
+       tm->index = new_root->start >> PAGE_CACHE_SHIFT;
+       tm->old_root.logical = old_root->start;
+@@ -660,7 +742,30 @@ tree_mod_log_insert_root(struct btrfs_fs
+       tm->generation = btrfs_header_generation(old_root);
+       tm->op = MOD_LOG_ROOT_REPLACE;
+-      return __tree_mod_log_insert(fs_info, tm);
++      if (tree_mod_dont_log(fs_info, NULL))
++              goto free_tms;
++
++      if (tm_list)
++              ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
++      if (!ret)
++              ret = __tree_mod_log_insert(fs_info, tm);
++
++      tree_mod_log_write_unlock(fs_info);
++      if (ret)
++              goto free_tms;
++      kfree(tm_list);
++
++      return ret;
++
++free_tms:
++      if (tm_list) {
++              for (i = 0; i < nritems; i++)
++                      kfree(tm_list[i]);
++              kfree(tm_list);
++      }
++      kfree(tm);
++
++      return ret;
+ }
+ static struct tree_mod_elem *
+@@ -729,31 +834,75 @@ tree_mod_log_search(struct btrfs_fs_info
+       return __tree_mod_log_search(fs_info, start, min_seq, 0);
+ }
+-static noinline void
++static noinline int
+ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
+                    struct extent_buffer *src, unsigned long dst_offset,
+                    unsigned long src_offset, int nr_items)
+ {
+-      int ret;
++      int ret = 0;
++      struct tree_mod_elem **tm_list = NULL;
++      struct tree_mod_elem **tm_list_add, **tm_list_rem;
+       int i;
++      int locked = 0;
+-      if (tree_mod_dont_log(fs_info, NULL))
+-              return;
++      if (!tree_mod_need_log(fs_info, NULL))
++              return 0;
+       if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
+-              return;
++              return 0;
++
++      tm_list = kzalloc(nr_items * 2 * sizeof(struct tree_mod_elem *),
++                        GFP_NOFS);
++      if (!tm_list)
++              return -ENOMEM;
++      tm_list_add = tm_list;
++      tm_list_rem = tm_list + nr_items;
+       for (i = 0; i < nr_items; i++) {
+-              ret = __tree_mod_log_insert_key(fs_info, src,
+-                                              i + src_offset,
+-                                              MOD_LOG_KEY_REMOVE, GFP_NOFS);
+-              BUG_ON(ret < 0);
+-              ret = __tree_mod_log_insert_key(fs_info, dst,
+-                                                   i + dst_offset,
+-                                                   MOD_LOG_KEY_ADD,
+-                                                   GFP_NOFS);
+-              BUG_ON(ret < 0);
++              tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset,
++                  MOD_LOG_KEY_REMOVE, GFP_NOFS);
++              if (!tm_list_rem[i]) {
++                      ret = -ENOMEM;
++                      goto free_tms;
++              }
++
++              tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
++                  MOD_LOG_KEY_ADD, GFP_NOFS);
++              if (!tm_list_add[i]) {
++                      ret = -ENOMEM;
++                      goto free_tms;
++              }
++      }
++
++      if (tree_mod_dont_log(fs_info, NULL))
++              goto free_tms;
++      locked = 1;
++
++      for (i = 0; i < nr_items; i++) {
++              ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]);
++              if (ret)
++                      goto free_tms;
++              ret = __tree_mod_log_insert(fs_info, tm_list_add[i]);
++              if (ret)
++                      goto free_tms;
+       }
++
++      tree_mod_log_write_unlock(fs_info);
++      kfree(tm_list);
++
++      return 0;
++
++free_tms:
++      for (i = 0; i < nr_items * 2; i++) {
++              if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
++                      rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
++              kfree(tm_list[i]);
++      }
++      if (locked)
++              tree_mod_log_write_unlock(fs_info);
++      kfree(tm_list);
++
++      return ret;
+ }
+ static inline void
+@@ -778,12 +927,52 @@ tree_mod_log_set_node_key(struct btrfs_f
+       BUG_ON(ret < 0);
+ }
+-static noinline void
++static noinline int
+ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
+ {
++      struct tree_mod_elem **tm_list = NULL;
++      int nritems = 0;
++      int i;
++      int ret = 0;
++
++      if (btrfs_header_level(eb) == 0)
++              return 0;
++
++      if (!tree_mod_need_log(fs_info, NULL))
++              return 0;
++
++      nritems = btrfs_header_nritems(eb);
++      tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
++                        GFP_NOFS);
++      if (!tm_list)
++              return -ENOMEM;
++
++      for (i = 0; i < nritems; i++) {
++              tm_list[i] = alloc_tree_mod_elem(eb, i,
++                  MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
++              if (!tm_list[i]) {
++                      ret = -ENOMEM;
++                      goto free_tms;
++              }
++      }
++
+       if (tree_mod_dont_log(fs_info, eb))
+-              return;
+-      __tree_mod_log_free_eb(fs_info, eb);
++              goto free_tms;
++
++      ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
++      tree_mod_log_write_unlock(fs_info);
++      if (ret)
++              goto free_tms;
++      kfree(tm_list);
++
++      return 0;
++
++free_tms:
++      for (i = 0; i < nritems; i++)
++              kfree(tm_list[i]);
++      kfree(tm_list);
++
++      return ret;
+ }
+ static noinline void
+@@ -1041,8 +1230,13 @@ static noinline int __btrfs_cow_block(st
+               btrfs_set_node_ptr_generation(parent, parent_slot,
+                                             trans->transid);
+               btrfs_mark_buffer_dirty(parent);
+-              if (last_ref)
+-                      tree_mod_log_free_eb(root->fs_info, buf);
++              if (last_ref) {
++                      ret = tree_mod_log_free_eb(root->fs_info, buf);
++                      if (ret) {
++                              btrfs_abort_transaction(trans, root, ret);
++                              return ret;
++                      }
++              }
+               btrfs_free_tree_block(trans, root, buf, parent_start,
+                                     last_ref);
+       }
+@@ -3022,8 +3216,12 @@ static int push_node_left(struct btrfs_t
+       } else
+               push_items = min(src_nritems - 8, push_items);
+-      tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
+-                           push_items);
++      ret = tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
++                                 push_items);
++      if (ret) {
++              btrfs_abort_transaction(trans, root, ret);
++              return ret;
++      }
+       copy_extent_buffer(dst, src,
+                          btrfs_node_key_ptr_offset(dst_nritems),
+                          btrfs_node_key_ptr_offset(0),
+@@ -3093,8 +3291,12 @@ static int balance_node_right(struct btr
+                                     (dst_nritems) *
+                                     sizeof(struct btrfs_key_ptr));
+-      tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
+-                           src_nritems - push_items, push_items);
++      ret = tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
++                                 src_nritems - push_items, push_items);
++      if (ret) {
++              btrfs_abort_transaction(trans, root, ret);
++              return ret;
++      }
+       copy_extent_buffer(dst, src,
+                          btrfs_node_key_ptr_offset(0),
+                          btrfs_node_key_ptr_offset(src_nritems - push_items),
+@@ -3295,7 +3497,12 @@ static noinline int split_node(struct bt
+                           btrfs_header_chunk_tree_uuid(split),
+                           BTRFS_UUID_SIZE);
+-      tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
++      ret = tree_mod_log_eb_copy(root->fs_info, split, c, 0,
++                                 mid, c_nritems - mid);
++      if (ret) {
++              btrfs_abort_transaction(trans, root, ret);
++              return ret;
++      }
+       copy_extent_buffer(split, c,
+                          btrfs_node_key_ptr_offset(0),
+                          btrfs_node_key_ptr_offset(mid),
diff --git a/queue-3.13/btrfs-return-immediately-if-tree-log-mod-is-not-necessary.patch b/queue-3.13/btrfs-return-immediately-if-tree-log-mod-is-not-necessary.patch
new file mode 100644 (file)
index 0000000..0c2af9f
--- /dev/null
@@ -0,0 +1,41 @@
+From 783577663507411e36e459390ef056556e93ef29 Mon Sep 17 00:00:00 2001
+From: Filipe David Borba Manana <fdmanana@gmail.com>
+Date: Thu, 12 Dec 2013 19:19:52 +0000
+Subject: Btrfs: return immediately if tree log mod is not necessary
+
+From: Filipe David Borba Manana <fdmanana@gmail.com>
+
+commit 783577663507411e36e459390ef056556e93ef29 upstream.
+
+In ctree.c:tree_mod_log_set_node_key() we were calling
+__tree_mod_log_insert_key() even when the modification doesn't need
+to be logged. This would allocate a tree_mod_elem structure, fill it
+and pass it to __tree_mod_log_insert(), which would just acquire the
+tree mod log write lock and then free the tree_mod_elem structure and
+return (that is, a no-op).
+
+Therefore call tree_mod_log_insert_key() instead of
+__tree_mod_log_insert_key(), which returns immediately if the
+modification doesn't need to be logged, without allocating the
+structure, filling it, acquiring the write lock and freeing the
+structure.
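+
+For reference, the check that the called function (reworked in the
+previous patch in this series) now performs before doing any work:
+
+   if (!tree_mod_need_log(fs_info, eb))
+           return 0;   /* no allocation, no lock acquisition */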
+
+Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com>
+Signed-off-by: Josef Bacik <jbacik@fb.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -772,7 +772,7 @@ tree_mod_log_set_node_key(struct btrfs_f
+ {
+       int ret;
+-      ret = __tree_mod_log_insert_key(fs_info, eb, slot,
++      ret = tree_mod_log_insert_key(fs_info, eb, slot,
+                                       MOD_LOG_KEY_REPLACE,
+                                       atomic ? GFP_ATOMIC : GFP_NOFS);
+       BUG_ON(ret < 0);
diff --git a/queue-3.13/series b/queue-3.13/series
index c7c261b888479ac52918d8c4b1627e12d4b05c81..1665a9b4ea26e14064d3b19884a46e2ba9a5532a 100644 (file)
--- a/queue-3.13/series
+++ b/queue-3.13/series
@@ -133,3 +133,7 @@ scsi-isci-correct-erroneous-for_each_isci_host-macro.patch
 scsi-qla2xxx-poll-during-initialization-for-isp25xx-and-isp83xx.patch
 scsi-qla2xxx-fix-multiqueue-msi-x-registration.patch
 scsi-storvsc-null-pointer-dereference-fix.patch
+x86-fpu-check-tsk_used_math-in-kernel_fpu_end-for-eager-fpu.patch
+btrfs-return-immediately-if-tree-log-mod-is-not-necessary.patch
+btrfs-fix-tree-mod-logging.patch
+btrfs-fix-data-corruption-when-reading-updating-compressed-extents.patch
diff --git a/queue-3.13/x86-fpu-check-tsk_used_math-in-kernel_fpu_end-for-eager-fpu.patch b/queue-3.13/x86-fpu-check-tsk_used_math-in-kernel_fpu_end-for-eager-fpu.patch
new file mode 100644 (file)
index 0000000..b1b42e4
--- /dev/null
@@ -0,0 +1,82 @@
+From 731bd6a93a6e9172094a2322bd0ee964bb1f4d63 Mon Sep 17 00:00:00 2001
+From: Suresh Siddha <sbsiddha@gmail.com>
+Date: Sun, 2 Feb 2014 22:56:23 -0800
+Subject: x86, fpu: Check tsk_used_math() in kernel_fpu_end() for eager FPU
+
+From: Suresh Siddha <sbsiddha@gmail.com>
+
+commit 731bd6a93a6e9172094a2322bd0ee964bb1f4d63 upstream.
+
+In non-eager fpu mode, a thread's fpu state is allocated during its first
+fpu usage (in the context of the device-not-available exception). This
+path (math_state_restore()) can block, hence we enable interrupts (which
+were originally disabled when the exception happened), allocate memory,
+disable interrupts again, etc.
+
+But eager-fpu mode calls the same math_state_restore() from
+kernel_fpu_end(), the assumption being that tsk_used_math() is always
+set in eager-fpu mode, which avoids the code path of enabling
+interrupts, allocating fpu state with a blocking call, disabling
+interrupts again, etc.
+
+But the below issue was noticed by Maarten Baert, Nate Eldredge and a
+few others:
+
+If a user process dumps core on an ecryptfs filesystem while aesni-intel
+is loaded, we get a BUG() in __find_get_block() complaining that it was
+called with interrupts disabled; then all further accesses to that
+ecryptfs filesystem hang and we have to reboot.
+
+The aesni-intel code (encrypting the core file that we are writing) needs
+the FPU and quite properly wraps its code in kernel_fpu_{begin,end}(),
+the latter of which calls math_state_restore(). So after kernel_fpu_end(),
+interrupts may be disabled, which nobody seems to expect, and they stay
+that way until we eventually get to __find_get_block() which barfs.
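+
+For context, the usage pattern in question is (a minimal sketch; a real
+caller should also check irq_fpu_usable() before relying on the FPU in
+kernel mode):
+
+   kernel_fpu_begin();
+   /* ... SSE/AES-NI instructions ... */
+   kernel_fpu_end();   /* must not return with interrupts disabled */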
+
+For eager fpu, most of the time tsk_used_math() is true. In a few
+instances, during thread exit, signal return handling, etc.,
+tsk_used_math() might be false.
+
+In kernel_fpu_end(), for eager-fpu, call math_state_restore() only if
+tsk_used_math() is set. Otherwise, don't bother: the kernel code path
+that cleared tsk_used_math() knows what needs to be done with the fpu
+state.
+
+Reported-by: Maarten Baert <maarten-baert@hotmail.com>
+Reported-by: Nate Eldredge <nate@thatsmathematics.com>
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Suresh Siddha <sbsiddha@gmail.com>
+Link: http://lkml.kernel.org/r/1391410583.3801.6.camel@europa
+Cc: George Spelvin <linux@horizon.com>
+Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/i387.c |   15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/i387.c
++++ b/arch/x86/kernel/i387.c
+@@ -86,10 +86,19 @@ EXPORT_SYMBOL(__kernel_fpu_begin);
+ void __kernel_fpu_end(void)
+ {
+-      if (use_eager_fpu())
+-              math_state_restore();
+-      else
++      if (use_eager_fpu()) {
++              /*
++               * For eager fpu, most of the time tsk_used_math() is true.
++               * Restore the user math as we are done with the kernel usage.
++               * In a few instances, during thread exit, signal handling etc,
++               * tsk_used_math() is false. Those few places will take proper
++               * actions, so we don't need to restore the math here.
++               */
++              if (likely(tsk_used_math(current)))
++                      math_state_restore();
++      } else {
+               stts();
++      }
+ }
+ EXPORT_SYMBOL(__kernel_fpu_end);