--- /dev/null
+From e2656412f2a7343ecfd13eb74bac0a6e6e9c5aad Mon Sep 17 00:00:00 2001
+From: "Lu, Han" <han.lu@intel.com>
+Date: Wed, 11 Nov 2015 16:54:27 +0800
+Subject: ALSA: hda/hdmi - apply Skylake fix-ups to Broxton display codec
+
+From: "Lu, Han" <han.lu@intel.com>
+
+commit e2656412f2a7343ecfd13eb74bac0a6e6e9c5aad upstream.
+
+Broxton and Skylake have the same behavior on display audio. So this patch
+applies Skylake fix-ups to Broxton.
+
+Signed-off-by: Lu, Han <han.lu@intel.com>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/pci/hda/patch_hdmi.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/sound/pci/hda/patch_hdmi.c
++++ b/sound/pci/hda/patch_hdmi.c
+@@ -48,8 +48,9 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't
+ #define is_haswell(codec) ((codec)->core.vendor_id == 0x80862807)
+ #define is_broadwell(codec) ((codec)->core.vendor_id == 0x80862808)
+ #define is_skylake(codec) ((codec)->core.vendor_id == 0x80862809)
++#define is_broxton(codec) ((codec)->core.vendor_id == 0x8086280a)
+ #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
+- || is_skylake(codec))
++ || is_skylake(codec) || is_broxton(codec))
+
+ #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
+ #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
--- /dev/null
+From 849ef9286f30c88113906dc35f44a499c0cb385d Mon Sep 17 00:00:00 2001
+From: David Sterba <dsterba@suse.com>
+Date: Mon, 12 Oct 2015 16:55:54 +0200
+Subject: btrfs: check unsupported filters in balance arguments
+
+From: David Sterba <dsterba@suse.com>
+
+commit 849ef9286f30c88113906dc35f44a499c0cb385d upstream.
+
+We don't verify that all the balance filter arguments indicated by the
+flags are actually known to the kernel, so unknown filters silently pass
+through and do nothing.
+
+At the moment this means only the 'limit' filter, but we're going to add
+a few more soon, so it's better to have that fixed. This should also go to
+older stable kernels so that they behave correctly with newer userspace tools.
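+
+As an illustration only (not part of the patch), a minimal user-space sketch
+of the new behaviour, assuming the BTRFS_IOC_BALANCE_V2 ioctl and struct
+btrfs_ioctl_balance_args from the uapi header <linux/btrfs.h>; the unknown
+filter bit used below is hypothetical:
+
+  #include <fcntl.h>
+  #include <stdio.h>
+  #include <string.h>
+  #include <sys/ioctl.h>
+  #include <linux/btrfs.h>
+
+  int main(int argc, char **argv)
+  {
+          struct btrfs_ioctl_balance_args args;
+          int fd = open(argc > 1 ? argv[1] : "/mnt", O_RDONLY);
+
+          if (fd < 0)
+                  return 1;
+          memset(&args, 0, sizeof(args));
+          /* hypothetical bit that no balance flag or filter defines */
+          args.flags = 1ULL << 40;
+          if (ioctl(fd, BTRFS_IOC_BALANCE_V2, &args) < 0)
+                  perror("balance"); /* EINVAL with this fix; previously the unknown bit was silently ignored */
+          return 0;
+  }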
+
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ioctl.c | 5 +++++
+ fs/btrfs/volumes.h | 8 ++++++++
+ 2 files changed, 13 insertions(+)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -4497,6 +4497,11 @@ locked:
+ goto out_bctl;
+ }
+
++ if (bctl->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) {
++ ret = -EINVAL;
++ goto out_bargs;
++ }
++
+ do_balance:
+ /*
+ * Ownership of bctl and mutually_exclusive_operation_running
+--- a/fs/btrfs/volumes.h
++++ b/fs/btrfs/volumes.h
+@@ -380,6 +380,14 @@ struct map_lookup {
+ BTRFS_BALANCE_ARGS_VRANGE | \
+ BTRFS_BALANCE_ARGS_LIMIT)
+
++#define BTRFS_BALANCE_ARGS_MASK \
++ (BTRFS_BALANCE_ARGS_PROFILES | \
++ BTRFS_BALANCE_ARGS_USAGE | \
++ BTRFS_BALANCE_ARGS_DEVID | \
++ BTRFS_BALANCE_ARGS_DRANGE | \
++ BTRFS_BALANCE_ARGS_VRANGE | \
++ BTRFS_BALANCE_ARGS_LIMIT)
++
+ /*
+ * Profile changing flags. When SOFT is set we won't relocate chunk if
+ * it already has the target profile (even though it may be
--- /dev/null
+From 8039d87d9e473aeb740d4fdbd59b9d2f89b2ced9 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Tue, 13 Oct 2015 15:15:00 +0100
+Subject: Btrfs: fix file corruption and data loss after cloning inline extents
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 8039d87d9e473aeb740d4fdbd59b9d2f89b2ced9 upstream.
+
+Currently the clone ioctl allows cloning an inline extent from one file
+to another that already has other (non-inlined) extents. This is a problem
+because btrfs is not designed to deal with files having both inline and
+regular extents: if a file has an inline extent, then it must be the only
+extent in the file and must start at file offset 0. Having a file with an
+inline extent followed by regular extents results in EIO errors when doing
+reads or writes against the first 4K of the file.
+
+Also, the clone ioctl allows one to lose data if the source file consists
+of a single inline extent, with a size of N bytes, and the destination
+file consists of a single inline extent with a size of M bytes, where we
+have M > N. In this case the clone operation removes the inline extent
+from the destination file and then copies the inline extent from the
+source file into the destination file - we lose the M - N bytes from the
+destination file, a read operation will get the value 0x00 for any bytes
+in the range [N, M[ (the destination inode's i_size remained as M,
+that's why we can read past N bytes).
+
+So fix this by not allowing such destructive operations to happen and
+return errno EOPNOTSUPP to user space.
+
+Currently the fstest btrfs/035 tests the data loss case but it totally
+ignores this - i.e. it expects the operation to succeed and does not check
+that we got data loss.
+
+The following test case for fstests exercises all these cases that result
+in file corruption and data loss:
+
+ seq=`basename $0`
+ seqres=$RESULT_DIR/$seq
+ echo "QA output created by $seq"
+ tmp=/tmp/$$
+ status=1 # failure is the default!
+ trap "_cleanup; exit \$status" 0 1 2 3 15
+
+ _cleanup()
+ {
+ rm -f $tmp.*
+ }
+
+ # get standard environment, filters and checks
+ . ./common/rc
+ . ./common/filter
+
+ # real QA test starts here
+ _need_to_be_root
+ _supported_fs btrfs
+ _supported_os Linux
+ _require_scratch
+ _require_cloner
+ _require_btrfs_fs_feature "no_holes"
+ _require_btrfs_mkfs_feature "no-holes"
+
+ rm -f $seqres.full
+
+ test_cloning_inline_extents()
+ {
+ local mkfs_opts=$1
+ local mount_opts=$2
+
+ _scratch_mkfs $mkfs_opts >>$seqres.full 2>&1
+ _scratch_mount $mount_opts
+
+ # File bar, the source for all the following clone operations, consists
+ # of a single inline extent (50 bytes).
+ $XFS_IO_PROG -f -c "pwrite -S 0xbb 0 50" $SCRATCH_MNT/bar \
+ | _filter_xfs_io
+
+ # Test cloning into a file with an extent (non-inlined) where the
+ # destination offset overlaps that extent. It should not be possible to
+ # clone the inline extent from file bar into this file.
+ $XFS_IO_PROG -f -c "pwrite -S 0xaa 0K 16K" $SCRATCH_MNT/foo \
+ | _filter_xfs_io
+ $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo
+
+ # Doing IO against any range in the first 4K of the file should work.
+ # Due to a past clone ioctl bug which allowed cloning the inline extent,
+ # these operations resulted in EIO errors.
+ echo "File foo data after clone operation:"
+ # All bytes should have the value 0xaa (clone operation failed and did
+ # not modify our file).
+ od -t x1 $SCRATCH_MNT/foo
+ $XFS_IO_PROG -c "pwrite -S 0xcc 0 100" $SCRATCH_MNT/foo | _filter_xfs_io
+
+ # Test cloning the inline extent against a file which has a hole in its
+ # first 4K followed by a non-inlined extent. It should not be possible
+ # as well to clone the inline extent from file bar into this file.
+ $XFS_IO_PROG -f -c "pwrite -S 0xdd 4K 12K" $SCRATCH_MNT/foo2 \
+ | _filter_xfs_io
+ $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo2
+
+ # Doing IO against any range in the first 4K of the file should work.
+ # Due to a past clone ioctl bug which allowed cloning the inline extent,
+ # these operations resulted in EIO errors.
+ echo "File foo2 data after clone operation:"
+ # All bytes should have the value 0x00 (clone operation failed and did
+ # not modify our file).
+ od -t x1 $SCRATCH_MNT/foo2
+ $XFS_IO_PROG -c "pwrite -S 0xee 0 90" $SCRATCH_MNT/foo2 | _filter_xfs_io
+
+ # Test cloning the inline extent against a file which has a size of zero
+ # but has a prealloc extent. It should not be possible as well to clone
+ # the inline extent from file bar into this file.
+ $XFS_IO_PROG -f -c "falloc -k 0 1M" $SCRATCH_MNT/foo3 | _filter_xfs_io
+ $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo3
+
+ # Doing IO against any range in the first 4K of the file should work.
+ # Due to a past clone ioctl bug which allowed cloning the inline extent,
+ # these operations resulted in EIO errors.
+ echo "First 50 bytes of foo3 after clone operation:"
+ # Should not be able to read any bytes, file has 0 bytes i_size (the
+ # clone operation failed and did not modify our file).
+ od -t x1 $SCRATCH_MNT/foo3
+ $XFS_IO_PROG -c "pwrite -S 0xff 0 90" $SCRATCH_MNT/foo3 | _filter_xfs_io
+
+ # Test cloning the inline extent against a file which consists of a
+ # single inline extent that has a size not greater than the size of
+ # bar's inline extent (40 < 50).
+ # It should be possible to do the extent cloning from bar to this file.
+ $XFS_IO_PROG -f -c "pwrite -S 0x01 0 40" $SCRATCH_MNT/foo4 \
+ | _filter_xfs_io
+ $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo4
+
+ # Doing IO against any range in the first 4K of the file should work.
+ echo "File foo4 data after clone operation:"
+ # Must match file bar's content.
+ od -t x1 $SCRATCH_MNT/foo4
+ $XFS_IO_PROG -c "pwrite -S 0x02 0 90" $SCRATCH_MNT/foo4 | _filter_xfs_io
+
+ # Test cloning the inline extent against a file which consists of a
+ # single inline extent that has a size greater than the size of bar's
+ # inline extent (60 > 50).
+ # It should not be possible to clone the inline extent from file bar
+ # into this file.
+ $XFS_IO_PROG -f -c "pwrite -S 0x03 0 60" $SCRATCH_MNT/foo5 \
+ | _filter_xfs_io
+ $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo5
+
+ # Reading the file should not fail.
+ echo "File foo5 data after clone operation:"
+ # Must have a size of 60 bytes, with all bytes having a value of 0x03
+ # (the clone operation failed and did not modify our file).
+ od -t x1 $SCRATCH_MNT/foo5
+
+ # Test cloning the inline extent against a file which has no extents but
+ # has a size greater than bar's inline extent (16K > 50).
+ # It should not be possible to clone the inline extent from file bar
+ # into this file.
+ $XFS_IO_PROG -f -c "truncate 16K" $SCRATCH_MNT/foo6 | _filter_xfs_io
+ $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo6
+
+ # Reading the file should not fail.
+ echo "File foo6 data after clone operation:"
+ # Must have a size of 16K, with all bytes having a value of 0x00 (the
+ # clone operation failed and did not modify our file).
+ od -t x1 $SCRATCH_MNT/foo6
+
+ # Test cloning the inline extent against a file which has no extents but
+ # has a size not greater than bar's inline extent (30 < 50).
+ # It should be possible to clone the inline extent from file bar into
+ # this file.
+ $XFS_IO_PROG -f -c "truncate 30" $SCRATCH_MNT/foo7 | _filter_xfs_io
+ $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo7
+
+ # Reading the file should not fail.
+ echo "File foo7 data after clone operation:"
+ # Must have a size of 50 bytes, with all bytes having a value of 0xbb.
+ od -t x1 $SCRATCH_MNT/foo7
+
+ # Test cloning the inline extent against a file which has a size not
+ # greater than the size of bar's inline extent (20 < 50) but has
+ # a prealloc extent that goes beyond the file's size. It should not be
+ # possible to clone the inline extent from bar into this file.
+ $XFS_IO_PROG -f -c "falloc -k 0 1M" \
+ -c "pwrite -S 0x88 0 20" \
+ $SCRATCH_MNT/foo8 | _filter_xfs_io
+ $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo8
+
+ echo "File foo8 data after clone operation:"
+ # Must have a size of 20 bytes, with all bytes having a value of 0x88
+ # (the clone operation did not modify our file).
+ od -t x1 $SCRATCH_MNT/foo8
+
+ _scratch_unmount
+ }
+
+ echo -e "\nTesting without compression and without the no-holes feature...\n"
+ test_cloning_inline_extents
+
+ echo -e "\nTesting with compression and without the no-holes feature...\n"
+ test_cloning_inline_extents "" "-o compress"
+
+ echo -e "\nTesting without compression and with the no-holes feature...\n"
+ test_cloning_inline_extents "-O no-holes" ""
+
+ echo -e "\nTesting with compression and with the no-holes feature...\n"
+ test_cloning_inline_extents "-O no-holes" "-o compress"
+
+ status=0
+ exit
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ioctl.c | 195 ++++++++++++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 152 insertions(+), 43 deletions(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -3166,6 +3166,150 @@ static void clone_update_extent_map(stru
+ &BTRFS_I(inode)->runtime_flags);
+ }
+
++/*
++ * Make sure we do not end up inserting an inline extent into a file that has
++ * already other (non-inline) extents. If a file has an inline extent it can
++ * not have any other extents and the (single) inline extent must start at the
++ * file offset 0. Failing to respect these rules will lead to file corruption,
++ * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc
++ *
++ * We can have extents that have been already written to disk or we can have
++ * dirty ranges still in delalloc, in which case the extent maps and items are
++ * created only when we run delalloc, and the delalloc ranges might fall outside
++ * the range we are currently locking in the inode's io tree. So we check the
++ * inode's i_size because of that (i_size updates are done while holding the
++ * i_mutex, which we are holding here).
++ * We also check to see if the inode has a size not greater than "datal" but has
++ * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are
++ * protected against such concurrent fallocate calls by the i_mutex).
++ *
++ * If the file has no extents but a size greater than datal, do not allow the
++ * copy because we would need turn the inline extent into a non-inline one (even
++ * with NO_HOLES enabled). If we find our destination inode only has one inline
++ * extent, just overwrite it with the source inline extent if its size is less
++ * than the source extent's size, or we could copy the source inline extent's
++ * data into the destination inode's inline extent if the latter is greater than
++ * the former.
++ */
++static int clone_copy_inline_extent(struct inode *src,
++ struct inode *dst,
++ struct btrfs_trans_handle *trans,
++ struct btrfs_path *path,
++ struct btrfs_key *new_key,
++ const u64 drop_start,
++ const u64 datal,
++ const u64 skip,
++ const u64 size,
++ char *inline_data)
++{
++ struct btrfs_root *root = BTRFS_I(dst)->root;
++ const u64 aligned_end = ALIGN(new_key->offset + datal,
++ root->sectorsize);
++ int ret;
++ struct btrfs_key key;
++
++ if (new_key->offset > 0)
++ return -EOPNOTSUPP;
++
++ key.objectid = btrfs_ino(dst);
++ key.type = BTRFS_EXTENT_DATA_KEY;
++ key.offset = 0;
++ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
++ if (ret < 0) {
++ return ret;
++ } else if (ret > 0) {
++ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
++ ret = btrfs_next_leaf(root, path);
++ if (ret < 0)
++ return ret;
++ else if (ret > 0)
++ goto copy_inline_extent;
++ }
++ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
++ if (key.objectid == btrfs_ino(dst) &&
++ key.type == BTRFS_EXTENT_DATA_KEY) {
++ ASSERT(key.offset > 0);
++ return -EOPNOTSUPP;
++ }
++ } else if (i_size_read(dst) <= datal) {
++ struct btrfs_file_extent_item *ei;
++ u64 ext_len;
++
++ /*
++ * If the file size is <= datal, make sure there are no other
++ * extents following (can happen due to an fallocate call with
++ * the flag FALLOC_FL_KEEP_SIZE).
++ */
++ ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
++ struct btrfs_file_extent_item);
++ /*
++ * If it's an inline extent, it can not have other extents
++ * following it.
++ */
++ if (btrfs_file_extent_type(path->nodes[0], ei) ==
++ BTRFS_FILE_EXTENT_INLINE)
++ goto copy_inline_extent;
++
++ ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
++ if (ext_len > aligned_end)
++ return -EOPNOTSUPP;
++
++ ret = btrfs_next_item(root, path);
++ if (ret < 0) {
++ return ret;
++ } else if (ret == 0) {
++ btrfs_item_key_to_cpu(path->nodes[0], &key,
++ path->slots[0]);
++ if (key.objectid == btrfs_ino(dst) &&
++ key.type == BTRFS_EXTENT_DATA_KEY)
++ return -EOPNOTSUPP;
++ }
++ }
++
++copy_inline_extent:
++ /*
++ * We have no extent items, or we have an extent at offset 0 which may
++ * or may not be inlined. All these cases are dealt the same way.
++ */
++ if (i_size_read(dst) > datal) {
++ /*
++ * If the destination inode has an inline extent...
++ * This would require copying the data from the source inline
++ * extent into the beginning of the destination's inline extent.
++ * But this is really complex, both extents can be compressed
++ * or just one of them, which would require decompressing and
++ * re-compressing data (which could increase the new compressed
++ * size, not allowing the compressed data to fit anymore in an
++ * inline extent).
++ * So just don't support this case for now (it should be rare,
++ * we are not really saving space when cloning inline extents).
++ */
++ return -EOPNOTSUPP;
++ }
++
++ btrfs_release_path(path);
++ ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1);
++ if (ret)
++ return ret;
++ ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
++ if (ret)
++ return ret;
++
++ if (skip) {
++ const u32 start = btrfs_file_extent_calc_inline_size(0);
++
++ memmove(inline_data + start, inline_data + start + skip, datal);
++ }
++
++ write_extent_buffer(path->nodes[0], inline_data,
++ btrfs_item_ptr_offset(path->nodes[0],
++ path->slots[0]),
++ size);
++ inode_add_bytes(dst, datal);
++
++ return 0;
++}
++
+ /**
+ * btrfs_clone() - clone a range from inode file to another
+ *
+@@ -3432,21 +3576,6 @@ process_slot:
+ } else if (type == BTRFS_FILE_EXTENT_INLINE) {
+ u64 skip = 0;
+ u64 trim = 0;
+- u64 aligned_end = 0;
+-
+- /*
+- * Don't copy an inline extent into an offset
+- * greater than zero. Having an inline extent
+- * at such an offset results in chaos as btrfs
+- * isn't prepared for such cases. Just skip
+- * this case for the same reasons as commented
+- * at btrfs_ioctl_clone().
+- */
+- if (last_dest_end > 0) {
+- ret = -EOPNOTSUPP;
+- btrfs_end_transaction(trans, root);
+- goto out;
+- }
+
+ if (off > key.offset) {
+ skip = off - key.offset;
+@@ -3464,42 +3593,22 @@ process_slot:
+ size -= skip + trim;
+ datal -= skip + trim;
+
+- aligned_end = ALIGN(new_key.offset + datal,
+- root->sectorsize);
+- ret = btrfs_drop_extents(trans, root, inode,
+- drop_start,
+- aligned_end,
+- 1);
++ ret = clone_copy_inline_extent(src, inode,
++ trans, path,
++ &new_key,
++ drop_start,
++ datal,
++ skip, size, buf);
+ if (ret) {
+ if (ret != -EOPNOTSUPP)
+ btrfs_abort_transaction(trans,
+- root, ret);
+- btrfs_end_transaction(trans, root);
+- goto out;
+- }
+-
+- ret = btrfs_insert_empty_item(trans, root, path,
+- &new_key, size);
+- if (ret) {
+- btrfs_abort_transaction(trans, root,
+- ret);
++ root,
++ ret);
+ btrfs_end_transaction(trans, root);
+ goto out;
+ }
+-
+- if (skip) {
+- u32 start =
+- btrfs_file_extent_calc_inline_size(0);
+- memmove(buf+start, buf+start+skip,
+- datal);
+- }
+-
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+- write_extent_buffer(leaf, buf,
+- btrfs_item_ptr_offset(leaf, slot),
+- size);
+- inode_add_bytes(inode, datal);
+ }
+
+ /* If we have an implicit hole (NO_HOLES feature). */
--- /dev/null
+From 1d512cb77bdbda80f0dd0620a3b260d697fd581d Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 9 Nov 2015 00:33:58 +0000
+Subject: Btrfs: fix race leading to BUG_ON when running delalloc for nodatacow
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 1d512cb77bdbda80f0dd0620a3b260d697fd581d upstream.
+
+If we are using the NO_HOLES feature, we have a tiny time window when
+running delalloc for a nodatacow inode where we can race with a concurrent
+link or xattr add operation leading to a BUG_ON.
+
+This happens because at run_delalloc_nocow() we end up casting a leaf item
+of type BTRFS_INODE_[REF|EXTREF]_KEY or of type BTRFS_XATTR_ITEM_KEY to a
+file extent item (struct btrfs_file_extent_item) and then analyse its
+extent type field, which won't match any of the expected extent types
+(values BTRFS_FILE_EXTENT_[REG|PREALLOC|INLINE]) and therefore trigger an
+explicit BUG_ON(1).
+
+The following sequence diagram shows how the race happens when running a
+no-cow delalloc range [4K, 8K[ for inode 257 and we have the following
+neighbour leafs:
+
+ Leaf X (has N items) Leaf Y
+
+ [ ... (257 INODE_ITEM 0) (257 INODE_REF 256) ] [ (257 EXTENT_DATA 8192), ... ]
+ slot N - 2 slot N - 1 slot 0
+
+ (Note the implicit hole for inode 257 regarding the [0, 8K[ range)
+
+ CPU 1 CPU 2
+
+ run_delalloc_nocow()
+ btrfs_lookup_file_extent()
+ --> searches for a key with value
+ (257 EXTENT_DATA 4096) in the
+ fs/subvol tree
+ --> returns us a path with
+ path->nodes[0] == leaf X and
+ path->slots[0] == N
+
+ because path->slots[0] is >=
+ btrfs_header_nritems(leaf X), it
+ calls btrfs_next_leaf()
+
+ btrfs_next_leaf()
+ --> releases the path
+
+ hard link added to our inode,
+ with key (257 INODE_REF 500)
+ added to the end of leaf X,
+ so leaf X now has N + 1 keys
+
+ --> searches for the key
+ (257 INODE_REF 256), because
+ it was the last key in leaf X
+ before it released the path,
+ with path->keep_locks set to 1
+
+ --> ends up at leaf X again and
+ it verifies that the key
+ (257 INODE_REF 256) is no longer
+ the last key in the leaf, so it
+ returns with path->nodes[0] ==
+ leaf X and path->slots[0] == N,
+ pointing to the new item with
+ key (257 INODE_REF 500)
+
+ the loop iteration of run_delalloc_nocow()
+ does not break out the loop and continues
+ because the key referenced in the path
+ at path->nodes[0] and path->slots[0] is
+ for inode 257, its type is < BTRFS_EXTENT_DATA_KEY
+ and its offset (500) is less than our delalloc
+ range's end (8192)
+
+ the item pointed by the path, an inode reference item,
+ is (incorrectly) interpreted as a file extent item and
+ we get an invalid extent type, leading to the BUG_ON(1):
+
+ if (extent_type == BTRFS_FILE_EXTENT_REG ||
+ extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ (...)
+ } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+ (...)
+ } else {
+ BUG_ON(1)
+ }
+
+The same can happen if a xattr is added concurrently and ends up having
+a key with an offset smaller than the delalloc's range end.
+
+So fix this by skipping keys with a type smaller than
+BTRFS_EXTENT_DATA_KEY.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -1294,8 +1294,14 @@ next_slot:
+ num_bytes = 0;
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+- if (found_key.objectid > ino ||
+- found_key.type > BTRFS_EXTENT_DATA_KEY ||
++ if (found_key.objectid > ino)
++ break;
++ if (WARN_ON_ONCE(found_key.objectid < ino) ||
++ found_key.type < BTRFS_EXTENT_DATA_KEY) {
++ path->slots[0]++;
++ goto next_slot;
++ }
++ if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
+ found_key.offset > end)
+ break;
+
--- /dev/null
+From aeafbf8486c9e2bd53f5cc3c10c0b7fd7149d69c Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Fri, 6 Nov 2015 13:33:33 +0000
+Subject: Btrfs: fix race leading to incorrect item deletion when dropping extents
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit aeafbf8486c9e2bd53f5cc3c10c0b7fd7149d69c upstream.
+
+While running a stress test I got the following warning triggered:
+
+ [191627.672810] ------------[ cut here ]------------
+ [191627.673949] WARNING: CPU: 8 PID: 8447 at fs/btrfs/file.c:779 __btrfs_drop_extents+0x391/0xa50 [btrfs]()
+ (...)
+ [191627.701485] Call Trace:
+ [191627.702037] [<ffffffff8145f077>] dump_stack+0x4f/0x7b
+ [191627.702992] [<ffffffff81095de5>] ? console_unlock+0x356/0x3a2
+ [191627.704091] [<ffffffff8104b3b0>] warn_slowpath_common+0xa1/0xbb
+ [191627.705380] [<ffffffffa0664499>] ? __btrfs_drop_extents+0x391/0xa50 [btrfs]
+ [191627.706637] [<ffffffff8104b46d>] warn_slowpath_null+0x1a/0x1c
+ [191627.707789] [<ffffffffa0664499>] __btrfs_drop_extents+0x391/0xa50 [btrfs]
+ [191627.709155] [<ffffffff8115663c>] ? cache_alloc_debugcheck_after.isra.32+0x171/0x1d0
+ [191627.712444] [<ffffffff81155007>] ? kmemleak_alloc_recursive.constprop.40+0x16/0x18
+ [191627.714162] [<ffffffffa06570c9>] insert_reserved_file_extent.constprop.40+0x83/0x24e [btrfs]
+ [191627.715887] [<ffffffffa065422b>] ? start_transaction+0x3bb/0x610 [btrfs]
+ [191627.717287] [<ffffffffa065b604>] btrfs_finish_ordered_io+0x273/0x4e2 [btrfs]
+ [191627.728865] [<ffffffffa065b888>] finish_ordered_fn+0x15/0x17 [btrfs]
+ [191627.730045] [<ffffffffa067d688>] normal_work_helper+0x14c/0x32c [btrfs]
+ [191627.731256] [<ffffffffa067d96a>] btrfs_endio_write_helper+0x12/0x14 [btrfs]
+ [191627.732661] [<ffffffff81061119>] process_one_work+0x24c/0x4ae
+ [191627.733822] [<ffffffff810615b0>] worker_thread+0x206/0x2c2
+ [191627.734857] [<ffffffff810613aa>] ? process_scheduled_works+0x2f/0x2f
+ [191627.736052] [<ffffffff810613aa>] ? process_scheduled_works+0x2f/0x2f
+ [191627.737349] [<ffffffff810669a6>] kthread+0xef/0xf7
+ [191627.738267] [<ffffffff810f3b3a>] ? time_hardirqs_on+0x15/0x28
+ [191627.739330] [<ffffffff810668b7>] ? __kthread_parkme+0xad/0xad
+ [191627.741976] [<ffffffff81465592>] ret_from_fork+0x42/0x70
+ [191627.743080] [<ffffffff810668b7>] ? __kthread_parkme+0xad/0xad
+ [191627.744206] ---[ end trace bbfddacb7aaada8d ]---
+
+ $ cat -n fs/btrfs/file.c
+ 691 int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
+ (...)
+ 758 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ 759 if (key.objectid > ino ||
+ 760 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
+ 761 break;
+ 762
+ 763 fi = btrfs_item_ptr(leaf, path->slots[0],
+ 764 struct btrfs_file_extent_item);
+ 765 extent_type = btrfs_file_extent_type(leaf, fi);
+ 766
+ 767 if (extent_type == BTRFS_FILE_EXTENT_REG ||
+ 768 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ (...)
+ 774 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+ (...)
+ 778 } else {
+ 779 WARN_ON(1);
+ 780 extent_end = search_start;
+ 781 }
+ (...)
+
+This happened because the item we were processing did not match a file
+extent item (its key type != BTRFS_EXTENT_DATA_KEY), and even on this
+case we cast the item to a struct btrfs_file_extent_item pointer and
+then find a type field value that does not match any of the expected
+values (BTRFS_FILE_EXTENT_[REG|PREALLOC|INLINE]). This scenario happens
+due to a tiny time window where a race can happen as exemplified below.
+For example, consider the following scenario where we're using the
+NO_HOLES feature and we have the following two neighbour leafs:
+
+ Leaf X (has N items) Leaf Y
+
+[ ... (257 INODE_ITEM 0) (257 INODE_REF 256) ] [ (257 EXTENT_DATA 8192), ... ]
+ slot N - 2 slot N - 1 slot 0
+
+Our inode 257 has an implicit hole in the range [0, 8K[ (implicit rather
+than explicit because NO_HOLES is enabled). Now if our inode has an
+ordered extent for the range [4K, 8K[ that is finishing, the following
+can happen:
+
+ CPU 1 CPU 2
+
+ btrfs_finish_ordered_io()
+ insert_reserved_file_extent()
+ __btrfs_drop_extents()
+ Searches for the key
+ (257 EXTENT_DATA 4096) through
+ btrfs_lookup_file_extent()
+
+ Key not found and we get a path where
+ path->nodes[0] == leaf X and
+ path->slots[0] == N
+
+ Because path->slots[0] is >=
+ btrfs_header_nritems(leaf X), we call
+ btrfs_next_leaf()
+
+ btrfs_next_leaf() releases the path
+
+ inserts key
+ (257 INODE_REF 4096)
+ at the end of leaf X,
+ leaf X now has N + 1 keys,
+ and the new key is at
+ slot N
+
+ btrfs_next_leaf() searches for
+ key (257 INODE_REF 256), with
+ path->keep_locks set to 1,
+ because it was the last key it
+ saw in leaf X
+
+ finds it in leaf X again and
+ notices it's no longer the last
+ key of the leaf, so it returns 0
+ with path->nodes[0] == leaf X and
+ path->slots[0] == N (which is now
+ < btrfs_header_nritems(leaf X)),
+ pointing to the new key
+ (257 INODE_REF 4096)
+
+ __btrfs_drop_extents() casts the
+ item at path->nodes[0], slot
+ path->slots[0], to a struct
+ btrfs_file_extent_item - it does
+ not skip keys for the target
+ inode with a type less than
+ BTRFS_EXTENT_DATA_KEY
+ (BTRFS_INODE_REF_KEY < BTRFS_EXTENT_DATA_KEY)
+
+ sees a bogus value for the type
+ field triggering the WARN_ON in
+ the trace shown above, and sets
+ extent_end = search_start (4096)
+
+ does the if-then-else logic to
+ fixup 0 length extent items created
+ by a past bug from hole punching:
+
+ if (extent_end == key.offset &&
+ extent_end >= search_start)
+ goto delete_extent_item;
+
+ that evaluates to true and it ends
+ up deleting the key pointed to by
+ path->slots[0], (257 INODE_REF 4096),
+ from leaf X
+
+The same could happen for example for a xattr that ends up having a key
+with an offset value that matches search_start (very unlikely but not
+impossible).
+
+So fix this by ensuring that keys smaller than BTRFS_EXTENT_DATA_KEY are
+skipped, never cast to struct btrfs_file_extent_item and never deleted
+by accident. Also protect against the unexpected case of getting a key
+for a lower inode number by skipping that key and issuing a warning.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/file.c | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -756,8 +756,16 @@ next_slot:
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+- if (key.objectid > ino ||
+- key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
++
++ if (key.objectid > ino)
++ break;
++ if (WARN_ON_ONCE(key.objectid < ino) ||
++ key.type < BTRFS_EXTENT_DATA_KEY) {
++ ASSERT(del_nr == 0);
++ path->slots[0]++;
++ goto next_slot;
++ }
++ if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
+ break;
+
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+@@ -776,8 +784,8 @@ next_slot:
+ btrfs_file_extent_inline_len(leaf,
+ path->slots[0], fi);
+ } else {
+- WARN_ON(1);
+- extent_end = search_start;
++ /* can't happen */
++ BUG();
+ }
+
+ /*
--- /dev/null
+From f1cd1f0b7d1b5d4aaa5711e8f4e4898b0045cb6d Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 9 Nov 2015 18:06:38 +0000
+Subject: Btrfs: fix race when listing an inode's xattrs
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit f1cd1f0b7d1b5d4aaa5711e8f4e4898b0045cb6d upstream.
+
+When listing an inode's xattrs we have a time window where we race against
+a concurrent operation for adding a new hard link for our inode that makes
+us not return any xattr to user space. In order for this to happen, the
+first xattr of our inode needs to be at slot 0 of a leaf and the previous
+leaf must still have room for an inode ref (or extref) item, and this can
+happen because an inode's listxattrs callback does not lock the inode's
+i_mutex (nor does the VFS do it for us), but adding a hard link to an
+inode makes the VFS lock the inode's i_mutex before calling the inode's
+link callback.
+
+If we have the following leafs:
+
+ Leaf X (has N items) Leaf Y
+
+ [ ... (257 INODE_ITEM 0) (257 INODE_REF 256) ] [ (257 XATTR_ITEM 12345), ... ]
+ slot N - 2 slot N - 1 slot 0
+
+The race illustrated by the following sequence diagram is possible:
+
+ CPU 1 CPU 2
+
+ btrfs_listxattr()
+
+ searches for key (257 XATTR_ITEM 0)
+
+ gets path with path->nodes[0] == leaf X
+ and path->slots[0] == N
+
+ because path->slots[0] is >=
+ btrfs_header_nritems(leaf X), it calls
+ btrfs_next_leaf()
+
+ btrfs_next_leaf()
+ releases the path
+
+ adds key (257 INODE_REF 666)
+ to the end of leaf X (slot N),
+ and leaf X now has N + 1 items
+
+ searches for the key (257 INODE_REF 256),
+ with path->keep_locks == 1, because that
+ is the last key it saw in leaf X before
+ releasing the path
+
+ ends up at leaf X again and it verifies
+ that the key (257 INODE_REF 256) is no
+ longer the last key in leaf X, so it
+ returns with path->nodes[0] == leaf X
+ and path->slots[0] == N, pointing to
+ the new item with key (257 INODE_REF 666)
+
+ btrfs_listxattr's loop iteration sees that
+ the type of the key pointed by the path is
+ different from the type BTRFS_XATTR_ITEM_KEY
+ and so it breaks the loop and stops looking
+ for more xattr items
+ --> the application doesn't get any xattr
+ listed for our inode
+
+So fix this by breaking the loop only if the key's type is greater than
+BTRFS_XATTR_ITEM_KEY and skip the current key if its type is smaller.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/xattr.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/xattr.c
++++ b/fs/btrfs/xattr.c
+@@ -313,8 +313,10 @@ ssize_t btrfs_listxattr(struct dentry *d
+ /* check to make sure this item is what we want */
+ if (found_key.objectid != key.objectid)
+ break;
+- if (found_key.type != BTRFS_XATTR_ITEM_KEY)
++ if (found_key.type > BTRFS_XATTR_ITEM_KEY)
+ break;
++ if (found_key.type < BTRFS_XATTR_ITEM_KEY)
++ goto next;
+
+ di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
+ if (verify_dir_item(root, leaf, di))
--- /dev/null
+From 0305cd5f7fca85dae392b9ba85b116896eb7c1c7 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Fri, 16 Oct 2015 12:34:25 +0100
+Subject: Btrfs: fix truncation of compressed and inlined extents
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 0305cd5f7fca85dae392b9ba85b116896eb7c1c7 upstream.
+
+When truncating a file that consists of a compressed inline extent to a
+smaller size, we did not discard (or make unusable) the data between the
+new file size and the old file size, wasting metadata space and allowing
+the truncated data to be leaked and the data corruption/loss mentioned
+below.
+We were also not correctly decrementing the number of bytes used by the
+inode; we were setting it to zero, giving a wrong report for callers of
+the stat(2) syscall. The fsck tool also reported an error about a mismatch
+between the nbytes of the file versus the real space used by the file.
+
+Now because we weren't discarding the truncated region of the file, it
+was possible for a caller of the clone ioctl to actually read the data
+that was truncated, allowing for a security breach without requiring root
+access to the system, using only standard filesystem operations. The
+scenario is the following:
+
+ 1) User A creates a file which consists of an inline and compressed
+ extent with a size of 2000 bytes - the file is not accessible to
+ any other users (no read, write or execution permission for anyone
+ else);
+
+ 2) The user truncates the file to a size of 1000 bytes;
+
+ 3) User A makes the file world readable;
+
+ 4) User B creates a file consisting of an inline extent of 2000 bytes;
+
+ 5) User B issues a clone operation from user A's file into its own
+ file (using a length argument of 0, clone the whole range);
+
+ 6) User B now gets to see the 1000 bytes that user A truncated from
+ its file before it made its file world readable. User B also lost
+ the bytes in the range [1000, 2000[ bytes from its own file, but
+ that might be ok if his/her intention was reading stale data from
+ user A that was never supposed to be public.
+
+Note that this contrasts with the case where we truncate a file from 2000
+bytes to 1000 bytes and then truncate it back from 1000 to 2000 bytes. In
+this case reading any byte from the range [1000, 2000[ will return a value
+of 0x00, instead of the original data.
+
+This problem exists since the clone ioctl was added and happens both with
+and without my recent data loss and file corruption fixes for the clone
+ioctl (patch "Btrfs: fix file corruption and data loss after cloning
+inline extents").
+
+So fix this by truncating the compressed inline extents as we do for the
+non-compressed case, which involves decompressing the data (if it isn't
+already in the page cache), compressing the truncated version of the extent,
+writing the compressed content into the inline extent and then truncating it.
+
+The following test case for fstests reproduces the problem. In order for
+the test to pass both this fix and my previous fix for the clone ioctl
+that forbids cloning a smaller inline extent into a larger one,
+which is titled "Btrfs: fix file corruption and data loss after cloning
+inline extents", are needed. Without that other fix the test fails in a
+different way that does not leak the truncated data, instead part of
+destination file gets replaced with zeroes (because the destination file
+has a larger inline extent than the source).
+
+ seq=`basename $0`
+ seqres=$RESULT_DIR/$seq
+ echo "QA output created by $seq"
+ tmp=/tmp/$$
+ status=1 # failure is the default!
+ trap "_cleanup; exit \$status" 0 1 2 3 15
+
+ _cleanup()
+ {
+ rm -f $tmp.*
+ }
+
+ # get standard environment, filters and checks
+ . ./common/rc
+ . ./common/filter
+
+ # real QA test starts here
+ _need_to_be_root
+ _supported_fs btrfs
+ _supported_os Linux
+ _require_scratch
+ _require_cloner
+
+ rm -f $seqres.full
+
+ _scratch_mkfs >>$seqres.full 2>&1
+ _scratch_mount "-o compress"
+
+ # Create our test files. File foo is going to be the source of a clone operation
+ # and consists of a single inline extent with an uncompressed size of 512 bytes,
+ # while file bar consists of a single inline extent with an uncompressed size of
+ # 256 bytes. For our test's purpose, it's important that file bar has an inline
+ # extent with a size smaller than foo's inline extent.
+ $XFS_IO_PROG -f -c "pwrite -S 0xa1 0 128" \
+ -c "pwrite -S 0x2a 128 384" \
+ $SCRATCH_MNT/foo | _filter_xfs_io
+ $XFS_IO_PROG -f -c "pwrite -S 0xbb 0 256" $SCRATCH_MNT/bar | _filter_xfs_io
+
+ # Now durably persist all metadata and data. We do this to make sure that we get
+ # on disk an inline extent with a size of 512 bytes for file foo.
+ sync
+
+ # Now truncate our file foo to a smaller size. Because it consists of a
+ # compressed and inline extent, btrfs did not shrink the inline extent to the
+ # new size (if the extent was not compressed, btrfs would shrink it to 128
+ # bytes), it only updates the inode's i_size to 128 bytes.
+ $XFS_IO_PROG -c "truncate 128" $SCRATCH_MNT/foo
+
+ # Now clone foo's inline extent into bar.
+ # This clone operation should fail with errno EOPNOTSUPP because the source
+ # file consists only of an inline extent and the file's size is smaller than
+ # the inline extent of the destination (128 bytes < 256 bytes). However the
+ # clone ioctl was not prepared to deal with a file that has a size smaller
+ # than the size of its inline extent (something that happens only for compressed
+ # inline extents), resulting in copying the full inline extent from the source
+ # file into the destination file.
+ #
+ # Note that btrfs' clone operation for inline extents consists of removing the
+ # inline extent from the destination inode and copying the inline extent from the
+ # source inode into the destination inode, meaning that if the destination
+ # inode's inline extent is larger (N bytes) than the source inode's inline
+ # extent (M bytes), some bytes (N - M bytes) will be lost from the destination
+ # file. Btrfs could copy the source inline extent's data into the destination's
+ # inline extent so that we would not lose any data, but that's currently not
+ # done due to the complexity that would be needed to deal with such cases
+ # (specially when one or both extents are compressed), returning EOPNOTSUPP, as
+ # it's normally not a very common case to clone very small files (only case
+ # where we get inline extents) and copying inline extents does not save any
+ # space (unlike for normal, non-inlined extents).
+ $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/foo $SCRATCH_MNT/bar
+
+ # Now because the above clone operation used to succeed, and due to foo's inline
+ # extent not being shrunk by the truncate operation, our file bar got the whole
+ # inline extent copied from foo, making us lose the last 128 bytes from bar
+ # which got replaced by the bytes in range [128, 256[ from foo before foo was
+ # truncated - in other words, data loss from bar and being able to read old and
+ # stale data from foo that should not be possible to read anymore through normal
+ # filesystem operations. Contrast with the case where we truncate a file from a
+ # size N to a smaller size M, truncate it back to size N and then read the range
+ # [M, N[: we should always get the value 0x00 for all the bytes in that range.
+
+ # We expected the clone operation to fail with errno EOPNOTSUPP and therefore
+ # not modify our file bar's data/metadata. So its content should be 256 bytes
+ # long with all bytes having the value 0xbb.
+ #
+ # Without the btrfs bug fix, the clone operation succeeded and resulted in
+ # leaking truncated data from foo, the bytes that belonged to its range
+ # [128, 256[, and losing data from bar in that same range. So reading the
+ # file gave us the following content:
+ #
+ # 0000000 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1
+ # *
+ # 0000200 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a
+ # *
+ # 0000400
+ echo "File bar's content after the clone operation:"
+ od -t x1 $SCRATCH_MNT/bar
+
+ # Also because foo's inline extent was not shrunk by the truncate
+ # operation, btrfs' fsck, which is run by the fstests framework every time a
+ # test completes, failed reporting the following error:
+ #
+ # root 5 inode 257 errors 400, nbytes wrong
+
+ status=0
+ exit
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c | 82 +++++++++++++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 68 insertions(+), 14 deletions(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -4184,6 +4184,47 @@ static int truncate_space_check(struct b
+
+ }
+
++static int truncate_inline_extent(struct inode *inode,
++ struct btrfs_path *path,
++ struct btrfs_key *found_key,
++ const u64 item_end,
++ const u64 new_size)
++{
++ struct extent_buffer *leaf = path->nodes[0];
++ int slot = path->slots[0];
++ struct btrfs_file_extent_item *fi;
++ u32 size = (u32)(new_size - found_key->offset);
++ struct btrfs_root *root = BTRFS_I(inode)->root;
++
++ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
++
++ if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
++ loff_t offset = new_size;
++ loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE);
++
++ /*
++ * Zero out the remaining of the last page of our inline extent,
++ * instead of directly truncating our inline extent here - that
++ * would be much more complex (decompressing all the data, then
++ * compressing the truncated data, which might be bigger than
++ * the size of the inline extent, resize the extent, etc).
++ * We release the path because to get the page we might need to
++ * read the extent item from disk (data not in the page cache).
++ */
++ btrfs_release_path(path);
++ return btrfs_truncate_page(inode, offset, page_end - offset, 0);
++ }
++
++ btrfs_set_file_extent_ram_bytes(leaf, fi, size);
++ size = btrfs_file_extent_calc_inline_size(size);
++ btrfs_truncate_item(root, path, size, 1);
++
++ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
++ inode_sub_bytes(inode, item_end + 1 - new_size);
++
++ return 0;
++}
++
+ /*
+ * this can truncate away extent items, csum items and directory items.
+ * It starts at a high offset and removes keys until it can't find
+@@ -4378,27 +4419,40 @@ search_again:
+ * special encodings
+ */
+ if (!del_item &&
+- btrfs_file_extent_compression(leaf, fi) == 0 &&
+ btrfs_file_extent_encryption(leaf, fi) == 0 &&
+ btrfs_file_extent_other_encoding(leaf, fi) == 0) {
+- u32 size = new_size - found_key.offset;
+-
+- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+- inode_sub_bytes(inode, item_end + 1 -
+- new_size);
+
+ /*
+- * update the ram bytes to properly reflect
+- * the new size of our item
++ * Need to release path in order to truncate a
++ * compressed extent. So delete any accumulated
++ * extent items so far.
+ */
+- btrfs_set_file_extent_ram_bytes(leaf, fi, size);
+- size =
+- btrfs_file_extent_calc_inline_size(size);
+- btrfs_truncate_item(root, path, size, 1);
++ if (btrfs_file_extent_compression(leaf, fi) !=
++ BTRFS_COMPRESS_NONE && pending_del_nr) {
++ err = btrfs_del_items(trans, root, path,
++ pending_del_slot,
++ pending_del_nr);
++ if (err) {
++ btrfs_abort_transaction(trans,
++ root,
++ err);
++ goto error;
++ }
++ pending_del_nr = 0;
++ }
++
++ err = truncate_inline_extent(inode, path,
++ &found_key,
++ item_end,
++ new_size);
++ if (err) {
++ btrfs_abort_transaction(trans,
++ root, err);
++ goto error;
++ }
+ } else if (test_bit(BTRFS_ROOT_REF_COWS,
+ &root->state)) {
+- inode_sub_bytes(inode, item_end + 1 -
+- found_key.offset);
++ inode_sub_bytes(inode, item_end + 1 - new_size);
+ }
+ }
+ delete:
--- /dev/null
+From 777d738a5e58ba3b6f3932ab1543ce93703f4873 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Wed, 30 Sep 2015 15:04:42 +0200
+Subject: ceph: fix message length computation
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit 777d738a5e58ba3b6f3932ab1543ce93703f4873 upstream.
+
+create_request_message() computes the maximum length of a message,
+but uses the wrong type for the time stamp: sizeof(struct timespec)
+may be 8 or 16 depending on the architecture, while sizeof(struct
+ceph_timespec) is always 8, and that is what gets put into the
+message.
+
+Found while auditing the uses of timespec for y2038 problems.
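+
+For illustration only (not part of the patch), a small user-space sketch of
+the size mismatch; the two 32-bit fields below mirror the wire format of the
+kernel's struct ceph_timespec and are reproduced here just for the demo:
+
+  #include <stdint.h>
+  #include <stdio.h>
+  #include <time.h>
+
+  struct ceph_timespec {          /* wire format: always 8 bytes */
+          uint32_t tv_sec;        /* __le32 in the kernel header */
+          uint32_t tv_nsec;       /* __le32 in the kernel header */
+  };
+
+  int main(void)
+  {
+          /* struct timespec is 16 bytes on most 64-bit ABIs, so sizing the
+           * message with it over-reserves; the wire struct is what is sent. */
+          printf("timespec=%zu ceph_timespec=%zu\n",
+                 sizeof(struct timespec), sizeof(struct ceph_timespec));
+          return 0;
+  }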
+
+Fixes: b8e69066d8af ("ceph: include time stamp in every MDS request")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Yan, Zheng <zyan@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/mds_client.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -1905,7 +1905,7 @@ static struct ceph_msg *create_request_m
+
+ len = sizeof(*head) +
+ pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
+- sizeof(struct timespec);
++ sizeof(struct ceph_timespec);
+
+ /* calculate (max) length for cap releases */
+ len += sizeof(struct ceph_mds_request_release) *
--- /dev/null
+From 0ee9608c89e81a1ccee52ecb58a7ff040e2522d9 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Thu, 5 Nov 2015 00:01:51 +0100
+Subject: debugfs: fix refcount imbalance in start_creating
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 0ee9608c89e81a1ccee52ecb58a7ff040e2522d9 upstream.
+
+In debugfs' start_creating(), we pin the file system to safely access
+its root. When we failed to create a file, we unpin the file system via
+failed_creating() to release the mount count and eventually the reference
+of the vfsmount.
+
+However, when we run into an error during lookup_one_len() while still
+in start_creating(), we only release the parent's mutex but not the
+reference on the mount. It looks like this was done in the past, but after
+splitting portions of __create_file() into start_creating() and
+end_creating() via 190afd81e4a5 ("debugfs: split the beginning and the
+end of __create_file() off"), this seems to have been missed. Noticed during
+code review.
+
+Fixes: 190afd81e4a5 ("debugfs: split the beginning and the end of __create_file() off")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/debugfs/inode.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/debugfs/inode.c
++++ b/fs/debugfs/inode.c
+@@ -276,8 +276,12 @@ static struct dentry *start_creating(con
+ dput(dentry);
+ dentry = ERR_PTR(-EEXIST);
+ }
+- if (IS_ERR(dentry))
++
++ if (IS_ERR(dentry)) {
+ mutex_unlock(&d_inode(parent)->i_mutex);
++ simple_release_fs(&debugfs_mount, &debugfs_mount_count);
++ }
++
+ return dentry;
+ }
+
--- /dev/null
+From 937d7b84dca58f2565715f2c8e52f14c3d65fb22 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Fri, 2 Oct 2015 23:54:58 -0400
+Subject: ext4 crypto: fix memory leak in ext4_bio_write_page()
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 937d7b84dca58f2565715f2c8e52f14c3d65fb22 upstream.
+
+There are times when ext4_bio_write_page() is called even though we
+don't actually need to do any I/O. This happens when ext4_writepage()
+gets called by the jbd2 commit path when an inode needs to force its
+pages written out in order to provide data=ordered guarantees --- and
+a page is backed by an unwritten (e.g., uninitialized) block on disk,
+or if delayed allocation means the page's backing store hasn't been
+allocated yet. In that case, we need to skip the call to
+ext4_encrypt_page(), since in addition to wasting CPU, it leads to a
+bounce page and an ext4 crypto context getting leaked.
+
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/page-io.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/page-io.c
++++ b/fs/ext4/page-io.c
+@@ -426,6 +426,7 @@ int ext4_bio_write_page(struct ext4_io_s
+ struct buffer_head *bh, *head;
+ int ret = 0;
+ int nr_submitted = 0;
++ int nr_to_submit = 0;
+
+ blocksize = 1 << inode->i_blkbits;
+
+@@ -478,11 +479,13 @@ int ext4_bio_write_page(struct ext4_io_s
+ unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+ }
+ set_buffer_async_write(bh);
++ nr_to_submit++;
+ } while ((bh = bh->b_this_page) != head);
+
+ bh = head = page_buffers(page);
+
+- if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
++ if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) &&
++ nr_to_submit) {
+ data_page = ext4_encrypt(inode, page);
+ if (IS_ERR(data_page)) {
+ ret = PTR_ERR(data_page);
--- /dev/null
+From 6934da9238da947628be83635e365df41064b09b Mon Sep 17 00:00:00 2001
+From: Lukas Czerner <lczerner@redhat.com>
+Date: Sat, 17 Oct 2015 22:57:06 -0400
+Subject: ext4: fix potential use after free in __ext4_journal_stop
+
+From: Lukas Czerner <lczerner@redhat.com>
+
+commit 6934da9238da947628be83635e365df41064b09b upstream.
+
+There is a use-after-free possibility in __ext4_journal_stop() in the
+case that we free the handle in the first jbd2_journal_stop(), because
+we reference handle->h_err afterwards. This was introduced in
+9705acd63b125dee8b15c705216d7186daea4625 and it is wrong. Fix it by
+storing the handle->h_err value beforehand and avoiding any reference
+to the potentially freed handle.
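+
+As a toy illustration only (not ext4 code; the types and names below are
+hypothetical), the general pattern of the fix is to copy any field you still
+need out of an object before calling a function that may free it:
+
+  #include <stdio.h>
+  #include <stdlib.h>
+
+  struct handle { int h_err; };
+
+  static int journal_stop(struct handle *h)
+  {
+          free(h);                /* the handle may be gone after this call */
+          return 0;
+  }
+
+  static int stop_handle(struct handle *h)
+  {
+          int err = h->h_err;     /* saved while the handle is still valid */
+          int rc = journal_stop(h);
+
+          return err ? err : rc;  /* no dereference of h after the free */
+  }
+
+  int main(void)
+  {
+          struct handle *h = calloc(1, sizeof(*h));
+
+          if (!h)
+                  return 1;
+          printf("stop_handle() returned %d\n", stop_handle(h));
+          return 0;
+  }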
+
+Fixes: 9705acd63b125dee8b15c705216d7186daea4625
+Signed-off-by: Lukas Czerner <lczerner@redhat.com>
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/ext4_jbd2.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/ext4_jbd2.c
++++ b/fs/ext4/ext4_jbd2.c
+@@ -88,13 +88,13 @@ int __ext4_journal_stop(const char *wher
+ return 0;
+ }
+
++ err = handle->h_err;
+ if (!handle->h_transaction) {
+- err = jbd2_journal_stop(handle);
+- return handle->h_err ? handle->h_err : err;
++ rc = jbd2_journal_stop(handle);
++ return err ? err : rc;
+ }
+
+ sb = handle->h_transaction->t_journal->j_private;
+- err = handle->h_err;
+ rc = jbd2_journal_stop(handle);
+
+ if (!err)
--- /dev/null
+From 4327ba52afd03fc4b5afa0ee1d774c9c5b0e85c5 Mon Sep 17 00:00:00 2001
+From: Daeho Jeong <daeho.jeong@samsung.com>
+Date: Sun, 18 Oct 2015 17:02:56 -0400
+Subject: ext4, jbd2: ensure entering into panic after recording an error in superblock
+
+From: Daeho Jeong <daeho.jeong@samsung.com>
+
+commit 4327ba52afd03fc4b5afa0ee1d774c9c5b0e85c5 upstream.
+
+If an EXT4 filesystem uses JBD2 journaling and an error occurs, the
+journaling is aborted first, the error number is recorded in the JBD2
+superblock and, finally, the system enters the panic state when the
+"errors=panic" option is used. But in a rare case this sequence gets
+twisted, as in the figure below, and the system can enter the panic
+state (which means a system reset in a mobile environment) before the
+error has been recorded in the journal superblock. In that case, e2fsck
+cannot recognize that a filesystem failure occurred in the previous run
+and the corruption will not be fixed.
+
+Task A Task B
+ext4_handle_error()
+-> jbd2_journal_abort()
+ -> __journal_abort_soft()
+ -> __jbd2_journal_abort_hard()
+ | -> journal->j_flags |= JBD2_ABORT;
+ |
+ | __ext4_abort()
+ | -> jbd2_journal_abort()
+ | | -> __journal_abort_soft()
+ | | -> if (journal->j_flags & JBD2_ABORT)
+ | | return;
+ | -> panic()
+ |
+ -> jbd2_journal_update_sb_errno()
+
+Tested-by: Hobin Woo <hobin.woo@samsung.com>
+Signed-off-by: Daeho Jeong <daeho.jeong@samsung.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/super.c | 12 ++++++++++--
+ fs/jbd2/journal.c | 6 +++++-
+ include/linux/jbd2.h | 1 +
+ 3 files changed, 16 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -396,9 +396,13 @@ static void ext4_handle_error(struct sup
+ smp_wmb();
+ sb->s_flags |= MS_RDONLY;
+ }
+- if (test_opt(sb, ERRORS_PANIC))
++ if (test_opt(sb, ERRORS_PANIC)) {
++ if (EXT4_SB(sb)->s_journal &&
++ !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
++ return;
+ panic("EXT4-fs (device %s): panic forced after error\n",
+ sb->s_id);
++ }
+ }
+
+ #define ext4_error_ratelimit(sb) \
+@@ -587,8 +591,12 @@ void __ext4_abort(struct super_block *sb
+ jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
+ save_error_info(sb, function, line);
+ }
+- if (test_opt(sb, ERRORS_PANIC))
++ if (test_opt(sb, ERRORS_PANIC)) {
++ if (EXT4_SB(sb)->s_journal &&
++ !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
++ return;
+ panic("EXT4-fs panic from previous error\n");
++ }
+ }
+
+ void __ext4_msg(struct super_block *sb,
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -2086,8 +2086,12 @@ static void __journal_abort_soft (journa
+
+ __jbd2_journal_abort_hard(journal);
+
+- if (errno)
++ if (errno) {
+ jbd2_journal_update_sb_errno(journal);
++ write_lock(&journal->j_state_lock);
++ journal->j_flags |= JBD2_REC_ERR;
++ write_unlock(&journal->j_state_lock);
++ }
+ }
+
+ /**
+--- a/include/linux/jbd2.h
++++ b/include/linux/jbd2.h
+@@ -1007,6 +1007,7 @@ struct journal_s
+ #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file
+ * data write error in ordered
+ * mode */
++#define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */
+
+ /*
+ * Function declarations for the journaling transaction and buffer
--- /dev/null
+From 100ceb66d5c40cc0c7018e06a9474302470be73c Mon Sep 17 00:00:00 2001
+From: Stefan Richter <stefanr@s5r6.in-berlin.de>
+Date: Tue, 3 Nov 2015 01:46:21 +0100
+Subject: firewire: ohci: fix JMicron JMB38x IT context discovery
+
+From: Stefan Richter <stefanr@s5r6.in-berlin.de>
+
+commit 100ceb66d5c40cc0c7018e06a9474302470be73c upstream.
+
+Reported by Clifford and Craig for JMicron OHCI-1394 + SDHCI combo
+controllers: Often or even most of the time, the controller is
+initialized with the message "added OHCI v1.10 device as card 0, 4 IR +
+0 IT contexts, quirks 0x10". With 0 isochronous transmit DMA contexts
+(IT contexts), applications like audio output are impossible.
+
+However, OHCI-1394 demands that at least 4 IT contexts are implemented
+by the link layer controller, and indeed JMicron JMB38x do implement
+four of them. Only their IsoXmitIntMask register is unreliable at early
+access.
+
+With my own JMB381 single function controller I found:
+ - I can reproduce the problem with a lower probability than Craig's.
+ - If I put a loop around the section which clears and reads
+ IsoXmitIntMask, then either the first or the second attempt will
+ return the correct initial mask of 0x0000000f. I never encountered
+ a case of needing more than a second attempt.
+ - Consequently, if I put a dummy reg_read(...IsoXmitIntMaskSet)
+ before the first write, the subsequent read will return the correct
+ result.
+ - If I merely ignore a wrong read result and force the known real
+ result, later isochronous transmit DMA usage works just fine.
+
+So let's just fix this chip bug up by the latter method. Tested with
+JMB381 on kernel 3.13 and 4.3.
+
+Since OHCI-1394 generally requires 4 IT contexts at a minimum, this
+workaround is simply applied whenever the initial read of IsoXmitIntMask
+returns 0, regardless of whether it's a JMicron chip or not. I have never
+heard of this issue with any other chip though.
+
+I am not 100% sure that this fix works on the OHCI-1394 part of JMB380
+and JMB388 combo controllers exactly the same as on the JMB381 single-
+function controller, but so far I haven't had a chance to let an owner
+of a combo chip run a patched kernel.
+
+Strangely enough, IsoRecvIntMask is always reported correctly, even
+though it is probed right before IsoXmitIntMask.
+
+Reported-by: Clifford Dunn
+Reported-by: Craig Moore <craig.moore@qenos.com>
+Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/firewire/ohci.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/firewire/ohci.c
++++ b/drivers/firewire/ohci.c
+@@ -3675,6 +3675,11 @@ static int pci_probe(struct pci_dev *dev
+
+ reg_write(ohci, OHCI1394_IsoXmitIntMaskSet, ~0);
+ ohci->it_context_support = reg_read(ohci, OHCI1394_IsoXmitIntMaskSet);
++ /* JMicron JMB38x often shows 0 at first read, just ignore it */
++ if (!ohci->it_context_support) {
++ ohci_notice(ohci, "overriding IsoXmitIntMask\n");
++ ohci->it_context_support = 0xf;
++ }
+ reg_write(ohci, OHCI1394_IsoXmitIntMaskClear, ~0);
+ ohci->it_context_mask = ohci->it_context_support;
+ ohci->n_it = hweight32(ohci->it_context_mask);
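(Editorial note: the retry-loop variant mentioned in the changelog, which the patch deliberately does not use, would look roughly as follows. This is a sketch only, reusing the reg_read()/reg_write() accessors and register names from drivers/firewire/ohci.c.)

	int retries = 2;

	do {
		reg_write(ohci, OHCI1394_IsoXmitIntMaskSet, ~0);
		ohci->it_context_support =
				reg_read(ohci, OHCI1394_IsoXmitIntMaskSet);
	} while (!ohci->it_context_support && --retries);
	reg_write(ohci, OHCI1394_IsoXmitIntMaskClear, ~0);
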
--- /dev/null
+From c812012f9ca7cf89c9e1a1cd512e6c3b5be04b85 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@poochiereds.net>
+Date: Wed, 25 Nov 2015 13:50:11 -0500
+Subject: nfs: if we have no valid attrs, then don't declare the attribute cache valid
+
+From: Jeff Layton <jlayton@poochiereds.net>
+
+commit c812012f9ca7cf89c9e1a1cd512e6c3b5be04b85 upstream.
+
+If we pass in an empty nfs_fattr struct to nfs_update_inode, it will
+(correctly) not update any of the attributes, but it then clears the
+NFS_INO_INVALID_ATTR flag, which indicates that the attributes are
+up to date. Don't clear the flag if the fattr struct has no valid
+attrs to apply.
+
+Reviewed-by: Steve French <steve.french@primarydata.com>
+Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/inode.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/nfs/inode.c
++++ b/fs/nfs/inode.c
+@@ -1813,7 +1813,11 @@ static int nfs_update_inode(struct inode
+ if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
+ nfsi->attr_gencount = fattr->gencount;
+ }
+- invalid &= ~NFS_INO_INVALID_ATTR;
++
++ /* Don't declare attrcache up to date if there were no attrs! */
++ if (fattr->valid != 0)
++ invalid &= ~NFS_INO_INVALID_ATTR;
++
+ /* Don't invalidate the data if we were to blame */
+ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
+ || S_ISLNK(inode->i_mode)))
--- /dev/null
+From c68a027c05709330fe5b2f50c50d5fa02124b5d8 Mon Sep 17 00:00:00 2001
+From: Benjamin Coddington <bcodding@redhat.com>
+Date: Fri, 20 Nov 2015 09:56:20 -0500
+Subject: nfs4: start callback_ident at idr 1
+
+From: Benjamin Coddington <bcodding@redhat.com>
+
+commit c68a027c05709330fe5b2f50c50d5fa02124b5d8 upstream.
+
+If clp->cl_cb_ident is zero, then nfs_cb_idr_remove_locked() skips removing
+it when the nfs_client is freed. A decoding or server bug can then find
+and try to put that first nfs_client, which would lead to a crash.
+
+Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
+Fixes: d6870312659d ("nfs4client: convert to idr_alloc()")
+Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/nfs4client.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/nfs/nfs4client.c
++++ b/fs/nfs/nfs4client.c
+@@ -33,7 +33,7 @@ static int nfs_get_cb_ident_idr(struct n
+ return ret;
+ idr_preload(GFP_KERNEL);
+ spin_lock(&nn->nfs_client_lock);
+- ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT);
++ ret = idr_alloc(&nn->cb_ident_idr, clp, 1, 0, GFP_NOWAIT);
+ if (ret >= 0)
+ clp->cl_cb_ident = ret;
+ spin_unlock(&nn->nfs_client_lock);
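(Editorial note: the reason ident 0 must never be handed out is the removal side. A sketch of the check the changelog refers to is below; the exact body of nfs_cb_idr_remove_locked() in the tree may differ slightly.)

	static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
	{
		struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);

		/* cl_cb_ident == 0 means "never allocated" and is skipped;
		 * with idr_alloc() starting at 1, a real allocation can no
		 * longer share that value. */
		if (clp->cl_cb_ident)
			idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident);
	}
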
--- /dev/null
+From 34ed9872e745fa56f10e9bef2cf3d2336c6c8816 Mon Sep 17 00:00:00 2001
+From: Andrew Elble <aweits@rit.edu>
+Date: Thu, 15 Oct 2015 12:07:28 -0400
+Subject: nfsd: eliminate sending duplicate and repeated delegations
+
+From: Andrew Elble <aweits@rit.edu>
+
+commit 34ed9872e745fa56f10e9bef2cf3d2336c6c8816 upstream.
+
+We've observed the nfsd server in a state where there are
+multiple delegations on the same nfs4_file for the same client.
+The nfs client does attempt to DELEGRETURN these when they are presented to
+it - but apparently under some (unknown) circumstances the client does not
+manage to return all of them. This leads to the eventual
+attempt to CB_RECALL more than one delegation with the same nfs
+filehandle to the same client. The first recall will succeed, but the
+next recall will fail with NFS4ERR_BADHANDLE. This leads to the server
+having delegations on cl_revoked that the client has no way to FREE
+or DELEGRETURN, with resulting inability to recover. The state manager
+on the server will continually assert SEQ4_STATUS_RECALLABLE_STATE_REVOKED,
+and the state manager on the client will loop, unable to satisfy
+the server.
+
+List discussion also reports a race between OPEN and DELEGRETURN that
+will be avoided by only sending the delegation once to the
+client. This is also logically in accordance with RFC 5661, Sections 9.1.1 and 10.2.
+
+So, let's:
+
+1.) Not hand out duplicate delegations.
+2.) Only send them to the client once.
+
+RFC 5661:
+
+9.1.1:
+"Delegations and layouts, on the other hand, are not associated with a
+specific owner but are associated with the client as a whole
+(identified by a client ID)."
+
+10.2:
+"...the stateid for a delegation is associated with a client ID and may be
+used on behalf of all the open-owners for the given client. A
+delegation is made to the client as a whole and not to any specific
+process or thread of control within it."
+
+Reported-by: Eric Meddaugh <etmsys@rit.edu>
+Cc: Trond Myklebust <trond.myklebust@primarydata.com>
+Cc: Olga Kornievskaia <aglo@umich.edu>
+Signed-off-by: Andrew Elble <aweits@rit.edu>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 84 insertions(+), 10 deletions(-)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -765,16 +765,68 @@ void nfs4_unhash_stid(struct nfs4_stid *
+ s->sc_type = 0;
+ }
+
+-static void
++/**
++ * nfs4_get_existing_delegation - Discover if this delegation already exists
++ * @clp: a pointer to the nfs4_client we're granting a delegation to
++ * @fp: a pointer to the nfs4_file we're granting a delegation on
++ *
++ * Return:
++ * On success: NULL if an existing delegation was not found.
++ *
++ * On error: -EAGAIN if one was previously granted to this nfs4_client
++ * for this nfs4_file.
++ *
++ */
++
++static int
++nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp)
++{
++ struct nfs4_delegation *searchdp = NULL;
++ struct nfs4_client *searchclp = NULL;
++
++ lockdep_assert_held(&state_lock);
++ lockdep_assert_held(&fp->fi_lock);
++
++ list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) {
++ searchclp = searchdp->dl_stid.sc_client;
++ if (clp == searchclp) {
++ return -EAGAIN;
++ }
++ }
++ return 0;
++}
++
++/**
++ * hash_delegation_locked - Add a delegation to the appropriate lists
++ * @dp: a pointer to the nfs4_delegation we are adding.
++ * @fp: a pointer to the nfs4_file we're granting a delegation on
++ *
++ * Return:
++ * On success: NULL if the delegation was successfully hashed.
++ *
++ * On error: -EAGAIN if one was previously granted to this
++ * nfs4_client for this nfs4_file. Delegation is not hashed.
++ *
++ */
++
++static int
+ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
+ {
++ int status;
++ struct nfs4_client *clp = dp->dl_stid.sc_client;
++
+ lockdep_assert_held(&state_lock);
+ lockdep_assert_held(&fp->fi_lock);
+
++ status = nfs4_get_existing_delegation(clp, fp);
++ if (status)
++ return status;
++ ++fp->fi_delegees;
+ atomic_inc(&dp->dl_stid.sc_count);
+ dp->dl_stid.sc_type = NFS4_DELEG_STID;
+ list_add(&dp->dl_perfile, &fp->fi_delegations);
+- list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
++ list_add(&dp->dl_perclnt, &clp->cl_delegations);
++ return 0;
+ }
+
+ static bool
+@@ -3941,6 +3993,18 @@ static struct file_lock *nfs4_alloc_init
+ return fl;
+ }
+
++/**
++ * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer
++ * @dp: a pointer to the nfs4_delegation we're adding.
++ *
++ * Return:
++ * On success: Return code will be 0 on success.
++ *
++ * On error: -EAGAIN if there was an existing delegation.
++ * nonzero if there is an error in other cases.
++ *
++ */
++
+ static int nfs4_setlease(struct nfs4_delegation *dp)
+ {
+ struct nfs4_file *fp = dp->dl_stid.sc_file;
+@@ -3972,16 +4036,19 @@ static int nfs4_setlease(struct nfs4_del
+ goto out_unlock;
+ /* Race breaker */
+ if (fp->fi_deleg_file) {
+- status = 0;
+- ++fp->fi_delegees;
+- hash_delegation_locked(dp, fp);
++ status = hash_delegation_locked(dp, fp);
+ goto out_unlock;
+ }
+ fp->fi_deleg_file = filp;
+- fp->fi_delegees = 1;
+- hash_delegation_locked(dp, fp);
++ fp->fi_delegees = 0;
++ status = hash_delegation_locked(dp, fp);
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&state_lock);
++ if (status) {
++ /* Should never happen, this is a new fi_deleg_file */
++ WARN_ON_ONCE(1);
++ goto out_fput;
++ }
+ return 0;
+ out_unlock:
+ spin_unlock(&fp->fi_lock);
+@@ -4001,6 +4068,15 @@ nfs4_set_delegation(struct nfs4_client *
+ if (fp->fi_had_conflict)
+ return ERR_PTR(-EAGAIN);
+
++ spin_lock(&state_lock);
++ spin_lock(&fp->fi_lock);
++ status = nfs4_get_existing_delegation(clp, fp);
++ spin_unlock(&fp->fi_lock);
++ spin_unlock(&state_lock);
++
++ if (status)
++ return ERR_PTR(status);
++
+ dp = alloc_init_deleg(clp, fh, odstate);
+ if (!dp)
+ return ERR_PTR(-ENOMEM);
+@@ -4019,9 +4095,7 @@ nfs4_set_delegation(struct nfs4_client *
+ status = -EAGAIN;
+ goto out_unlock;
+ }
+- ++fp->fi_delegees;
+- hash_delegation_locked(dp, fp);
+- status = 0;
++ status = hash_delegation_locked(dp, fp);
+ out_unlock:
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&state_lock);
--- /dev/null
+From 35a92fe8770ce54c5eb275cd76128645bea2d200 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@poochiereds.net>
+Date: Thu, 17 Sep 2015 07:47:08 -0400
+Subject: nfsd: serialize state seqid morphing operations
+
+From: Jeff Layton <jlayton@poochiereds.net>
+
+commit 35a92fe8770ce54c5eb275cd76128645bea2d200 upstream.
+
+Andrew was seeing a race occur when an OPEN and OPEN_DOWNGRADE were
+running in parallel. The server would receive the OPEN_DOWNGRADE first
+and check its seqid, but then an OPEN would race in and bump it. The
+OPEN_DOWNGRADE would then complete and bump the seqid again. The result
+was that the OPEN_DOWNGRADE would be applied after the OPEN, even though
+it should have been rejected since the seqid changed.
+
+The only recourse we have here I think is to serialize operations that
+bump the seqid in a stateid, particularly when we're given a seqid in
+the call. To address this, we add a new rw_semaphore to the
+nfs4_ol_stateid struct. We do a down_write prior to checking the seqid
+after looking up the stateid to ensure that nothing else is going to
+bump it while we're operating on it.
+
+In the case of OPEN, we do a down_read, as the call doesn't contain a
+seqid. Those can run in parallel -- we just need to serialize them when
+there is a concurrent OPEN_DOWNGRADE or CLOSE.
+
+LOCK and LOCKU however always take the write lock as there is no
+opportunity for parallelizing those.
+
+Reported-and-Tested-by: Andrew W Elble <aweits@rit.edu>
+Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfsd/nfs4state.c | 33 ++++++++++++++++++++++++++++-----
+ fs/nfsd/state.h | 19 ++++++++++---------
+ 2 files changed, 38 insertions(+), 14 deletions(-)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -3351,6 +3351,7 @@ static void init_open_stateid(struct nfs
+ stp->st_access_bmap = 0;
+ stp->st_deny_bmap = 0;
+ stp->st_openstp = NULL;
++ init_rwsem(&stp->st_rwsem);
+ spin_lock(&oo->oo_owner.so_client->cl_lock);
+ list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
+ spin_lock(&fp->fi_lock);
+@@ -4181,15 +4182,20 @@ nfsd4_process_open2(struct svc_rqst *rqs
+ */
+ if (stp) {
+ /* Stateid was found, this is an OPEN upgrade */
++ down_read(&stp->st_rwsem);
+ status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
+- if (status)
++ if (status) {
++ up_read(&stp->st_rwsem);
+ goto out;
++ }
+ } else {
+ stp = open->op_stp;
+ open->op_stp = NULL;
+ init_open_stateid(stp, fp, open);
++ down_read(&stp->st_rwsem);
+ status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
+ if (status) {
++ up_read(&stp->st_rwsem);
+ release_open_stateid(stp);
+ goto out;
+ }
+@@ -4201,6 +4207,7 @@ nfsd4_process_open2(struct svc_rqst *rqs
+ }
+ update_stateid(&stp->st_stid.sc_stateid);
+ memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
++ up_read(&stp->st_rwsem);
+
+ if (nfsd4_has_session(&resp->cstate)) {
+ if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
+@@ -4777,10 +4784,13 @@ static __be32 nfs4_seqid_op_checks(struc
+ * revoked delegations are kept only for free_stateid.
+ */
+ return nfserr_bad_stateid;
++ down_write(&stp->st_rwsem);
+ status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
+- if (status)
+- return status;
+- return nfs4_check_fh(current_fh, &stp->st_stid);
++ if (status == nfs_ok)
++ status = nfs4_check_fh(current_fh, &stp->st_stid);
++ if (status != nfs_ok)
++ up_write(&stp->st_rwsem);
++ return status;
+ }
+
+ /*
+@@ -4827,6 +4837,7 @@ static __be32 nfs4_preprocess_confirmed_
+ return status;
+ oo = openowner(stp->st_stateowner);
+ if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
++ up_write(&stp->st_rwsem);
+ nfs4_put_stid(&stp->st_stid);
+ return nfserr_bad_stateid;
+ }
+@@ -4857,11 +4868,14 @@ nfsd4_open_confirm(struct svc_rqst *rqst
+ goto out;
+ oo = openowner(stp->st_stateowner);
+ status = nfserr_bad_stateid;
+- if (oo->oo_flags & NFS4_OO_CONFIRMED)
++ if (oo->oo_flags & NFS4_OO_CONFIRMED) {
++ up_write(&stp->st_rwsem);
+ goto put_stateid;
++ }
+ oo->oo_flags |= NFS4_OO_CONFIRMED;
+ update_stateid(&stp->st_stid.sc_stateid);
+ memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
++ up_write(&stp->st_rwsem);
+ dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
+ __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
+
+@@ -4940,6 +4954,7 @@ nfsd4_open_downgrade(struct svc_rqst *rq
+ memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+ status = nfs_ok;
+ put_stateid:
++ up_write(&stp->st_rwsem);
+ nfs4_put_stid(&stp->st_stid);
+ out:
+ nfsd4_bump_seqid(cstate, status);
+@@ -4993,6 +5008,7 @@ nfsd4_close(struct svc_rqst *rqstp, stru
+ goto out;
+ update_stateid(&stp->st_stid.sc_stateid);
+ memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
++ up_write(&stp->st_rwsem);
+
+ nfsd4_close_open_stateid(stp);
+
+@@ -5223,6 +5239,7 @@ init_lock_stateid(struct nfs4_ol_stateid
+ stp->st_access_bmap = 0;
+ stp->st_deny_bmap = open_stp->st_deny_bmap;
+ stp->st_openstp = open_stp;
++ init_rwsem(&stp->st_rwsem);
+ list_add(&stp->st_locks, &open_stp->st_locks);
+ list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
+ spin_lock(&fp->fi_lock);
+@@ -5391,6 +5408,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struc
+ &open_stp, nn);
+ if (status)
+ goto out;
++ up_write(&open_stp->st_rwsem);
+ open_sop = openowner(open_stp->st_stateowner);
+ status = nfserr_bad_stateid;
+ if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
+@@ -5398,6 +5416,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struc
+ goto out;
+ status = lookup_or_create_lock_state(cstate, open_stp, lock,
+ &lock_stp, &new);
++ if (status == nfs_ok)
++ down_write(&lock_stp->st_rwsem);
+ } else {
+ status = nfs4_preprocess_seqid_op(cstate,
+ lock->lk_old_lock_seqid,
+@@ -5503,6 +5523,8 @@ out:
+ seqid_mutating_err(ntohl(status)))
+ lock_sop->lo_owner.so_seqid++;
+
++ up_write(&lock_stp->st_rwsem);
++
+ /*
+ * If this is a new, never-before-used stateid, and we are
+ * returning an error, then just go ahead and release it.
+@@ -5673,6 +5695,7 @@ nfsd4_locku(struct svc_rqst *rqstp, stru
+ fput:
+ fput(filp);
+ put_stateid:
++ up_write(&stp->st_rwsem);
+ nfs4_put_stid(&stp->st_stid);
+ out:
+ nfsd4_bump_seqid(cstate, status);
+--- a/fs/nfsd/state.h
++++ b/fs/nfsd/state.h
+@@ -533,15 +533,16 @@ struct nfs4_file {
+ * Better suggestions welcome.
+ */
+ struct nfs4_ol_stateid {
+- struct nfs4_stid st_stid; /* must be first field */
+- struct list_head st_perfile;
+- struct list_head st_perstateowner;
+- struct list_head st_locks;
+- struct nfs4_stateowner * st_stateowner;
+- struct nfs4_clnt_odstate * st_clnt_odstate;
+- unsigned char st_access_bmap;
+- unsigned char st_deny_bmap;
+- struct nfs4_ol_stateid * st_openstp;
++ struct nfs4_stid st_stid;
++ struct list_head st_perfile;
++ struct list_head st_perstateowner;
++ struct list_head st_locks;
++ struct nfs4_stateowner *st_stateowner;
++ struct nfs4_clnt_odstate *st_clnt_odstate;
++ unsigned char st_access_bmap;
++ unsigned char st_deny_bmap;
++ struct nfs4_ol_stateid *st_openstp;
++ struct rw_semaphore st_rwsem;
+ };
+
+ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
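(Editorial note: condensed, the protocol introduced above is that operations which carry and bump a seqid take st_rwsem for write, while OPEN, which carries no seqid, takes it only for read. The helper below is hypothetical and for illustration only; the real logic is spread across nfsd4_process_open2() and nfs4_preprocess_seqid_op() as shown in the diff.)

	static void seqid_morph_begin(struct nfs4_ol_stateid *stp, bool has_seqid)
	{
		if (has_seqid)
			/* OPEN_CONFIRM, OPEN_DOWNGRADE, CLOSE, LOCK, LOCKU */
			down_write(&stp->st_rwsem);
		else
			/* OPEN: no seqid in the call, may run in parallel */
			down_read(&stp->st_rwsem);
	}
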
--- /dev/null
+From 8f1eb48758aacf6c1ffce18179295adbf3bd7640 Mon Sep 17 00:00:00 2001
+From: Junxiao Bi <junxiao.bi@oracle.com>
+Date: Fri, 20 Nov 2015 15:57:30 -0800
+Subject: ocfs2: fix umask ignored issue
+
+From: Junxiao Bi <junxiao.bi@oracle.com>
+
+commit 8f1eb48758aacf6c1ffce18179295adbf3bd7640 upstream.
+
+A newly created file's mode is not masked with the umask, so the umask has
+no effect on ocfs2 volumes.
+
+Fixes: 702e5bc ("ocfs2: use generic posix ACL infrastructure")
+Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Gang He <ghe@suse.com>
+Cc: Mark Fasheh <mfasheh@suse.de>
+Cc: Joel Becker <jlbec@evilplan.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/namei.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/ocfs2/namei.c
++++ b/fs/ocfs2/namei.c
+@@ -365,6 +365,8 @@ static int ocfs2_mknod(struct inode *dir
+ mlog_errno(status);
+ goto leave;
+ }
++ /* update inode->i_mode after mask with "umask". */
++ inode->i_mode = mode;
+
+ handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
+ S_ISDIR(mode),
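(Editorial note: the symptom is easy to demonstrate from userspace. A minimal test program follows, assuming the current directory is on an ocfs2 mount; before this fix the reported mode stays 0666 instead of 0644.)

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/stat.h>

	int main(void)
	{
		struct stat st;
		int fd;

		umask(022);	/* a requested mode of 0666 should become 0644 */
		fd = open("umask-test", O_CREAT | O_WRONLY, 0666);
		if (fd < 0)
			return 1;
		fstat(fd, &st);
		printf("mode: %04o (expected 0644)\n",
		       (unsigned int)(st.st_mode & 07777));
		return 0;
	}
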
--- /dev/null
+From 70b16db86f564977df074072143284aec2cb1162 Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Fri, 27 Nov 2015 19:23:24 +0100
+Subject: rbd: don't put snap_context twice in rbd_queue_workfn()
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 70b16db86f564977df074072143284aec2cb1162 upstream.
+
+Commit 4e752f0ab0e8 ("rbd: access snapshot context and mapping size
+safely") moved ceph_get_snap_context() out of rbd_img_request_create()
+and into rbd_queue_workfn(), adding a ceph_put_snap_context() to the
+error path in rbd_queue_workfn(). However, rbd_img_request_create()
+consumes a ref on snapc, so calling ceph_put_snap_context() after
+a successful rbd_img_request_create() leads to an extra put. Fix it.
+
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Josh Durgin <jdurgin@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/rbd.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -3417,6 +3417,7 @@ static void rbd_queue_workfn(struct work
+ goto err_rq;
+ }
+ img_request->rq = rq;
++ snapc = NULL; /* img_request consumes a ref */
+
+ if (op_type == OBJ_OP_DISCARD)
+ result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA,
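(Editorial note: the rule at work is plain reference-ownership transfer. Once rbd_img_request_create() succeeds it owns the snapc reference, so the caller clears its local pointer before any path that reaches the common error label. The sketch below paraphrases the fixed code path rather than quoting rbd_queue_workfn() verbatim.)

	img_request = rbd_img_request_create(rbd_dev, offset, length,
					     op_type, snapc);
	if (!img_request) {
		result = -ENOMEM;
		goto err_rq;		/* we still own snapc; err_rq drops it */
	}
	img_request->rq = rq;
	snapc = NULL;			/* ref now owned by img_request */

With snapc cleared, the ceph_put_snap_context() call on the error path has nothing left to drop, so the extra put can no longer happen.
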
net-neighbour-fix-crash-at-dumping-device-agnostic-proxy-entries.patch
ipv6-sctp-implement-sctp_v6_destroy_sock.patch
net_sched-fix-qdisc_tree_decrease_qlen-races.patch
+btrfs-check-unsupported-filters-in-balance-arguments.patch
+btrfs-fix-file-corruption-and-data-loss-after-cloning-inline-extents.patch
+btrfs-fix-truncation-of-compressed-and-inlined-extents.patch
+btrfs-fix-race-leading-to-incorrect-item-deletion-when-dropping-extents.patch
+btrfs-fix-race-leading-to-bug_on-when-running-delalloc-for-nodatacow.patch
+btrfs-fix-race-when-listing-an-inode-s-xattrs.patch
+rbd-don-t-put-snap_context-twice-in-rbd_queue_workfn.patch
+ext4-crypto-fix-memory-leak-in-ext4_bio_write_page.patch
+ext4-fix-potential-use-after-free-in-__ext4_journal_stop.patch
+ext4-jbd2-ensure-entering-into-panic-after-recording-an-error-in-superblock.patch
+firewire-ohci-fix-jmicron-jmb38x-it-context-discovery.patch
+nfsd-serialize-state-seqid-morphing-operations.patch
+nfsd-eliminate-sending-duplicate-and-repeated-delegations.patch
+debugfs-fix-refcount-imbalance-in-start_creating.patch
+nfs4-start-callback_ident-at-idr-1.patch
+nfs-if-we-have-no-valid-attrs-then-don-t-declare-the-attribute-cache-valid.patch
+ocfs2-fix-umask-ignored-issue.patch
+ceph-fix-message-length-computation.patch
+alsa-hda-hdmi-apply-skylake-fix-ups-to-broxton-display-codec.patch