]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 5 Jan 2019 17:46:24 +0000 (18:46 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 5 Jan 2019 17:46:24 +0000 (18:46 +0100)
added patches:
btrfs-fix-fsync-of-files-with-multiple-hard-links-in-new-directories.patch
btrfs-run-delayed-items-before-dropping-the-snapshot.patch
cdc-acm-fix-abnormal-data-rx-issue-for-mediatek-preloader.patch
clk-rockchip-fix-typo-in-rk3188-spdif_frac-parent.patch
ext4-check-for-shutdown-and-r-o-file-system-in-ext4_write_inode.patch
ext4-fix-ext4_ioc_group_add-ioctl.patch
ext4-fix-possible-use-after-free-in-ext4_quota_enable.patch
ext4-force-inode-writes-when-nfsd-calls-commit_metadata.patch
ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch
ext4-missing-unlock-put_page-in-ext4_try_to_write_inline_data.patch
perf-pmu-suppress-potential-format-truncation-warning.patch
platform-msi-free-descriptors-in-platform_msi_domain_free.patch
powerpc-tm-set-msr-just-prior-to-recheckpoint.patch
spi-bcm2835-avoid-finishing-transfer-prematurely-in-irq-mode.patch
spi-bcm2835-fix-book-keeping-of-dma-termination.patch
spi-bcm2835-fix-race-on-dma-termination.patch

17 files changed:
queue-4.9/btrfs-fix-fsync-of-files-with-multiple-hard-links-in-new-directories.patch [new file with mode: 0644]
queue-4.9/btrfs-run-delayed-items-before-dropping-the-snapshot.patch [new file with mode: 0644]
queue-4.9/cdc-acm-fix-abnormal-data-rx-issue-for-mediatek-preloader.patch [new file with mode: 0644]
queue-4.9/clk-rockchip-fix-typo-in-rk3188-spdif_frac-parent.patch [new file with mode: 0644]
queue-4.9/ext4-check-for-shutdown-and-r-o-file-system-in-ext4_write_inode.patch [new file with mode: 0644]
queue-4.9/ext4-fix-ext4_ioc_group_add-ioctl.patch [new file with mode: 0644]
queue-4.9/ext4-fix-possible-use-after-free-in-ext4_quota_enable.patch [new file with mode: 0644]
queue-4.9/ext4-force-inode-writes-when-nfsd-calls-commit_metadata.patch [new file with mode: 0644]
queue-4.9/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch [new file with mode: 0644]
queue-4.9/ext4-missing-unlock-put_page-in-ext4_try_to_write_inline_data.patch [new file with mode: 0644]
queue-4.9/perf-pmu-suppress-potential-format-truncation-warning.patch [new file with mode: 0644]
queue-4.9/platform-msi-free-descriptors-in-platform_msi_domain_free.patch [new file with mode: 0644]
queue-4.9/powerpc-tm-set-msr-just-prior-to-recheckpoint.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/spi-bcm2835-avoid-finishing-transfer-prematurely-in-irq-mode.patch [new file with mode: 0644]
queue-4.9/spi-bcm2835-fix-book-keeping-of-dma-termination.patch [new file with mode: 0644]
queue-4.9/spi-bcm2835-fix-race-on-dma-termination.patch [new file with mode: 0644]

diff --git a/queue-4.9/btrfs-fix-fsync-of-files-with-multiple-hard-links-in-new-directories.patch b/queue-4.9/btrfs-fix-fsync-of-files-with-multiple-hard-links-in-new-directories.patch
new file mode 100644 (file)
index 0000000..eec7986
--- /dev/null
@@ -0,0 +1,150 @@
+From 41bd60676923822de1df2c50b3f9a10171f4338a Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 28 Nov 2018 14:54:28 +0000
+Subject: Btrfs: fix fsync of files with multiple hard links in new directories
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 41bd60676923822de1df2c50b3f9a10171f4338a upstream.
+
+The log tree has a long standing problem that when a file is fsync'ed we
+only check for new ancestors, created in the current transaction, by
+following only the hard link for which the fsync was issued. We follow the
+ancestors using the VFS' dget_parent() API. This means that if we create a
+new link for a file in a directory that is new (or in any other new
+ancestor directory) and then fsync the file using an old hard link, we end
+up not logging the new ancestor, and on log replay that new hard link and
+ancestor do not exist. In some cases, involving renames, the file will not
+exist at all.
+
+Example:
+
+  mkfs.btrfs -f /dev/sdb
+  mount /dev/sdb /mnt
+
+  mkdir /mnt/A
+  touch /mnt/foo
+  ln /mnt/foo /mnt/A/bar
+  xfs_io -c fsync /mnt/foo
+
+  <power failure>
+
+In this example after log replay only the hard link named 'foo' exists
+and directory A does not exist, which is unexpected. In other major linux
+filesystems, such as ext4, xfs and f2fs for example, both hard links exist
+and so does directory A after mounting again the filesystem.
+
+Checking if any ancestors are new and need to be logged was added in
+2009 by commit 12fcfd22fe5b ("Btrfs: tree logging unlink/rename fixes"),
+however only for the ancestors of the hard link (dentry) for which the
+fsync was issued, instead of checking for all ancestors for all of the
+inode's hard links.
+
+So fix this by tracking the id of the last transaction where a hard link
+was created for an inode and then on fsync fallback to a full transaction
+commit when an inode has more than one hard link and at least one new hard
+link was created in the current transaction. This is the simplest solution
+since this is not a common use case (frequently adding hard links for
+which there's an ancestor created in the current transaction and then
+fsync the file). In case it ever becomes a common use case, a solution
+that consists of iterating the fs/subvol btree for each hard link and
+check if any ancestor is new, could be implemented.
+
+This solves many unexpected scenarios reported by Jayashree Mohan and
+Vijay Chidambaram, and for which there is a new test case for fstests
+under review.
+
+Fixes: 12fcfd22fe5b ("Btrfs: tree logging unlink/rename fixes")
+CC: stable@vger.kernel.org # 4.4+
+Reported-by: Vijay Chidambaram <vvijay03@gmail.com>
+Reported-by: Jayashree Mohan <jayashree2912@gmail.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/btrfs_inode.h |    6 ++++++
+ fs/btrfs/inode.c       |   17 +++++++++++++++++
+ fs/btrfs/tree-log.c    |   16 ++++++++++++++++
+ 3 files changed, 39 insertions(+)
+
+--- a/fs/btrfs/btrfs_inode.h
++++ b/fs/btrfs/btrfs_inode.h
+@@ -154,6 +154,12 @@ struct btrfs_inode {
+       u64 last_unlink_trans;
+       /*
++       * Track the transaction id of the last transaction used to create a
++       * hard link for the inode. This is used by the log tree (fsync).
++       */
++      u64 last_link_trans;
++
++      /*
+        * Number of bytes outstanding that are going to need csums.  This is
+        * used in ENOSPC accounting.
+        */
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -3805,6 +3805,21 @@ cache_index:
+        * inode is not a directory, logging its parent unnecessarily.
+        */
+       BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
++      /*
++       * Similar reasoning for last_link_trans, needs to be set otherwise
++       * for a case like the following:
++       *
++       * mkdir A
++       * touch foo
++       * ln foo A/bar
++       * echo 2 > /proc/sys/vm/drop_caches
++       * fsync foo
++       * <power failure>
++       *
++       * Would result in link bar and directory A not existing after the power
++       * failure.
++       */
++      BTRFS_I(inode)->last_link_trans = BTRFS_I(inode)->last_trans;
+       path->slots[0]++;
+       if (inode->i_nlink != 1 ||
+@@ -6670,6 +6685,7 @@ static int btrfs_link(struct dentry *old
+                       if (err)
+                               goto fail;
+               }
++              BTRFS_I(inode)->last_link_trans = trans->transid;
+               d_instantiate(dentry, inode);
+               btrfs_log_new_name(trans, inode, NULL, parent);
+       }
+@@ -9383,6 +9399,7 @@ struct inode *btrfs_alloc_inode(struct s
+       ei->index_cnt = (u64)-1;
+       ei->dir_index = 0;
+       ei->last_unlink_trans = 0;
++      ei->last_link_trans = 0;
+       ei->last_log_commit = 0;
+       ei->delayed_iput_count = 0;
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -5549,6 +5549,22 @@ static int btrfs_log_inode_parent(struct
+                       goto end_trans;
+       }
++      /*
++       * If a new hard link was added to the inode in the current transaction
++       * and its link count is now greater than 1, we need to fallback to a
++       * transaction commit, otherwise we can end up not logging all its new
++       * parents for all the hard links. Here just from the dentry used to
++       * fsync, we can not visit the ancestor inodes for all the other hard
++       * links to figure out if any is new, so we fallback to a transaction
++       * commit (instead of adding a lot of complexity of scanning a btree,
++       * since this scenario is not a common use case).
++       */
++      if (inode->vfs_inode.i_nlink > 1 &&
++          inode->last_link_trans > last_committed) {
++              ret = -EMLINK;
++              goto end_trans;
++      }
++
+       while (1) {
+               if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
+                       break;
diff --git a/queue-4.9/btrfs-run-delayed-items-before-dropping-the-snapshot.patch b/queue-4.9/btrfs-run-delayed-items-before-dropping-the-snapshot.patch
new file mode 100644 (file)
index 0000000..3d39783
--- /dev/null
@@ -0,0 +1,86 @@
+From 0568e82dbe2510fc1fa664f58e5c997d3f1e649e Mon Sep 17 00:00:00 2001
+From: Josef Bacik <jbacik@fb.com>
+Date: Fri, 30 Nov 2018 11:52:14 -0500
+Subject: btrfs: run delayed items before dropping the snapshot
+
+From: Josef Bacik <jbacik@fb.com>
+
+commit 0568e82dbe2510fc1fa664f58e5c997d3f1e649e upstream.
+
+With my delayed refs patches in place we started seeing a large amount
+of aborts in __btrfs_free_extent:
+
+ BTRFS error (device sdb1): unable to find ref byte nr 91947008 parent 0 root 35964  owner 1 offset 0
+ Call Trace:
+  ? btrfs_merge_delayed_refs+0xaf/0x340
+  __btrfs_run_delayed_refs+0x6ea/0xfc0
+  ? btrfs_set_path_blocking+0x31/0x60
+  btrfs_run_delayed_refs+0xeb/0x180
+  btrfs_commit_transaction+0x179/0x7f0
+  ? btrfs_check_space_for_delayed_refs+0x30/0x50
+  ? should_end_transaction.isra.19+0xe/0x40
+  btrfs_drop_snapshot+0x41c/0x7c0
+  btrfs_clean_one_deleted_snapshot+0xb5/0xd0
+  cleaner_kthread+0xf6/0x120
+  kthread+0xf8/0x130
+  ? btree_invalidatepage+0x90/0x90
+  ? kthread_bind+0x10/0x10
+  ret_from_fork+0x35/0x40
+
+This was because btrfs_drop_snapshot depends on the root not being
+modified while it's dropping the snapshot.  It will unlock the root node
+(and really every node) as it walks down the tree, only to re-lock it
+when it needs to do something.  This is a problem because if we modify
+the tree we could cow a block in our path, which frees our reference to
+that block.  Then once we get back to that shared block we'll free our
+reference to it again, and get ENOENT when trying to lookup our extent
+reference to that block in __btrfs_free_extent.
+
+This is ultimately happening because we have delayed items left to be
+processed for our deleted snapshot _after_ all of the inodes are closed
+for the snapshot.  We only run the delayed inode item if we're deleting
+the inode, and even then we do not run the delayed insertions or delayed
+removals.  These can be run at any point after our final inode does its
+last iput, which is what triggers the snapshot deletion.  We can end up
+with the snapshot deletion happening and then have the delayed items run
+on that file system, resulting in the above problem.
+
+This problem has existed forever, however my patches made it much easier
+to hit as I wake up the cleaner much more often to deal with delayed
+iputs, which made us more likely to start the snapshot dropping work
+before the transaction commits, which is when the delayed items would
+generally be run.  Before, generally speaking, we would run the delayed
+items, commit the transaction, and wakeup the cleaner thread to start
+deleting snapshots, which means we were less likely to hit this problem.
+You could still hit it if you had multiple snapshots to be deleted and
+ended up with lots of delayed items, but it was definitely harder.
+
+Fix for now by simply running all the delayed items before starting to
+drop the snapshot.  We could make this smarter in the future by making
+the delayed items per-root, and then simply drop any delayed items for
+roots that we are going to delete.  But for now just a quick and easy
+solution is the safest.
+
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -9252,6 +9252,10 @@ int btrfs_drop_snapshot(struct btrfs_roo
+               goto out_free;
+       }
++      err = btrfs_run_delayed_items(trans);
++      if (err)
++              goto out_end_trans;
++
+       if (block_rsv)
+               trans->block_rsv = block_rsv;
diff --git a/queue-4.9/cdc-acm-fix-abnormal-data-rx-issue-for-mediatek-preloader.patch b/queue-4.9/cdc-acm-fix-abnormal-data-rx-issue-for-mediatek-preloader.patch
new file mode 100644 (file)
index 0000000..2c60b5e
--- /dev/null
@@ -0,0 +1,80 @@
+From eafb27fa5283599ce6c5492ea18cf636a28222bb Mon Sep 17 00:00:00 2001
+From: Macpaul Lin <macpaul.lin@mediatek.com>
+Date: Wed, 19 Dec 2018 12:11:03 +0800
+Subject: cdc-acm: fix abnormal DATA RX issue for Mediatek Preloader.
+
+From: Macpaul Lin <macpaul.lin@mediatek.com>
+
+commit eafb27fa5283599ce6c5492ea18cf636a28222bb upstream.
+
+Mediatek Preloader is a proprietary embedded boot loader for loading
+Little Kernel and Linux into device DRAM.
+
+This boot loader also handles firmware updates. Mediatek Preloader will be
+enumerated as a virtual COM port when the device is connected to Windows
+or Linux OS via CDC-ACM class driver. When the USB enumeration has been
+done, Mediatek Preloader will send out handshake command "READY" to PC
+actively instead of waiting command from the download tool.
+
+Since Linux 4.12, the commit "tty: reset termios state on device
+registration" (93857edd9829e144acb6c7e72d593f6e01aead66) causes Mediatek
+Preloader receiving some abnormal command like "READYXX" as it sent.
+This will be recognized as an incorrect response. The behavior change
+also causes the download handshake fail. This change only affects
+subsequent connects if the reconnected device happens to get the same minor
+number.
+
+Disabling the ECHO termios flag could avoid this problem. However, it
+cannot be done by user space configuration when download tool open
+/dev/ttyACM0. This is because the device running Mediatek Preloader will
+send handshake command "READY" immediately once the CDC-ACM driver is
+ready.
+
+This patch wants to fix above problem by introducing "DISABLE_ECHO"
+property in driver_info. When Mediatek Preloader is connected, the
+CDC-ACM driver could disable ECHO flag in termios to avoid the problem.
+
+Signed-off-by: Macpaul Lin <macpaul.lin@mediatek.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Johan Hovold <johan@kernel.org>
+Acked-by: Oliver Neukum <oneukum@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/usb/class/cdc-acm.c |   10 ++++++++++
+ drivers/usb/class/cdc-acm.h |    1 +
+ 2 files changed, 11 insertions(+)
+
+--- a/drivers/usb/class/cdc-acm.c
++++ b/drivers/usb/class/cdc-acm.c
+@@ -502,6 +502,13 @@ static int acm_tty_install(struct tty_dr
+       if (retval)
+               goto error_init_termios;
++      /*
++       * Suppress initial echoing for some devices which might send data
++       * immediately after acm driver has been installed.
++       */
++      if (acm->quirks & DISABLE_ECHO)
++              tty->termios.c_lflag &= ~ECHO;
++
+       tty->driver_data = acm;
+       return 0;
+@@ -1620,6 +1627,9 @@ static const struct usb_device_id acm_id
+       { USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@gmail.com */
+       .driver_info = NO_UNION_NORMAL, /* has no union descriptor */
+       },
++      { USB_DEVICE(0x0e8d, 0x2000), /* MediaTek Inc Preloader */
++      .driver_info = DISABLE_ECHO, /* DISABLE ECHO in termios flag */
++      },
+       { USB_DEVICE(0x0e8d, 0x3329), /* MediaTek Inc GPS */
+       .driver_info = NO_UNION_NORMAL, /* has no union descriptor */
+       },
+--- a/drivers/usb/class/cdc-acm.h
++++ b/drivers/usb/class/cdc-acm.h
+@@ -134,3 +134,4 @@ struct acm {
+ #define QUIRK_CONTROL_LINE_STATE      BIT(6)
+ #define CLEAR_HALT_CONDITIONS         BIT(7)
+ #define SEND_ZERO_PACKET              BIT(8)
++#define DISABLE_ECHO                  BIT(9)
diff --git a/queue-4.9/clk-rockchip-fix-typo-in-rk3188-spdif_frac-parent.patch b/queue-4.9/clk-rockchip-fix-typo-in-rk3188-spdif_frac-parent.patch
new file mode 100644 (file)
index 0000000..363a528
--- /dev/null
@@ -0,0 +1,34 @@
+From 8b19faf6fae2867e2c177212c541e8ae36aa4d32 Mon Sep 17 00:00:00 2001
+From: Johan Jonker <jbx9999@hotmail.com>
+Date: Sat, 3 Nov 2018 23:54:13 +0100
+Subject: clk: rockchip: fix typo in rk3188 spdif_frac parent
+
+From: Johan Jonker <jbx9999@hotmail.com>
+
+commit 8b19faf6fae2867e2c177212c541e8ae36aa4d32 upstream.
+
+Fix typo in common_clk_branches.
+Make spdif_pre parent of spdif_frac.
+
+Fixes: 667464208989 ("clk: rockchip: include downstream muxes into fractional dividers")
+Cc: stable@vger.kernel.org
+Signed-off-by: Johan Jonker <jbx9999@hotmail.com>
+Acked-by: Elaine Zhang <zhangqing@rock-chips.com>
+Signed-off-by: Heiko Stuebner <heiko@sntech.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/clk/rockchip/clk-rk3188.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/clk/rockchip/clk-rk3188.c
++++ b/drivers/clk/rockchip/clk-rk3188.c
+@@ -381,7 +381,7 @@ static struct rockchip_clk_branch common
+       COMPOSITE_NOMUX(0, "spdif_pre", "i2s_src", 0,
+                       RK2928_CLKSEL_CON(5), 0, 7, DFLAGS,
+                       RK2928_CLKGATE_CON(0), 13, GFLAGS),
+-      COMPOSITE_FRACMUX(0, "spdif_frac", "spdif_pll", CLK_SET_RATE_PARENT,
++      COMPOSITE_FRACMUX(0, "spdif_frac", "spdif_pre", CLK_SET_RATE_PARENT,
+                       RK2928_CLKSEL_CON(9), 0,
+                       RK2928_CLKGATE_CON(0), 14, GFLAGS,
+                       &common_spdif_fracmux),
diff --git a/queue-4.9/ext4-check-for-shutdown-and-r-o-file-system-in-ext4_write_inode.patch b/queue-4.9/ext4-check-for-shutdown-and-r-o-file-system-in-ext4_write_inode.patch
new file mode 100644 (file)
index 0000000..b5c8e51
--- /dev/null
@@ -0,0 +1,50 @@
+From 18f2c4fcebf2582f96cbd5f2238f4f354a0e4847 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Wed, 19 Dec 2018 14:36:58 -0500
+Subject: ext4: check for shutdown and r/o file system in ext4_write_inode()
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 18f2c4fcebf2582f96cbd5f2238f4f354a0e4847 upstream.
+
+If the file system has been shut down or is read-only, then
+ext4_write_inode() needs to bail out early.
+
+Also use jbd2_complete_transaction() instead of ext4_force_commit() so
+we only force a commit if it is needed.
+
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/inode.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -5014,9 +5014,13 @@ int ext4_write_inode(struct inode *inode
+ {
+       int err;
+-      if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
++      if (WARN_ON_ONCE(current->flags & PF_MEMALLOC) ||
++          sb_rdonly(inode->i_sb))
+               return 0;
++      if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
++              return -EIO;
++
+       if (EXT4_SB(inode->i_sb)->s_journal) {
+               if (ext4_journal_current_handle()) {
+                       jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
+@@ -5032,7 +5036,8 @@ int ext4_write_inode(struct inode *inode
+               if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
+                       return 0;
+-              err = ext4_force_commit(inode->i_sb);
++              err = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
++                                              EXT4_I(inode)->i_sync_tid);
+       } else {
+               struct ext4_iloc iloc;
diff --git a/queue-4.9/ext4-fix-ext4_ioc_group_add-ioctl.patch b/queue-4.9/ext4-fix-ext4_ioc_group_add-ioctl.patch
new file mode 100644 (file)
index 0000000..ec7972c
--- /dev/null
@@ -0,0 +1,40 @@
+From e647e29196b7f802f8242c39ecb7cc937f5ef217 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?ruippan=20=28=E6=BD=98=E7=9D=BF=29?= <ruippan@tencent.com>
+Date: Tue, 4 Dec 2018 01:04:12 -0500
+Subject: ext4: fix EXT4_IOC_GROUP_ADD ioctl
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: ruippan (潘睿) <ruippan@tencent.com>
+
+commit e647e29196b7f802f8242c39ecb7cc937f5ef217 upstream.
+
+Commit e2b911c53584 ("ext4: clean up feature test macros with
+predicate functions") broke the EXT4_IOC_GROUP_ADD ioctl.  This was
+not noticed since only very old versions of resize2fs (before
+e2fsprogs 1.42) use this ioctl.  However, using a new kernel with an
+enterprise Linux userspace will cause attempts to use online resize to
+fail with "No reserved GDT blocks".
+
+Fixes: e2b911c53584 ("ext4: clean up feature test macros with predicate...")
+Cc: stable@kernel.org # v4.4
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: ruippan (潘睿) <ruippan@tencent.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/resize.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -1600,7 +1600,7 @@ int ext4_group_add(struct super_block *s
+       }
+       if (reserved_gdb || gdb_off == 0) {
+-              if (ext4_has_feature_resize_inode(sb) ||
++              if (!ext4_has_feature_resize_inode(sb) ||
+                   !le16_to_cpu(es->s_reserved_gdt_blocks)) {
+                       ext4_warning(sb,
+                                    "No reserved GDT blocks, can't resize");
diff --git a/queue-4.9/ext4-fix-possible-use-after-free-in-ext4_quota_enable.patch b/queue-4.9/ext4-fix-possible-use-after-free-in-ext4_quota_enable.patch
new file mode 100644 (file)
index 0000000..f623318
--- /dev/null
@@ -0,0 +1,37 @@
+From 61157b24e60fb3cd1f85f2c76a7b1d628f970144 Mon Sep 17 00:00:00 2001
+From: Pan Bian <bianpan2016@163.com>
+Date: Mon, 3 Dec 2018 23:28:02 -0500
+Subject: ext4: fix possible use after free in ext4_quota_enable
+
+From: Pan Bian <bianpan2016@163.com>
+
+commit 61157b24e60fb3cd1f85f2c76a7b1d628f970144 upstream.
+
+The function frees qf_inode via iput but then passes qf_inode to
+lockdep_set_quota_inode on the failure path. This may result in a
+use-after-free bug. The patch frees qf_inode only after its last use.
+
+Fixes: daf647d2dd5 ("ext4: add lockdep annotations for i_data_sem")
+Cc: stable@kernel.org # 4.6
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Pan Bian <bianpan2016@163.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/super.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -5425,9 +5425,9 @@ static int ext4_quota_enable(struct supe
+       qf_inode->i_flags |= S_NOQUOTA;
+       lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
+       err = dquot_enable(qf_inode, type, format_id, flags);
+-      iput(qf_inode);
+       if (err)
+               lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
++      iput(qf_inode);
+       return err;
+ }
diff --git a/queue-4.9/ext4-force-inode-writes-when-nfsd-calls-commit_metadata.patch b/queue-4.9/ext4-force-inode-writes-when-nfsd-calls-commit_metadata.patch
new file mode 100644 (file)
index 0000000..0b2d548
--- /dev/null
@@ -0,0 +1,86 @@
+From fde872682e175743e0c3ef939c89e3c6008a1529 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Wed, 19 Dec 2018 14:07:58 -0500
+Subject: ext4: force inode writes when nfsd calls commit_metadata()
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit fde872682e175743e0c3ef939c89e3c6008a1529 upstream.
+
+Some time back, nfsd switched from calling vfs_fsync() to using a new
+commit_metadata() hook in export_operations().  If the file system did
+not provide a commit_metadata() hook, it fell back to using
+sync_inode_metadata().  Unfortunately, that doesn't work on all file
+systems.  In particular, it doesn't work on ext4 due to how the inode
+gets journalled --- the VFS writeback code will not always call
+ext4_write_inode().
+
+So we need to provide our own ext4_nfs_commit_metadata() method which
+calls ext4_write_inode() directly.
+
+Google-Bug-Id: 121195940
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/super.c             |   11 +++++++++++
+ include/trace/events/ext4.h |   20 ++++++++++++++++++++
+ 2 files changed, 31 insertions(+)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -1076,6 +1076,16 @@ static struct dentry *ext4_fh_to_parent(
+                                   ext4_nfs_get_inode);
+ }
++static int ext4_nfs_commit_metadata(struct inode *inode)
++{
++      struct writeback_control wbc = {
++              .sync_mode = WB_SYNC_ALL
++      };
++
++      trace_ext4_nfs_commit_metadata(inode);
++      return ext4_write_inode(inode, &wbc);
++}
++
+ /*
+  * Try to release metadata pages (indirect blocks, directories) which are
+  * mapped via the block device.  Since these pages could have journal heads
+@@ -1258,6 +1268,7 @@ static const struct export_operations ex
+       .fh_to_dentry = ext4_fh_to_dentry,
+       .fh_to_parent = ext4_fh_to_parent,
+       .get_parent = ext4_get_parent,
++      .commit_metadata = ext4_nfs_commit_metadata,
+ };
+ enum {
+--- a/include/trace/events/ext4.h
++++ b/include/trace/events/ext4.h
+@@ -223,6 +223,26 @@ TRACE_EVENT(ext4_drop_inode,
+                 (unsigned long) __entry->ino, __entry->drop)
+ );
++TRACE_EVENT(ext4_nfs_commit_metadata,
++      TP_PROTO(struct inode *inode),
++
++      TP_ARGS(inode),
++
++      TP_STRUCT__entry(
++              __field(        dev_t,  dev                     )
++              __field(        ino_t,  ino                     )
++      ),
++
++      TP_fast_assign(
++              __entry->dev    = inode->i_sb->s_dev;
++              __entry->ino    = inode->i_ino;
++      ),
++
++      TP_printk("dev %d,%d ino %lu",
++                MAJOR(__entry->dev), MINOR(__entry->dev),
++                (unsigned long) __entry->ino)
++);
++
+ TRACE_EVENT(ext4_mark_inode_dirty,
+       TP_PROTO(struct inode *inode, unsigned long IP),
diff --git a/queue-4.9/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch b/queue-4.9/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch
new file mode 100644 (file)
index 0000000..ee3afb0
--- /dev/null
@@ -0,0 +1,37 @@
+From a805622a757b6d7f65def4141d29317d8e37b8a1 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Wed, 19 Dec 2018 12:28:13 -0500
+Subject: ext4: include terminating u32 in size of xattr entries when expanding inodes
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit a805622a757b6d7f65def4141d29317d8e37b8a1 upstream.
+
+In ext4_expand_extra_isize_ea(), we calculate the total size of the
+xattr header, plus the xattr entries so we know how much of the
+beginning part of the xattrs to move when expanding the inode extra
+size.  We need to include the terminating u32 at the end of the xattr
+entries, or else if there are uninitialized, non-zero bytes after the
+xattr entries and before the xattr values, the list of xattr entries
+won't be properly terminated.
+
+Reported-by: Steve Graham <stgraham2000@gmail.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/xattr.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1499,7 +1499,7 @@ retry:
+       base = IFIRST(header);
+       end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
+       min_offs = end - base;
+-      total_ino = sizeof(struct ext4_xattr_ibody_header);
++      total_ino = sizeof(struct ext4_xattr_ibody_header) + sizeof(u32);
+       error = xattr_check_inode(inode, header, end);
+       if (error)
diff --git a/queue-4.9/ext4-missing-unlock-put_page-in-ext4_try_to_write_inline_data.patch b/queue-4.9/ext4-missing-unlock-put_page-in-ext4_try_to_write_inline_data.patch
new file mode 100644 (file)
index 0000000..0e73c12
--- /dev/null
@@ -0,0 +1,37 @@
+From 132d00becb31e88469334e1e62751c81345280e0 Mon Sep 17 00:00:00 2001
+From: Maurizio Lombardi <mlombard@redhat.com>
+Date: Tue, 4 Dec 2018 00:06:53 -0500
+Subject: ext4: missing unlock/put_page() in ext4_try_to_write_inline_data()
+
+From: Maurizio Lombardi <mlombard@redhat.com>
+
+commit 132d00becb31e88469334e1e62751c81345280e0 upstream.
+
+In case of error, ext4_try_to_write_inline_data() should unlock
+and release the page it holds.
+
+Fixes: f19d5870cbf7 ("ext4: add normal write support for inline data")
+Cc: stable@kernel.org # 3.8
+Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/inline.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/inline.c
++++ b/fs/ext4/inline.c
+@@ -702,8 +702,11 @@ int ext4_try_to_write_inline_data(struct
+       if (!PageUptodate(page)) {
+               ret = ext4_read_inline_page(inode, page);
+-              if (ret < 0)
++              if (ret < 0) {
++                      unlock_page(page);
++                      put_page(page);
+                       goto out_up_read;
++              }
+       }
+       ret = 1;
diff --git a/queue-4.9/perf-pmu-suppress-potential-format-truncation-warning.patch b/queue-4.9/perf-pmu-suppress-potential-format-truncation-warning.patch
new file mode 100644 (file)
index 0000000..44f9605
--- /dev/null
@@ -0,0 +1,76 @@
+From 11a64a05dc649815670b1be9fe63d205cb076401 Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Sun, 11 Nov 2018 18:45:24 +0000
+Subject: perf pmu: Suppress potential format-truncation warning
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+commit 11a64a05dc649815670b1be9fe63d205cb076401 upstream.
+
+Depending on which functions are inlined in util/pmu.c, the snprintf()
+calls in perf_pmu__parse_{scale,unit,per_pkg,snapshot}() might trigger a
+warning:
+
+  util/pmu.c: In function 'pmu_aliases':
+  util/pmu.c:178:31: error: '%s' directive output may be truncated writing up to 255 bytes into a region of size between 0 and 4095 [-Werror=format-truncation=]
+    snprintf(path, PATH_MAX, "%s/%s.unit", dir, name);
+                               ^~
+
+I found this when trying to build perf from Linux 3.16 with gcc 8.
+However I can reproduce the problem in mainline if I force
+__perf_pmu__new_alias() to be inlined.
+
+Suppress this by using scnprintf() as has been done elsewhere in perf.
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: http://lkml.kernel.org/r/20181111184524.fux4taownc6ndbx6@decadent.org.uk
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/perf/util/pmu.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/tools/perf/util/pmu.c
++++ b/tools/perf/util/pmu.c
+@@ -103,7 +103,7 @@ static int perf_pmu__parse_scale(struct
+       char path[PATH_MAX];
+       char *lc;
+-      snprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
++      scnprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
+       fd = open(path, O_RDONLY);
+       if (fd == -1)
+@@ -163,7 +163,7 @@ static int perf_pmu__parse_unit(struct p
+       ssize_t sret;
+       int fd;
+-      snprintf(path, PATH_MAX, "%s/%s.unit", dir, name);
++      scnprintf(path, PATH_MAX, "%s/%s.unit", dir, name);
+       fd = open(path, O_RDONLY);
+       if (fd == -1)
+@@ -193,7 +193,7 @@ perf_pmu__parse_per_pkg(struct perf_pmu_
+       char path[PATH_MAX];
+       int fd;
+-      snprintf(path, PATH_MAX, "%s/%s.per-pkg", dir, name);
++      scnprintf(path, PATH_MAX, "%s/%s.per-pkg", dir, name);
+       fd = open(path, O_RDONLY);
+       if (fd == -1)
+@@ -211,7 +211,7 @@ static int perf_pmu__parse_snapshot(stru
+       char path[PATH_MAX];
+       int fd;
+-      snprintf(path, PATH_MAX, "%s/%s.snapshot", dir, name);
++      scnprintf(path, PATH_MAX, "%s/%s.snapshot", dir, name);
+       fd = open(path, O_RDONLY);
+       if (fd == -1)
diff --git a/queue-4.9/platform-msi-free-descriptors-in-platform_msi_domain_free.patch b/queue-4.9/platform-msi-free-descriptors-in-platform_msi_domain_free.patch
new file mode 100644 (file)
index 0000000..c92cc65
--- /dev/null
@@ -0,0 +1,87 @@
+From 81b1e6e6a8590a19257e37a1633bec098d499c57 Mon Sep 17 00:00:00 2001
+From: Miquel Raynal <miquel.raynal@bootlin.com>
+Date: Thu, 11 Oct 2018 11:12:34 +0200
+Subject: platform-msi: Free descriptors in platform_msi_domain_free()
+
+From: Miquel Raynal <miquel.raynal@bootlin.com>
+
+commit 81b1e6e6a8590a19257e37a1633bec098d499c57 upstream.
+
+Since the addition of platform MSI support, there were two helpers
+supposed to allocate/free IRQs for a device:
+
+    platform_msi_domain_alloc_irqs()
+    platform_msi_domain_free_irqs()
+
+In these helpers, IRQ descriptors are allocated in the "alloc" routine
+while they are freed in the "free" one.
+
+Later, two other helpers have been added to handle IRQ domains on top
+of MSI domains:
+
+    platform_msi_domain_alloc()
+    platform_msi_domain_free()
+
+Seen from the outside, the logic is pretty close with the former
+helpers and people used it with the same logic as before: a
+platform_msi_domain_alloc() call should be balanced with a
+platform_msi_domain_free() call. While this is probably what was
+intended to do, the platform_msi_domain_free() does not remove/free
+the IRQ descriptor(s) created/inserted in
+platform_msi_domain_alloc().
+
+One effect of this situation is that removing a module that requested
+an IRQ will leave one orphaned IRQ descriptor (with an allocated MSI
+entry) in the device descriptors list. The next time the module is
+inserted, one will observe that the allocation happens twice in the
+MSI domain: once for the remaining descriptor and once for the new
+one. It also has the side effect of quickly overshooting the maximum
+number of allocated MSIs, which then prevents any module requesting
+an interrupt in the same domain from being inserted.
+
+This situation has been met with loops of insertion/removal of the
+mvpp2.ko module (requesting 15 MSIs each time).
+
+Fixes: 552c494a7666 ("platform-msi: Allow creation of a MSI-based stacked irq domain")
+Cc: stable@vger.kernel.org
+Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/base/platform-msi.c |    6 ++++--
+ include/linux/msi.h         |    2 ++
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/base/platform-msi.c
++++ b/drivers/base/platform-msi.c
+@@ -375,14 +375,16 @@ void platform_msi_domain_free(struct irq
+                             unsigned int nvec)
+ {
+       struct platform_msi_priv_data *data = domain->host_data;
+-      struct msi_desc *desc;
+-      for_each_msi_entry(desc, data->dev) {
++      struct msi_desc *desc, *tmp;
++      for_each_msi_entry_safe(desc, tmp, data->dev) {
+               if (WARN_ON(!desc->irq || desc->nvec_used != 1))
+                       return;
+               if (!(desc->irq >= virq && desc->irq < (virq + nvec)))
+                       continue;
+               irq_domain_free_irqs_common(domain, desc->irq, 1);
++              list_del(&desc->list);
++              free_msi_entry(desc);
+       }
+ }
+--- a/include/linux/msi.h
++++ b/include/linux/msi.h
+@@ -108,6 +108,8 @@ struct msi_desc {
+       list_first_entry(dev_to_msi_list((dev)), struct msi_desc, list)
+ #define for_each_msi_entry(desc, dev) \
+       list_for_each_entry((desc), dev_to_msi_list((dev)), list)
++#define for_each_msi_entry_safe(desc, tmp, dev)       \
++      list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list)
+ #ifdef CONFIG_PCI_MSI
+ #define first_pci_msi_entry(pdev)     first_msi_entry(&(pdev)->dev)
diff --git a/queue-4.9/powerpc-tm-set-msr-just-prior-to-recheckpoint.patch b/queue-4.9/powerpc-tm-set-msr-just-prior-to-recheckpoint.patch
new file mode 100644 (file)
index 0000000..17c5af5
--- /dev/null
@@ -0,0 +1,189 @@
+From e1c3743e1a20647c53b719dbf28b48f45d23f2cd Mon Sep 17 00:00:00 2001
+From: Breno Leitao <leitao@debian.org>
+Date: Wed, 21 Nov 2018 17:21:09 -0200
+Subject: powerpc/tm: Set MSR[TS] just prior to recheckpoint
+
+From: Breno Leitao <leitao@debian.org>
+
+commit e1c3743e1a20647c53b719dbf28b48f45d23f2cd upstream.
+
+On a signal handler return, the user could set a context with MSR[TS] bits
+set, and these bits would be copied to task regs->msr.
+
+At restore_tm_sigcontexts(), after current task regs->msr[TS] bits are set,
+several __get_user() are called and then a recheckpoint is executed.
+
+This is a problem since a page fault (in kernel space) could happen when
+calling __get_user(). If it happens, the process MSR[TS] bits were
+already set, but recheckpoint was not executed, and SPRs are still invalid.
+
+The page fault can cause the current process to be de-scheduled, with
+MSR[TS] active and without tm_recheckpoint() being called.  More
+importantly, without TEXASR[FS] bit set also.
+
+Since TEXASR might not have the FS bit set, when the process is
+scheduled back it will try to reclaim, which will be aborted because
+the CPU is not in the suspended state, and then recheckpoint. This
+recheckpoint will restore thread->texasr into the TEXASR SPR, which might
+be zero, hitting a BUG_ON().
+
+       kernel BUG at /build/linux-sf3Co9/linux-4.9.30/arch/powerpc/kernel/tm.S:434!
+       cpu 0xb: Vector: 700 (Program Check) at [c00000041f1576d0]
+           pc: c000000000054550: restore_gprs+0xb0/0x180
+           lr: 0000000000000000
+           sp: c00000041f157950
+          msr: 8000000100021033
+         current = 0xc00000041f143000
+         paca    = 0xc00000000fb86300   softe: 0        irq_happened: 0x01
+           pid   = 1021, comm = kworker/11:1
+       kernel BUG at /build/linux-sf3Co9/linux-4.9.30/arch/powerpc/kernel/tm.S:434!
+       Linux version 4.9.0-3-powerpc64le (debian-kernel@lists.debian.org) (gcc version 6.3.0 20170516 (Debian 6.3.0-18) ) #1 SMP Debian 4.9.30-2+deb9u2 (2017-06-26)
+       enter ? for help
+       [c00000041f157b30] c00000000001bc3c tm_recheckpoint.part.11+0x6c/0xa0
+       [c00000041f157b70] c00000000001d184 __switch_to+0x1e4/0x4c0
+       [c00000041f157bd0] c00000000082eeb8 __schedule+0x2f8/0x990
+       [c00000041f157cb0] c00000000082f598 schedule+0x48/0xc0
+       [c00000041f157ce0] c0000000000f0d28 worker_thread+0x148/0x610
+       [c00000041f157d80] c0000000000f96b0 kthread+0x120/0x140
+       [c00000041f157e30] c00000000000c0e0 ret_from_kernel_thread+0x5c/0x7c
+
+This patch simply delays the MSR[TS] set, so, if there is any page fault in
+the __get_user() section, it does not have regs->msr[TS] set, since the TM
+structures are still invalid, thus avoiding doing TM operations for
+in-kernel exceptions and possible process reschedule.
+
+With this patch, the MSR[TS] will only be set just before recheckpointing
+and setting TEXASR[FS] = 1, thus avoiding an interrupt with TM registers in
+invalid state.
+
+In addition, if CONFIG_PREEMPT is set, there might be a preemption just
+after setting MSR[TS] and before tm_recheckpoint(). This block must
+therefore be atomic from a preemption perspective, hence the calls to
+preempt_disable/enable() around this code.
+
+It is not possible to move tm_recheckpoint to happen earlier, because it is
+required to get the checkpointed registers from userspace, with
+__get_user(), thus, the only way to avoid this undesired behavior is
+delaying the MSR[TS] set.
+
+The 32-bit signal handler seems to be safe from this issue, but it
+might be exposed to the preemption issue, so preemption is disabled in
+this chunk of code as well.
+
+Changes from v2:
+ * Run the critical section with preempt_disable.
+
+Fixes: 87b4e5393af7 ("powerpc/tm: Fix return of active 64bit signals")
+Cc: stable@vger.kernel.org (v3.9+)
+Signed-off-by: Breno Leitao <leitao@debian.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kernel/signal_32.c |   20 +++++++++++++++++-
+ arch/powerpc/kernel/signal_64.c |   44 +++++++++++++++++++++++++++-------------
+ 2 files changed, 49 insertions(+), 15 deletions(-)
+
+--- a/arch/powerpc/kernel/signal_32.c
++++ b/arch/powerpc/kernel/signal_32.c
+@@ -866,7 +866,23 @@ static long restore_tm_user_regs(struct
+       /* If TM bits are set to the reserved value, it's an invalid context */
+       if (MSR_TM_RESV(msr_hi))
+               return 1;
+-      /* Pull in the MSR TM bits from the user context */
++
++      /*
++       * Disabling preemption, since it is unsafe to be preempted
++       * with MSR[TS] set without recheckpointing.
++       */
++      preempt_disable();
++
++      /*
++       * CAUTION:
++       * After regs->MSR[TS] being updated, make sure that get_user(),
++       * put_user() or similar functions are *not* called. These
++       * functions can generate page faults which will cause the process
++       * to be de-scheduled with MSR[TS] set but without calling
++       * tm_recheckpoint(). This can cause a bug.
++       *
++       * Pull in the MSR TM bits from the user context
++       */
+       regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK);
+       /* Now, recheckpoint.  This loads up all of the checkpointed (older)
+        * registers, including FP and V[S]Rs.  After recheckpointing, the
+@@ -891,6 +907,8 @@ static long restore_tm_user_regs(struct
+       }
+ #endif
++      preempt_enable();
++
+       return 0;
+ }
+ #endif
+--- a/arch/powerpc/kernel/signal_64.c
++++ b/arch/powerpc/kernel/signal_64.c
+@@ -452,20 +452,6 @@ static long restore_tm_sigcontexts(struc
+       if (MSR_TM_RESV(msr))
+               return -EINVAL;
+-      /* pull in MSR TS bits from user context */
+-      regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
+-
+-      /*
+-       * Ensure that TM is enabled in regs->msr before we leave the signal
+-       * handler. It could be the case that (a) user disabled the TM bit
+-       * through the manipulation of the MSR bits in uc_mcontext or (b) the
+-       * TM bit was disabled because a sufficient number of context switches
+-       * happened whilst in the signal handler and load_tm overflowed,
+-       * disabling the TM bit. In either case we can end up with an illegal
+-       * TM state leading to a TM Bad Thing when we return to userspace.
+-       */
+-      regs->msr |= MSR_TM;
+-
+       /* pull in MSR LE from user context */
+       regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+@@ -557,6 +543,34 @@ static long restore_tm_sigcontexts(struc
+       tm_enable();
+       /* Make sure the transaction is marked as failed */
+       tsk->thread.tm_texasr |= TEXASR_FS;
++
++      /*
++       * Disabling preemption, since it is unsafe to be preempted
++       * with MSR[TS] set without recheckpointing.
++       */
++      preempt_disable();
++
++      /* pull in MSR TS bits from user context */
++      regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
++
++      /*
++       * Ensure that TM is enabled in regs->msr before we leave the signal
++       * handler. It could be the case that (a) user disabled the TM bit
++       * through the manipulation of the MSR bits in uc_mcontext or (b) the
++       * TM bit was disabled because a sufficient number of context switches
++       * happened whilst in the signal handler and load_tm overflowed,
++       * disabling the TM bit. In either case we can end up with an illegal
++       * TM state leading to a TM Bad Thing when we return to userspace.
++       *
++       * CAUTION:
++       * After regs->MSR[TS] being updated, make sure that get_user(),
++       * put_user() or similar functions are *not* called. These
++       * functions can generate page faults which will cause the process
++       * to be de-scheduled with MSR[TS] set but without calling
++       * tm_recheckpoint(). This can cause a bug.
++       */
++      regs->msr |= MSR_TM;
++
+       /* This loads the checkpointed FP/VEC state, if used */
+       tm_recheckpoint(&tsk->thread, msr);
+@@ -570,6 +584,8 @@ static long restore_tm_sigcontexts(struc
+               regs->msr |= MSR_VEC;
+       }
++      preempt_enable();
++
+       return err;
+ }
+ #endif
index dd4098557e4229558c2528f8bfd5da50c98a3ccb..fd33f1be64e677257b31908de581cbeab83a3f73 100644 (file)
@@ -41,3 +41,19 @@ staging-wilc1000-fix-missing-read_write-setting-when-reading-data.patch
 qmi_wwan-apply-set_dtr-quirk-to-the-simcom-shared-device-id.patch
 input-elan_i2c-add-acpi-id-for-touchpad-in-asus-aspire-f5-573g.patch
 kvm-x86-use-jmp-to-invoke-kvm_spurious_fault-from-.fixup.patch
+platform-msi-free-descriptors-in-platform_msi_domain_free.patch
+perf-pmu-suppress-potential-format-truncation-warning.patch
+ext4-fix-possible-use-after-free-in-ext4_quota_enable.patch
+ext4-missing-unlock-put_page-in-ext4_try_to_write_inline_data.patch
+ext4-fix-ext4_ioc_group_add-ioctl.patch
+ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch
+ext4-force-inode-writes-when-nfsd-calls-commit_metadata.patch
+ext4-check-for-shutdown-and-r-o-file-system-in-ext4_write_inode.patch
+spi-bcm2835-fix-race-on-dma-termination.patch
+spi-bcm2835-fix-book-keeping-of-dma-termination.patch
+spi-bcm2835-avoid-finishing-transfer-prematurely-in-irq-mode.patch
+clk-rockchip-fix-typo-in-rk3188-spdif_frac-parent.patch
+cdc-acm-fix-abnormal-data-rx-issue-for-mediatek-preloader.patch
+btrfs-fix-fsync-of-files-with-multiple-hard-links-in-new-directories.patch
+btrfs-run-delayed-items-before-dropping-the-snapshot.patch
+powerpc-tm-set-msr-just-prior-to-recheckpoint.patch
diff --git a/queue-4.9/spi-bcm2835-avoid-finishing-transfer-prematurely-in-irq-mode.patch b/queue-4.9/spi-bcm2835-avoid-finishing-transfer-prematurely-in-irq-mode.patch
new file mode 100644 (file)
index 0000000..de1346e
--- /dev/null
@@ -0,0 +1,60 @@
+From 56c1723426d3cfd4723bfbfce531d7b38bae6266 Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Thu, 8 Nov 2018 08:06:10 +0100
+Subject: spi: bcm2835: Avoid finishing transfer prematurely in IRQ mode
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit 56c1723426d3cfd4723bfbfce531d7b38bae6266 upstream.
+
+The IRQ handler bcm2835_spi_interrupt() first reads as much as possible
+from the RX FIFO, then writes as much as possible to the TX FIFO.
+Afterwards it decides whether the transfer is finished by checking if
+the TX FIFO is empty.
+
+If very few bytes were written to the TX FIFO, they may already have
+been transmitted by the time the FIFO's emptiness is checked.  As a
+result, the transfer will be declared finished and the chip will be
+reset without reading the corresponding received bytes from the RX FIFO.
+
+The odds of this happening increase with a high clock frequency (such
+that the TX FIFO drains quickly) and either passing "threadirqs" on the
+command line or enabling CONFIG_PREEMPT_RT_BASE (such that the IRQ
+handler may be preempted between filling the TX FIFO and checking its
+emptiness).
+
+Fix by instead checking whether rx_len has reached zero, which means
+that the transfer has been received in full.  This is also more
+efficient as it avoids one bus read access per interrupt.  Note that
+bcm2835_spi_transfer_one_poll() likewise uses rx_len to determine
+whether the transfer has finished.
+
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Fixes: e34ff011c70e ("spi: bcm2835: move to the transfer_one driver model")
+Cc: stable@vger.kernel.org # v4.1+
+Cc: Mathias Duckeck <m.duckeck@kunbus.de>
+Cc: Frank Pavlic <f.pavlic@kunbus.de>
+Cc: Martin Sperl <kernel@martin.sperl.org>
+Cc: Noralf Trønnes <noralf@tronnes.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/spi/spi-bcm2835.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/spi/spi-bcm2835.c
++++ b/drivers/spi/spi-bcm2835.c
+@@ -155,8 +155,7 @@ static irqreturn_t bcm2835_spi_interrupt
+       /* Write as many bytes as possible to FIFO */
+       bcm2835_wr_fifo(bs);
+-      /* based on flags decide if we can finish the transfer */
+-      if (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE) {
++      if (!bs->rx_len) {
+               /* Transfer complete - reset SPI HW */
+               bcm2835_spi_reset_hw(master);
+               /* wake up the framework */
diff --git a/queue-4.9/spi-bcm2835-fix-book-keeping-of-dma-termination.patch b/queue-4.9/spi-bcm2835-fix-book-keeping-of-dma-termination.patch
new file mode 100644 (file)
index 0000000..25145cb
--- /dev/null
@@ -0,0 +1,45 @@
+From dbc944115eed48af110646992893dc43321368d8 Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Thu, 8 Nov 2018 08:06:10 +0100
+Subject: spi: bcm2835: Fix book-keeping of DMA termination
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit dbc944115eed48af110646992893dc43321368d8 upstream.
+
+If submission of a DMA TX transfer succeeds but submission of the
+corresponding RX transfer does not, the BCM2835 SPI driver terminates
+the TX transfer but neglects to reset the dma_pending flag to false.
+
+Thus, if the next transfer uses interrupt mode (because it is shorter
+than BCM2835_SPI_DMA_MIN_LENGTH) and runs into a timeout,
+dmaengine_terminate_all() will be called both for TX (once more) and
+for RX (which was never started in the first place).  Fix it.
+
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Fixes: 3ecd37edaa2a ("spi: bcm2835: enable dma modes for transfers meeting certain conditions")
+Cc: stable@vger.kernel.org # v4.2+
+Cc: Mathias Duckeck <m.duckeck@kunbus.de>
+Cc: Frank Pavlic <f.pavlic@kunbus.de>
+Cc: Martin Sperl <kernel@martin.sperl.org>
+Cc: Noralf Trønnes <noralf@tronnes.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/spi/spi-bcm2835.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/spi/spi-bcm2835.c
++++ b/drivers/spi/spi-bcm2835.c
+@@ -341,6 +341,7 @@ static int bcm2835_spi_transfer_one_dma(
+       if (ret) {
+               /* need to reset on errors */
+               dmaengine_terminate_all(master->dma_tx);
++              bs->dma_pending = false;
+               bcm2835_spi_reset_hw(master);
+               return ret;
+       }
diff --git a/queue-4.9/spi-bcm2835-fix-race-on-dma-termination.patch b/queue-4.9/spi-bcm2835-fix-race-on-dma-termination.patch
new file mode 100644 (file)
index 0000000..647b30f
--- /dev/null
@@ -0,0 +1,62 @@
+From e82b0b3828451c1cd331d9f304c6078fcd43b62e Mon Sep 17 00:00:00 2001
+From: Lukas Wunner <lukas@wunner.de>
+Date: Thu, 8 Nov 2018 08:06:10 +0100
+Subject: spi: bcm2835: Fix race on DMA termination
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit e82b0b3828451c1cd331d9f304c6078fcd43b62e upstream.
+
+If a DMA transfer finishes orderly right when spi_transfer_one_message()
+determines that it has timed out, the callbacks bcm2835_spi_dma_done()
+and bcm2835_spi_handle_err() race to call dmaengine_terminate_all(),
+potentially leading to double termination.
+
+Prevent this by atomically changing the dma_pending flag before calling
+dmaengine_terminate_all().
+
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Fixes: 3ecd37edaa2a ("spi: bcm2835: enable dma modes for transfers meeting certain conditions")
+Cc: stable@vger.kernel.org # v4.2+
+Cc: Mathias Duckeck <m.duckeck@kunbus.de>
+Cc: Frank Pavlic <f.pavlic@kunbus.de>
+Cc: Martin Sperl <kernel@martin.sperl.org>
+Cc: Noralf Trønnes <noralf@tronnes.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/spi/spi-bcm2835.c |   10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/drivers/spi/spi-bcm2835.c
++++ b/drivers/spi/spi-bcm2835.c
+@@ -233,10 +233,9 @@ static void bcm2835_spi_dma_done(void *d
+        * is called the tx-dma must have finished - can't get to this
+        * situation otherwise...
+        */
+-      dmaengine_terminate_all(master->dma_tx);
+-
+-      /* mark as no longer pending */
+-      bs->dma_pending = 0;
++      if (cmpxchg(&bs->dma_pending, true, false)) {
++              dmaengine_terminate_all(master->dma_tx);
++      }
+       /* and mark as completed */;
+       complete(&master->xfer_completion);
+@@ -617,10 +616,9 @@ static void bcm2835_spi_handle_err(struc
+       struct bcm2835_spi *bs = spi_master_get_devdata(master);
+       /* if an error occurred and we have an active dma, then terminate */
+-      if (bs->dma_pending) {
++      if (cmpxchg(&bs->dma_pending, true, false)) {
+               dmaengine_terminate_all(master->dma_tx);
+               dmaengine_terminate_all(master->dma_rx);
+-              bs->dma_pending = 0;
+       }
+       /* and reset */
+       bcm2835_spi_reset_hw(master);