--- /dev/null
+From 01d7a356872eec22ef34a33a5f9cfa917d145468 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Mon, 3 Feb 2020 10:33:42 -0700
+Subject: aio: prevent potential eventfd recursion on poll
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 01d7a356872eec22ef34a33a5f9cfa917d145468 upstream.
+
+If we have nested or circular eventfd wakeups, then we can deadlock if
+we run them inline from our poll waitqueue wakeup handler. It's also
+possible to have very long chains of notifications, to the extent where
+we could risk blowing the stack.
+
+Check the eventfd recursion count before calling eventfd_signal(). If
+it's non-zero, then punt the signaling to async context. This is always
+safe, as it takes us out-of-line in terms of stack and locking context.
+
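+For illustration, here is a minimal userspace sketch (hypothetical, with
+error handling omitted) of one way to build such a circular chain using
+the raw aio ABI: two IOCB_CMD_POLL requests are cross-wired through
+IOCB_FLAG_RESFD so that completing either poll signals the eventfd
+polled by the other, and the completion of an aio fsync performs the
+initial in-kernel eventfd_signal() that kicks the chain off:
+
+  #include <fcntl.h>
+  #include <linux/aio_abi.h>
+  #include <poll.h>
+  #include <string.h>
+  #include <sys/eventfd.h>
+  #include <sys/syscall.h>
+  #include <unistd.h>
+
+  static void prep_poll(struct iocb *cb, int fd, int resfd)
+  {
+          memset(cb, 0, sizeof(*cb));
+          cb->aio_lio_opcode = IOCB_CMD_POLL;
+          cb->aio_fildes = fd;
+          cb->aio_buf = POLLIN;        /* events to wait for */
+          cb->aio_flags = IOCB_FLAG_RESFD;
+          cb->aio_resfd = resfd;       /* eventfd signaled on completion */
+  }
+
+  int main(void)
+  {
+          aio_context_t ctx = 0;
+          struct iocb polls[2], sync_cb, *cbs[3];
+          int efd[2] = { eventfd(0, 0), eventfd(0, 0) };
+          int fd = open("/tmp/f", O_CREAT | O_RDWR, 0600);
+
+          syscall(__NR_io_setup, 8, &ctx);
+          prep_poll(&polls[0], efd[0], efd[1]); /* efd[0] ready -> signal efd[1] */
+          prep_poll(&polls[1], efd[1], efd[0]); /* efd[1] ready -> signal efd[0] */
+
+          memset(&sync_cb, 0, sizeof(sync_cb));
+          sync_cb.aio_lio_opcode = IOCB_CMD_FSYNC;
+          sync_cb.aio_fildes = fd;
+          sync_cb.aio_flags = IOCB_FLAG_RESFD;
+          sync_cb.aio_resfd = efd[0];  /* completion signals efd[0] */
+
+          cbs[0] = &polls[0];
+          cbs[1] = &polls[1];
+          cbs[2] = &sync_cb;
+          syscall(__NR_io_submit, ctx, 3, cbs);
+          pause();
+          return 0;
+  }
+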
+Cc: stable@vger.kernel.org # 4.19+
+Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/aio.c | 20 ++++++++++++++++++--
+ 1 file changed, 18 insertions(+), 2 deletions(-)
+
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -1610,6 +1610,14 @@ static int aio_fsync(struct fsync_iocb *
+ return 0;
+ }
+
++static void aio_poll_put_work(struct work_struct *work)
++{
++ struct poll_iocb *req = container_of(work, struct poll_iocb, work);
++ struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
++
++ iocb_put(iocb);
++}
++
+ static void aio_poll_complete_work(struct work_struct *work)
+ {
+ struct poll_iocb *req = container_of(work, struct poll_iocb, work);
+@@ -1674,6 +1682,8 @@ static int aio_poll_wake(struct wait_que
+ list_del_init(&req->wait.entry);
+
+ if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
++ struct kioctx *ctx = iocb->ki_ctx;
++
+ /*
+ * Try to complete the iocb inline if we can. Use
+ * irqsave/irqrestore because not all filesystems (e.g. fuse)
+@@ -1683,8 +1693,14 @@ static int aio_poll_wake(struct wait_que
+ list_del(&iocb->ki_list);
+ iocb->ki_res.res = mangle_poll(mask);
+ req->done = true;
+- spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
+- iocb_put(iocb);
++ if (iocb->ki_eventfd && eventfd_signal_count()) {
++ iocb = NULL;
++ INIT_WORK(&req->work, aio_poll_put_work);
++ schedule_work(&req->work);
++ }
++ spin_unlock_irqrestore(&ctx->ctx_lock, flags);
++ if (iocb)
++ iocb_put(iocb);
+ } else {
+ schedule_work(&req->work);
+ }
--- /dev/null
+From 1a3388d506bf5b45bb283e6a4c4706cfb4897333 Mon Sep 17 00:00:00 2001
+From: Stephen Warren <swarren@nvidia.com>
+Date: Thu, 3 Oct 2019 14:50:31 -0600
+Subject: ARM: tegra: Enable PLLP bypass during Tegra124 LP1
+
+From: Stephen Warren <swarren@nvidia.com>
+
+commit 1a3388d506bf5b45bb283e6a4c4706cfb4897333 upstream.
+
+For a little over a year, U-Boot has configured the flow controller to
+perform automatic RAM re-repair on off->on power transitions of the CPU
+rail[1]. This is mandatory for correct operation of Tegra124. However,
+RAM re-repair relies on certain clocks, which the kernel must enable and
+leave running. PLLP is one of those clocks. This clock is shut down
+during LP1 in order to save power. Enable bypass (which I believe routes
+osc_div_clk, essentially the crystal clock, to the PLL output) so that
+this clock signal toggles even though the PLL is not active. This is
+required so that LP1 power mode (system suspend) operates correctly.
+
+The bypass configuration must then be undone when resuming from LP1, so
+that all peripheral clocks run at the expected rate. Without this, many
+peripherals won't work correctly; for example, the UART baud rate would
+be incorrect.
+
+NVIDIA's downstream kernel code only does this if not compiled for
+Tegra30, so the added code is made conditional upon the chip ID.
+NVIDIA's downstream code also makes this change conditional upon the
+active CPU cluster; since the upstream kernel currently doesn't support
+cluster switching, this patch doesn't test the active CPU cluster ID.
+
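+As a rough C rendering of the change (illustrative only - the real code
+runs in the assembly suspend/resume paths shown below; the register
+offset and bit positions are taken from that assembly and the constants
+in sleep-tegra30.S):
+
+  #include <linux/io.h>
+
+  #define CLK_RESET_PLLP_BASE 0xa0        /* as in sleep-tegra30.S */
+  #define TEGRA30 0x30                    /* chip ID */
+  #define PLLP_BASE_BYPASS (1u << 31)     /* route osc to PLL output */
+  #define PLLP_BASE_ENABLE (1u << 30)     /* PLL active */
+
+  static void pllp_lp1_enter(void __iomem *clk_base, u8 chip_id)
+  {
+          u32 val = readl(clk_base + CLK_RESET_PLLP_BASE);
+
+          /* Bypass PLLP to the crystal clock before disabling the PLL,
+           * so its output keeps toggling for RAM re-repair. */
+          if (chip_id != TEGRA30)
+                  val |= PLLP_BASE_BYPASS;
+          val &= ~PLLP_BASE_ENABLE;
+          writel(val, clk_base + CLK_RESET_PLLP_BASE);
+  }
+
+  static void pllp_lp1_exit(void __iomem *clk_base, u8 chip_id)
+  {
+          u32 val;
+
+          /* Undo the bypass so peripherals see PLLP rates again. */
+          if (chip_id == TEGRA30)
+                  return;
+          val = readl(clk_base + CLK_RESET_PLLP_BASE);
+          val &= ~PLLP_BASE_BYPASS;
+          writel(val, clk_base + CLK_RESET_PLLP_BASE);
+  }
+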
+[1] 3cc7942a4ae5 ARM: tegra: implement RAM repair
+
+Reported-by: Jonathan Hunter <jonathanh@nvidia.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Stephen Warren <swarren@nvidia.com>
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/mach-tegra/sleep-tegra30.S | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/arch/arm/mach-tegra/sleep-tegra30.S
++++ b/arch/arm/mach-tegra/sleep-tegra30.S
+@@ -370,6 +370,14 @@ _pll_m_c_x_done:
+ pll_locked r1, r0, CLK_RESET_PLLC_BASE
+ pll_locked r1, r0, CLK_RESET_PLLX_BASE
+
++ tegra_get_soc_id TEGRA_APB_MISC_BASE, r1
++ cmp r1, #TEGRA30
++ beq 1f
++ ldr r1, [r0, #CLK_RESET_PLLP_BASE]
++ bic r1, r1, #(1<<31) @ disable PllP bypass
++ str r1, [r0, #CLK_RESET_PLLP_BASE]
++1:
++
+ mov32 r7, TEGRA_TMRUS_BASE
+ ldr r1, [r7]
+ add r1, r1, #LOCK_DELAY
+@@ -630,7 +638,10 @@ tegra30_switch_cpu_to_clk32k:
+ str r0, [r4, #PMC_PLLP_WB0_OVERRIDE]
+
+ /* disable PLLP, PLLA, PLLC and PLLX */
++ tegra_get_soc_id TEGRA_APB_MISC_BASE, r1
++ cmp r1, #TEGRA30
+ ldr r0, [r5, #CLK_RESET_PLLP_BASE]
++ orrne r0, r0, #(1 << 31) @ enable PllP bypass on fast cluster
+ bic r0, r0, #(1 << 30)
+ str r0, [r5, #CLK_RESET_PLLP_BASE]
+ ldr r0, [r5, #CLK_RESET_PLLA_BASE]
--- /dev/null
+From 038ba8cc1bffc51250add4a9b9249d4331576d8f Mon Sep 17 00:00:00 2001
+From: Coly Li <colyli@suse.de>
+Date: Sat, 1 Feb 2020 22:42:33 +0800
+Subject: bcache: add readahead cache policy options via sysfs interface
+
+From: Coly Li <colyli@suse.de>
+
+commit 038ba8cc1bffc51250add4a9b9249d4331576d8f upstream.
+
+Back in 2007 high performance SSDs were still expensive, so in order to
+save more space for real workload or meta data, the readahead I/Os for
+non-meta data were bypassed and not cached on SSD.
+
+Nowadays SSD prices have dropped a lot and people can find larger size
+SSDs at a more comfortable price. It is unnecessary to always bypass
+normal readahead I/Os to save SSD space any more.
+
+This patch adds options for the readahead data cache policy via the
+sysfs file /sys/block/bcache<N>/readahead_cache_policy. The options are:
+- "all": cache all readahead data I/Os.
+- "meta-only": only cache meta data, and bypass other regular I/Os.
+
+If users want bcache to continue to only cache readahead requests for
+metadata and bypass regular data readahead, they can set "meta-only" in
+this sysfs file. By default, bcache now goes back to caching all
+readahead requests.
+
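+For example, a small C helper along these lines can select the policy (a
+sketch; the sysfs path follows the description above and the bcache
+device number varies per system):
+
+  #include <fcntl.h>
+  #include <string.h>
+  #include <unistd.h>
+
+  static int set_readahead_policy(const char *policy)
+  {
+          const char *path = "/sys/block/bcache0/readahead_cache_policy";
+          int fd = open(path, O_WRONLY);
+          ssize_t n;
+
+          if (fd < 0)
+                  return -1;
+          n = write(fd, policy, strlen(policy)); /* "all" or "meta-only" */
+          close(fd);
+          return n < 0 ? -1 : 0;
+  }
+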
+Cc: stable@vger.kernel.org
+Signed-off-by: Coly Li <colyli@suse.de>
+Acked-by: Eric Wheeler <bcache@linux.ewheeler.net>
+Cc: Michael Lyle <mlyle@lyle.org>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/bcache/bcache.h | 3 +++
+ drivers/md/bcache/request.c | 17 ++++++++++++-----
+ drivers/md/bcache/sysfs.c | 22 ++++++++++++++++++++++
+ 3 files changed, 37 insertions(+), 5 deletions(-)
+
+--- a/drivers/md/bcache/bcache.h
++++ b/drivers/md/bcache/bcache.h
+@@ -329,6 +329,9 @@ struct cached_dev {
+ */
+ atomic_t has_dirty;
+
++#define BCH_CACHE_READA_ALL 0
++#define BCH_CACHE_READA_META_ONLY 1
++ unsigned int cache_readahead_policy;
+ struct bch_ratelimit writeback_rate;
+ struct delayed_work writeback_rate_update;
+
+--- a/drivers/md/bcache/request.c
++++ b/drivers/md/bcache/request.c
+@@ -391,13 +391,20 @@ static bool check_should_bypass(struct c
+ goto skip;
+
+ /*
+- * Flag for bypass if the IO is for read-ahead or background,
+- * unless the read-ahead request is for metadata
++ * If the bio is for read-ahead or background IO, whether to bypass
++ * it or not depends on the following cases:
++ * - If the IO is for meta data, always cache it and do not bypass
++ * - If the IO is not meta data, check dc->cache_readahead_policy:
++ * BCH_CACHE_READA_ALL: cache it and do not bypass
++ * BCH_CACHE_READA_META_ONLY: do not cache it and bypass
++ * That is, read-ahead requests for metadata always get cached
++ * (eg, for gfs2 or xfs).
+ */
+- if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) &&
+- !(bio->bi_opf & (REQ_META|REQ_PRIO)))
+- goto skip;
++ if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) {
++ if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
++ (dc->cache_readahead_policy != BCH_CACHE_READA_ALL))
++ goto skip;
++ }
+
+ if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
+ bio_sectors(bio) & (c->sb.block_size - 1)) {
+--- a/drivers/md/bcache/sysfs.c
++++ b/drivers/md/bcache/sysfs.c
+@@ -27,6 +27,12 @@ static const char * const bch_cache_mode
+ NULL
+ };
+
++static const char * const bch_reada_cache_policies[] = {
++ "all",
++ "meta-only",
++ NULL
++};
++
+ /* Default is 0 ("auto") */
+ static const char * const bch_stop_on_failure_modes[] = {
+ "auto",
+@@ -100,6 +106,7 @@ rw_attribute(congested_write_threshold_u
+ rw_attribute(sequential_cutoff);
+ rw_attribute(data_csum);
+ rw_attribute(cache_mode);
++rw_attribute(readahead_cache_policy);
+ rw_attribute(stop_when_cache_set_failed);
+ rw_attribute(writeback_metadata);
+ rw_attribute(writeback_running);
+@@ -167,6 +174,11 @@ SHOW(__bch_cached_dev)
+ bch_cache_modes,
+ BDEV_CACHE_MODE(&dc->sb));
+
++ if (attr == &sysfs_readahead_cache_policy)
++ return bch_snprint_string_list(buf, PAGE_SIZE,
++ bch_reada_cache_policies,
++ dc->cache_readahead_policy);
++
+ if (attr == &sysfs_stop_when_cache_set_failed)
+ return bch_snprint_string_list(buf, PAGE_SIZE,
+ bch_stop_on_failure_modes,
+@@ -352,6 +364,15 @@ STORE(__cached_dev)
+ }
+ }
+
++ if (attr == &sysfs_readahead_cache_policy) {
++ v = __sysfs_match_string(bch_reada_cache_policies, -1, buf);
++ if (v < 0)
++ return v;
++
++ if ((unsigned int) v != dc->cache_readahead_policy)
++ dc->cache_readahead_policy = v;
++ }
++
+ if (attr == &sysfs_stop_when_cache_set_failed) {
+ v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf);
+ if (v < 0)
+@@ -466,6 +487,7 @@ static struct attribute *bch_cached_dev_
+ &sysfs_data_csum,
+ #endif
+ &sysfs_cache_mode,
++ &sysfs_readahead_cache_policy,
+ &sysfs_stop_when_cache_set_failed,
+ &sysfs_writeback_metadata,
+ &sysfs_writeback_running,
--- /dev/null
+From 5750c37523a2c8cbb450b9ef31e21c2ba876b05e Mon Sep 17 00:00:00 2001
+From: Nikolay Borisov <nborisov@suse.com>
+Date: Mon, 27 Jan 2020 11:59:26 +0200
+Subject: btrfs: Correctly handle empty trees in find_first_clear_extent_bit
+
+From: Nikolay Borisov <nborisov@suse.com>
+
+commit 5750c37523a2c8cbb450b9ef31e21c2ba876b05e upstream.
+
+Raviu reported that running his regular fs_trim segfaulted with the
+following backtrace:
+
+[ 237.525947] assertion failed: prev, in ../fs/btrfs/extent_io.c:1595
+[ 237.525984] ------------[ cut here ]------------
+[ 237.525985] kernel BUG at ../fs/btrfs/ctree.h:3117!
+[ 237.525992] invalid opcode: 0000 [#1] SMP PTI
+[ 237.525998] CPU: 4 PID: 4423 Comm: fstrim Tainted: G U OE 5.4.14-8-vanilla #1
+[ 237.526001] Hardware name: ASUSTeK COMPUTER INC.
+[ 237.526044] RIP: 0010:assfail.constprop.58+0x18/0x1a [btrfs]
+[ 237.526079] Call Trace:
+[ 237.526120] find_first_clear_extent_bit+0x13d/0x150 [btrfs]
+[ 237.526148] btrfs_trim_fs+0x211/0x3f0 [btrfs]
+[ 237.526184] btrfs_ioctl_fitrim+0x103/0x170 [btrfs]
+[ 237.526219] btrfs_ioctl+0x129a/0x2ed0 [btrfs]
+[ 237.526227] ? filemap_map_pages+0x190/0x3d0
+[ 237.526232] ? do_filp_open+0xaf/0x110
+[ 237.526238] ? _copy_to_user+0x22/0x30
+[ 237.526242] ? cp_new_stat+0x150/0x180
+[ 237.526247] ? do_vfs_ioctl+0xa4/0x640
+[ 237.526278] ? btrfs_ioctl_get_supported_features+0x30/0x30 [btrfs]
+[ 237.526283] do_vfs_ioctl+0xa4/0x640
+[ 237.526288] ? __do_sys_newfstat+0x3c/0x60
+[ 237.526292] ksys_ioctl+0x70/0x80
+[ 237.526297] __x64_sys_ioctl+0x16/0x20
+[ 237.526303] do_syscall_64+0x5a/0x1c0
+[ 237.526310] entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+That was due to btrfs_fs_device::alloc_tree being empty. Initially I
+thought this wasn't possible and as a precaution had put the assert in
+find_first_clear_extent_bit. Turns out this is indeed possible and can
+happen when a file system with SINGLE data/metadata profile has a 2nd
+device added. Until balance is run or a new chunk is allocated on this
+device it will be completely empty.
+
+In this case find_first_clear_extent_bit should return the full range
+[0, -1ULL] and let the caller handle it, i.e. for trim the end will be
+capped at the size of the actual device.
+
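+For context, the failing operation is just the FITRIM ioctl that fstrim
+issues; a minimal sketch of it (assuming a filesystem set up as
+described above and mounted at the given path):
+
+  #include <fcntl.h>
+  #include <linux/fs.h>      /* FITRIM, struct fstrim_range */
+  #include <stdint.h>
+  #include <sys/ioctl.h>
+  #include <unistd.h>
+
+  static int trim_fs(const char *mountpoint)
+  {
+          struct fstrim_range range = {
+                  .start = 0,
+                  .len = UINT64_MAX,    /* trim the whole filesystem */
+                  .minlen = 0,
+          };
+          int fd = open(mountpoint, O_RDONLY);
+          int ret;
+
+          if (fd < 0)
+                  return -1;
+          /* Ends up in find_first_clear_extent_bit() per the trace. */
+          ret = ioctl(fd, FITRIM, &range);
+          close(fd);
+          return ret;
+  }
+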
+Link: https://lore.kernel.org/linux-btrfs/izW2WNyvy1dEDweBICizKnd2KDwDiDyY2EYQr4YCwk7pkuIpthx-JRn65MPBde00ND6V0_Lh8mW0kZwzDiLDv25pUYWxkskWNJnVP0kgdMA=@protonmail.com/
+Fixes: 45bfcfc168f8 ("btrfs: Implement find_first_clear_extent_bit")
+CC: stable@vger.kernel.org # 5.2+
+Signed-off-by: Nikolay Borisov <nborisov@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent_io.c | 32 ++++++++++++++++++--------------
+ fs/btrfs/tests/extent-io-tests.c | 9 +++++++++
+ 2 files changed, 27 insertions(+), 14 deletions(-)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -1583,21 +1583,25 @@ void find_first_clear_extent_bit(struct
+ /* Find first extent with bits cleared */
+ while (1) {
+ node = __etree_search(tree, start, &next, &prev, NULL, NULL);
+- if (!node) {
++ if (!node && !next && !prev) {
++ /*
++ * Tree is completely empty, send full range and let
++ * caller deal with it
++ */
++ *start_ret = 0;
++ *end_ret = -1;
++ goto out;
++ } else if (!node && !next) {
++ /*
++ * We are past the last allocated chunk, set start at
++ * the end of the last extent.
++ */
++ state = rb_entry(prev, struct extent_state, rb_node);
++ *start_ret = state->end + 1;
++ *end_ret = -1;
++ goto out;
++ } else if (!node) {
+ node = next;
+- if (!node) {
+- /*
+- * We are past the last allocated chunk,
+- * set start at the end of the last extent. The
+- * device alloc tree should never be empty so
+- * prev is always set.
+- */
+- ASSERT(prev);
+- state = rb_entry(prev, struct extent_state, rb_node);
+- *start_ret = state->end + 1;
+- *end_ret = -1;
+- goto out;
+- }
+ }
+ /*
+ * At this point 'node' either contains 'start' or start is
+--- a/fs/btrfs/tests/extent-io-tests.c
++++ b/fs/btrfs/tests/extent-io-tests.c
+@@ -441,8 +441,17 @@ static int test_find_first_clear_extent_
+ int ret = -EINVAL;
+
+ test_msg("running find_first_clear_extent_bit test");
++
+ extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST, NULL);
+
++ /* Test correct handling of empty tree */
++ find_first_clear_extent_bit(&tree, 0, &start, &end, CHUNK_TRIMMED);
++ if (start != 0 || end != -1) {
++ test_err(
++ "error getting a range from completely empty tree: start %llu end %llu",
++ start, end);
++ goto out;
++ }
+ /*
+ * Set 1M-4M alloc/discard and 32M-64M thus leaving a hole between
+ * 4M-32M
--- /dev/null
+From 889bfa39086e86b52fcfaa04d72c95eaeb12f9a5 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Fri, 17 Jan 2020 09:12:45 -0500
+Subject: btrfs: drop log root for dropped roots
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 889bfa39086e86b52fcfaa04d72c95eaeb12f9a5 upstream.
+
+If we fsync on a subvolume and create a log root for that volume, and
+then later delete that subvolume we'll never clean up its log root. Fix
+this by making switch_commit_roots free the log for any dropped roots we
+encounter. The extra churn is because we need a btrfs_trans_handle, not
+the btrfs_transaction.
+
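+The scenario is simple to produce from userspace; a sketch (paths and
+names are illustrative, error handling omitted):
+
+  #include <fcntl.h>
+  #include <linux/btrfs.h>
+  #include <string.h>
+  #include <sys/ioctl.h>
+  #include <unistd.h>
+
+  int main(void)
+  {
+          struct btrfs_ioctl_vol_args args = { .fd = 0 };
+          int dir = open("/mnt", O_RDONLY);    /* btrfs mount point */
+          int fd;
+
+          strcpy(args.name, "subvol");
+          ioctl(dir, BTRFS_IOC_SUBVOL_CREATE, &args);
+
+          fd = open("/mnt/subvol/file", O_CREAT | O_RDWR, 0600);
+          write(fd, "x", 1);
+          fsync(fd);       /* creates a log root for the subvolume */
+          close(fd);
+
+          /* Before this fix, the subvolume's log root leaked here. */
+          ioctl(dir, BTRFS_IOC_SNAP_DESTROY, &args);
+          return 0;
+  }
+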
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/transaction.c | 22 ++++++++++++----------
+ 1 file changed, 12 insertions(+), 10 deletions(-)
+
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -77,13 +77,14 @@ void btrfs_put_transaction(struct btrfs_
+ }
+ }
+
+-static noinline void switch_commit_roots(struct btrfs_transaction *trans)
++static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
+ {
++ struct btrfs_transaction *cur_trans = trans->transaction;
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_root *root, *tmp;
+
+ down_write(&fs_info->commit_root_sem);
+- list_for_each_entry_safe(root, tmp, &trans->switch_commits,
++ list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits,
+ dirty_list) {
+ list_del_init(&root->dirty_list);
+ free_extent_buffer(root->commit_root);
+@@ -95,16 +96,17 @@ static noinline void switch_commit_roots
+ }
+
+ /* We can free old roots now. */
+- spin_lock(&trans->dropped_roots_lock);
+- while (!list_empty(&trans->dropped_roots)) {
+- root = list_first_entry(&trans->dropped_roots,
++ spin_lock(&cur_trans->dropped_roots_lock);
++ while (!list_empty(&cur_trans->dropped_roots)) {
++ root = list_first_entry(&cur_trans->dropped_roots,
+ struct btrfs_root, root_list);
+ list_del_init(&root->root_list);
+- spin_unlock(&trans->dropped_roots_lock);
++ spin_unlock(&cur_trans->dropped_roots_lock);
++ btrfs_free_log(trans, root);
+ btrfs_drop_and_free_fs_root(fs_info, root);
+- spin_lock(&trans->dropped_roots_lock);
++ spin_lock(&cur_trans->dropped_roots_lock);
+ }
+- spin_unlock(&trans->dropped_roots_lock);
++ spin_unlock(&cur_trans->dropped_roots_lock);
+ up_write(&fs_info->commit_root_sem);
+ }
+
+@@ -1359,7 +1361,7 @@ static int qgroup_account_snapshot(struc
+ ret = commit_cowonly_roots(trans);
+ if (ret)
+ goto out;
+- switch_commit_roots(trans->transaction);
++ switch_commit_roots(trans);
+ ret = btrfs_write_and_wait_transaction(trans);
+ if (ret)
+ btrfs_handle_fs_error(fs_info, ret,
+@@ -2245,7 +2247,7 @@ int btrfs_commit_transaction(struct btrf
+ list_add_tail(&fs_info->chunk_root->dirty_list,
+ &cur_trans->switch_commits);
+
+- switch_commit_roots(cur_trans);
++ switch_commit_roots(trans);
+
+ ASSERT(list_empty(&cur_trans->dirty_bgs));
+ ASSERT(list_empty(&cur_trans->io_bgs));
--- /dev/null
+From b5e4ff9d465da1233a2d9a47ebce487c70d8f4ab Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 15 Jan 2020 13:21:35 +0000
+Subject: Btrfs: fix infinite loop during fsync after rename operations
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit b5e4ff9d465da1233a2d9a47ebce487c70d8f4ab upstream.
+
+Recently fsstress (from fstests) sporadically started to trigger an
+infinite loop during fsync operations. This turned out to be because
+support for the rename exchange and whiteout operations was added to
+fsstress in fstests. These operations, unlike any others in fsstress,
+cause file names to be reused, hence triggering this issue. However,
+rename exchange and rename whiteout operations are not strictly needed
+to trigger the issue; simple rename operations and file creations are
+enough.
+
+The issue boils down to this: when we are logging inodes that conflict
+(inodes that had the name of any inode we need to log during the fsync
+operation), we keep logging them even if they were already logged
+before, and after that we check if there's any other inode that
+conflicts with them and then add it again to the list of inodes to log.
+Skipping already logged inodes fixes the issue.
+
+Consider the following example:
+
+ $ mkfs.btrfs -f /dev/sdb
+ $ mount /dev/sdb /mnt
+
+ $ mkdir /mnt/testdir # inode 257
+
+ $ touch /mnt/testdir/zz # inode 258
+ $ ln /mnt/testdir/zz /mnt/testdir/zz_link
+
+ $ touch /mnt/testdir/a # inode 259
+
+ $ sync
+
+ # The following 3 renames achieve the same result as a rename exchange
+ # operation (<rename_exchange> /mnt/testdir/zz_link to /mnt/testdir/a).
+
+ $ mv /mnt/testdir/a /mnt/testdir/tmp
+ $ mv /mnt/testdir/zz_link /mnt/testdir/a
+ $ mv /mnt/testdir/tmp /mnt/testdir/zz_link
+
+ # The following rename and file creation give the same result as a
+ # rename whiteout operation (<rename_whiteout> zz to a2).
+
+ $ mv /mnt/testdir/zz /mnt/testdir/a2
+ $ touch /mnt/testdir/zz # inode 260
+
+ $ xfs_io -c fsync /mnt/testdir/zz
+ --> results in the infinite loop
+
+The following steps happen:
+
+1) When logging inode 260, we find that its reference named "zz" was
+ used by inode 258 in the previous transaction (through the commit
+ root), so inode 258 is added to the list of conflicting indoes that
+ need to be logged;
+
+2) After logging inode 258, we find that its reference named "a" was
+ used by inode 259 in the previous transaction, and therefore we add
+ inode 259 to the list of conflicting inodes to be logged;
+
+3) After logging inode 259, we find that its reference named "zz_link"
+ was used by inode 258 in the previous transaction - we add inode 258
+ to the list of conflicting inodes to log, again - we had already
+ logged it before at step 3. After logging it again, we find again
+ that inode 259 conflicts with him, and we add again 259 to the list,
+ etc - we end up repeating all the previous steps.
+
+So fix this by skipping logging of conflicting inodes that were already
+logged.
+
+Fixes: 6b5fc433a7ad67 ("Btrfs: fix fsync after succession of renames of different files")
+CC: stable@vger.kernel.org # 5.1+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-log.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 44 insertions(+)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -4855,6 +4855,50 @@ static int log_conflicting_inodes(struct
+ continue;
+ }
+ /*
++ * If the inode was already logged skip it - otherwise we can
++ * hit an infinite loop. Example:
++ *
++ * From the commit root (previous transaction) we have the
++ * following inodes:
++ *
++ * inode 257 a directory
++ * inode 258 with references "zz" and "zz_link" on inode 257
++ * inode 259 with reference "a" on inode 257
++ *
++ * And in the current (uncommitted) transaction we have:
++ *
++ * inode 257 a directory, unchanged
++ * inode 258 with references "a" and "a2" on inode 257
++ * inode 259 with reference "zz_link" on inode 257
++ * inode 261 with reference "zz" on inode 257
++ *
++ * When logging inode 261 the following infinite loop could
++ * happen if we don't skip already logged inodes:
++ *
++ * - we detect inode 258 as a conflicting inode, with inode 261
++ * on reference "zz", and log it;
++ *
++ * - we detect inode 259 as a conflicting inode, with inode 258
++ * on reference "a", and log it;
++ *
++ * - we detect inode 258 as a conflicting inode, with inode 259
++ * on reference "zz_link", and log it - again! After this we
++ * repeat the above steps forever.
++ */
++ spin_lock(&BTRFS_I(inode)->lock);
++ /*
++ * Check the inode's logged_trans only instead of
++ * btrfs_inode_in_log(). This is because the last_log_commit of
++ * the inode is not updated when we only log that it exists
++ * and it has the full sync bit set (see btrfs_log_inode()).
++ */
++ if (BTRFS_I(inode)->logged_trans == trans->transid) {
++ spin_unlock(&BTRFS_I(inode)->lock);
++ btrfs_add_delayed_iput(inode);
++ continue;
++ }
++ spin_unlock(&BTRFS_I(inode)->lock);
++ /*
+ * We are safe logging the other inode without acquiring its
+ * lock as long as we log with the LOG_INODE_EXISTS mode. We
+ * are safe against concurrent renames of the other inode as
--- /dev/null
+From 0e56315ca147b3e60c7bf240233a301d3c7fb508 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Tue, 19 Nov 2019 12:07:33 +0000
+Subject: Btrfs: fix missing hole after hole punching and fsync when using NO_HOLES
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 0e56315ca147b3e60c7bf240233a301d3c7fb508 upstream.
+
+When using the NO_HOLES feature, if we punch a hole into a file and then
+fsync it, there are cases where a subsequent fsync will miss the fact that
+a hole was punched, resulting in the holes not existing after replaying
+the log tree.
+
+Essentially these cases all imply that tree-log.c:copy_items() is not
+invoked for the leafs that delimit holes, because nothing changed those
+leafs in the current transaction. And it's precisely copy_items() where
+we currently detect and log holes, which works as long as the holes are
+between file extent items in the input leaf, or between the beginning of
+the input leaf and the previous leaf, or between the last item in the
+leaf and the next leaf.
+
+First example where we miss a hole:
+
+ *) The extent items of the inode span multiple leafs;
+
+ *) The punched hole covers a range that affects only the extent items of
+ the first leaf;
+
+ *) The fsync operation is done in full mode (BTRFS_INODE_NEEDS_FULL_SYNC
+ is set in the inode's runtime flags).
+
+ That results in the hole not existing after replaying the log tree.
+
+ For example, if the fs/subvolume tree has the following layout for a
+ particular inode:
+
+ Leaf N, generation 10:
+
+ [ ... INODE_ITEM INODE_REF EXTENT_ITEM (0 64K) EXTENT_ITEM (64K 128K) ]
+
+ Leaf N + 1, generation 10:
+
+ [ EXTENT_ITEM (128K 64K) ... ]
+
+ If at transaction 11 we punch a hole covering the range [0, 128K[, we end
+ up dropping the two extent items from leaf N, but we don't touch the other
+ leaf, so we end up in the following state:
+
+ Leaf N, generation 11:
+
+ [ ... INODE_ITEM INODE_REF ]
+
+ Leaf N + 1, generation 10:
+
+ [ EXTENT_ITEM (128K 64K) ... ]
+
+ A full fsync after punching the hole will only process leaf N because it
+ was modified in the current transaction, but not leaf N + 1, since it
+ was not modified in the current transaction (generation 10 and not 11).
+ As a result the fsync will not log any holes, because it didn't process
+ any leaf with extent items.
+
+Second example where we will miss a hole:
+
+ *) An inode has its items spanning 5 (or more) leafs;
+
+ *) A hole is punched and it covers only the extent items of the 3rd
+    leaf. This results in deleting the entire leaf and not touching any
+    of the other leafs.
+
+ So the only leaf that is modified in the current transaction, when
+ punching the hole, is the first leaf, which contains the inode item.
+ During the full fsync, the only leaf that is passed to copy_items()
+ is that first leaf, and that's not enough for the hole detection
+ code in copy_items() to determine there's a hole between the last
+ file extent item in the 2nd leaf and the first file extent item in
+ the 3rd leaf (which was the 4th leaf before punching the hole).
+
+Fix this by scanning all leafs and punching holes as necessary when
+doing a full fsync (less common than a non-full fsync) when the NO_HOLES
+feature is enabled. The lack of explicit file extent items to mark holes
+makes it necessary to scan existing extents to determine if holes exist.
+
+A test case for fstests follows soon.
+
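+For reference, the kind of sequence that exercises the first case boils
+down to the following sketch (sizes as in the example above; the file is
+assumed to already have extents spanning multiple leafs):
+
+  #define _GNU_SOURCE
+  #include <fcntl.h>
+  #include <unistd.h>
+
+  int main(void)
+  {
+          int fd = open("/mnt/foo", O_RDWR);
+
+          /* Punch out [0, 128K) so only the first leaf is touched,
+           * then fsync; before this fix a full fsync logged no hole. */
+          fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                    0, 128 * 1024);
+          fsync(fd);
+          /* A power failure followed by log replay would then lose
+           * the hole. */
+          close(fd);
+          return 0;
+  }
+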
+Fixes: 16e7549f045d33 ("Btrfs: incompatible format change to remove hole extents")
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-log.c | 388 +++++++++++++---------------------------------------
+ 1 file changed, 100 insertions(+), 288 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -3953,7 +3953,7 @@ static int log_csums(struct btrfs_trans_
+ static noinline int copy_items(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
+ struct btrfs_path *dst_path,
+- struct btrfs_path *src_path, u64 *last_extent,
++ struct btrfs_path *src_path,
+ int start_slot, int nr, int inode_only,
+ u64 logged_isize)
+ {
+@@ -3964,7 +3964,6 @@ static noinline int copy_items(struct bt
+ struct btrfs_file_extent_item *extent;
+ struct btrfs_inode_item *inode_item;
+ struct extent_buffer *src = src_path->nodes[0];
+- struct btrfs_key first_key, last_key, key;
+ int ret;
+ struct btrfs_key *ins_keys;
+ u32 *ins_sizes;
+@@ -3972,9 +3971,6 @@ static noinline int copy_items(struct bt
+ int i;
+ struct list_head ordered_sums;
+ int skip_csum = inode->flags & BTRFS_INODE_NODATASUM;
+- bool has_extents = false;
+- bool need_find_last_extent = true;
+- bool done = false;
+
+ INIT_LIST_HEAD(&ordered_sums);
+
+@@ -3983,8 +3979,6 @@ static noinline int copy_items(struct bt
+ if (!ins_data)
+ return -ENOMEM;
+
+- first_key.objectid = (u64)-1;
+-
+ ins_sizes = (u32 *)ins_data;
+ ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
+
+@@ -4005,9 +3999,6 @@ static noinline int copy_items(struct bt
+
+ src_offset = btrfs_item_ptr_offset(src, start_slot + i);
+
+- if (i == nr - 1)
+- last_key = ins_keys[i];
+-
+ if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
+ inode_item = btrfs_item_ptr(dst_path->nodes[0],
+ dst_path->slots[0],
+@@ -4021,20 +4012,6 @@ static noinline int copy_items(struct bt
+ src_offset, ins_sizes[i]);
+ }
+
+- /*
+- * We set need_find_last_extent here in case we know we were
+- * processing other items and then walk into the first extent in
+- * the inode. If we don't hit an extent then nothing changes,
+- * we'll do the last search the next time around.
+- */
+- if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) {
+- has_extents = true;
+- if (first_key.objectid == (u64)-1)
+- first_key = ins_keys[i];
+- } else {
+- need_find_last_extent = false;
+- }
+-
+ /* take a reference on file data extents so that truncates
+ * or deletes of this inode don't have to relog the inode
+ * again
+@@ -4100,167 +4077,6 @@ static noinline int copy_items(struct bt
+ kfree(sums);
+ }
+
+- if (!has_extents)
+- return ret;
+-
+- if (need_find_last_extent && *last_extent == first_key.offset) {
+- /*
+- * We don't have any leafs between our current one and the one
+- * we processed before that can have file extent items for our
+- * inode (and have a generation number smaller than our current
+- * transaction id).
+- */
+- need_find_last_extent = false;
+- }
+-
+- /*
+- * Because we use btrfs_search_forward we could skip leaves that were
+- * not modified and then assume *last_extent is valid when it really
+- * isn't. So back up to the previous leaf and read the end of the last
+- * extent before we go and fill in holes.
+- */
+- if (need_find_last_extent) {
+- u64 len;
+-
+- ret = btrfs_prev_leaf(inode->root, src_path);
+- if (ret < 0)
+- return ret;
+- if (ret)
+- goto fill_holes;
+- if (src_path->slots[0])
+- src_path->slots[0]--;
+- src = src_path->nodes[0];
+- btrfs_item_key_to_cpu(src, &key, src_path->slots[0]);
+- if (key.objectid != btrfs_ino(inode) ||
+- key.type != BTRFS_EXTENT_DATA_KEY)
+- goto fill_holes;
+- extent = btrfs_item_ptr(src, src_path->slots[0],
+- struct btrfs_file_extent_item);
+- if (btrfs_file_extent_type(src, extent) ==
+- BTRFS_FILE_EXTENT_INLINE) {
+- len = btrfs_file_extent_ram_bytes(src, extent);
+- *last_extent = ALIGN(key.offset + len,
+- fs_info->sectorsize);
+- } else {
+- len = btrfs_file_extent_num_bytes(src, extent);
+- *last_extent = key.offset + len;
+- }
+- }
+-fill_holes:
+- /* So we did prev_leaf, now we need to move to the next leaf, but a few
+- * things could have happened
+- *
+- * 1) A merge could have happened, so we could currently be on a leaf
+- * that holds what we were copying in the first place.
+- * 2) A split could have happened, and now not all of the items we want
+- * are on the same leaf.
+- *
+- * So we need to adjust how we search for holes, we need to drop the
+- * path and re-search for the first extent key we found, and then walk
+- * forward until we hit the last one we copied.
+- */
+- if (need_find_last_extent) {
+- /* btrfs_prev_leaf could return 1 without releasing the path */
+- btrfs_release_path(src_path);
+- ret = btrfs_search_slot(NULL, inode->root, &first_key,
+- src_path, 0, 0);
+- if (ret < 0)
+- return ret;
+- ASSERT(ret == 0);
+- src = src_path->nodes[0];
+- i = src_path->slots[0];
+- } else {
+- i = start_slot;
+- }
+-
+- /*
+- * Ok so here we need to go through and fill in any holes we may have
+- * to make sure that holes are punched for those areas in case they had
+- * extents previously.
+- */
+- while (!done) {
+- u64 offset, len;
+- u64 extent_end;
+-
+- if (i >= btrfs_header_nritems(src_path->nodes[0])) {
+- ret = btrfs_next_leaf(inode->root, src_path);
+- if (ret < 0)
+- return ret;
+- ASSERT(ret == 0);
+- src = src_path->nodes[0];
+- i = 0;
+- need_find_last_extent = true;
+- }
+-
+- btrfs_item_key_to_cpu(src, &key, i);
+- if (!btrfs_comp_cpu_keys(&key, &last_key))
+- done = true;
+- if (key.objectid != btrfs_ino(inode) ||
+- key.type != BTRFS_EXTENT_DATA_KEY) {
+- i++;
+- continue;
+- }
+- extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
+- if (btrfs_file_extent_type(src, extent) ==
+- BTRFS_FILE_EXTENT_INLINE) {
+- len = btrfs_file_extent_ram_bytes(src, extent);
+- extent_end = ALIGN(key.offset + len,
+- fs_info->sectorsize);
+- } else {
+- len = btrfs_file_extent_num_bytes(src, extent);
+- extent_end = key.offset + len;
+- }
+- i++;
+-
+- if (*last_extent == key.offset) {
+- *last_extent = extent_end;
+- continue;
+- }
+- offset = *last_extent;
+- len = key.offset - *last_extent;
+- ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode),
+- offset, 0, 0, len, 0, len, 0, 0, 0);
+- if (ret)
+- break;
+- *last_extent = extent_end;
+- }
+-
+- /*
+- * Check if there is a hole between the last extent found in our leaf
+- * and the first extent in the next leaf. If there is one, we need to
+- * log an explicit hole so that at replay time we can punch the hole.
+- */
+- if (ret == 0 &&
+- key.objectid == btrfs_ino(inode) &&
+- key.type == BTRFS_EXTENT_DATA_KEY &&
+- i == btrfs_header_nritems(src_path->nodes[0])) {
+- ret = btrfs_next_leaf(inode->root, src_path);
+- need_find_last_extent = true;
+- if (ret > 0) {
+- ret = 0;
+- } else if (ret == 0) {
+- btrfs_item_key_to_cpu(src_path->nodes[0], &key,
+- src_path->slots[0]);
+- if (key.objectid == btrfs_ino(inode) &&
+- key.type == BTRFS_EXTENT_DATA_KEY &&
+- *last_extent < key.offset) {
+- const u64 len = key.offset - *last_extent;
+-
+- ret = btrfs_insert_file_extent(trans, log,
+- btrfs_ino(inode),
+- *last_extent, 0,
+- 0, len, 0, len,
+- 0, 0, 0);
+- *last_extent += len;
+- }
+- }
+- }
+- /*
+- * Need to let the callers know we dropped the path so they should
+- * re-search.
+- */
+- if (!ret && need_find_last_extent)
+- ret = 1;
+ return ret;
+ }
+
+@@ -4425,7 +4241,7 @@ static int btrfs_log_prealloc_extents(st
+ const u64 i_size = i_size_read(&inode->vfs_inode);
+ const u64 ino = btrfs_ino(inode);
+ struct btrfs_path *dst_path = NULL;
+- u64 last_extent = (u64)-1;
++ bool dropped_extents = false;
+ int ins_nr = 0;
+ int start_slot;
+ int ret;
+@@ -4447,8 +4263,7 @@ static int btrfs_log_prealloc_extents(st
+ if (slot >= btrfs_header_nritems(leaf)) {
+ if (ins_nr > 0) {
+ ret = copy_items(trans, inode, dst_path, path,
+- &last_extent, start_slot,
+- ins_nr, 1, 0);
++ start_slot, ins_nr, 1, 0);
+ if (ret < 0)
+ goto out;
+ ins_nr = 0;
+@@ -4472,8 +4287,7 @@ static int btrfs_log_prealloc_extents(st
+ path->slots[0]++;
+ continue;
+ }
+- if (last_extent == (u64)-1) {
+- last_extent = key.offset;
++ if (!dropped_extents) {
+ /*
+ * Avoid logging extent items logged in past fsync calls
+ * and leading to duplicate keys in the log tree.
+@@ -4487,6 +4301,7 @@ static int btrfs_log_prealloc_extents(st
+ } while (ret == -EAGAIN);
+ if (ret)
+ goto out;
++ dropped_extents = true;
+ }
+ if (ins_nr == 0)
+ start_slot = slot;
+@@ -4501,7 +4316,7 @@ static int btrfs_log_prealloc_extents(st
+ }
+ }
+ if (ins_nr > 0) {
+- ret = copy_items(trans, inode, dst_path, path, &last_extent,
++ ret = copy_items(trans, inode, dst_path, path,
+ start_slot, ins_nr, 1, 0);
+ if (ret > 0)
+ ret = 0;
+@@ -4688,13 +4503,8 @@ static int btrfs_log_all_xattrs(struct b
+
+ if (slot >= nritems) {
+ if (ins_nr > 0) {
+- u64 last_extent = 0;
+-
+ ret = copy_items(trans, inode, dst_path, path,
+- &last_extent, start_slot,
+- ins_nr, 1, 0);
+- /* can't be 1, extent items aren't processed */
+- ASSERT(ret <= 0);
++ start_slot, ins_nr, 1, 0);
+ if (ret < 0)
+ return ret;
+ ins_nr = 0;
+@@ -4718,13 +4528,8 @@ static int btrfs_log_all_xattrs(struct b
+ cond_resched();
+ }
+ if (ins_nr > 0) {
+- u64 last_extent = 0;
+-
+ ret = copy_items(trans, inode, dst_path, path,
+- &last_extent, start_slot,
+- ins_nr, 1, 0);
+- /* can't be 1, extent items aren't processed */
+- ASSERT(ret <= 0);
++ start_slot, ins_nr, 1, 0);
+ if (ret < 0)
+ return ret;
+ }
+@@ -4733,100 +4538,119 @@ static int btrfs_log_all_xattrs(struct b
+ }
+
+ /*
+- * If the no holes feature is enabled we need to make sure any hole between the
+- * last extent and the i_size of our inode is explicitly marked in the log. This
+- * is to make sure that doing something like:
+- *
+- * 1) create file with 128Kb of data
+- * 2) truncate file to 64Kb
+- * 3) truncate file to 256Kb
+- * 4) fsync file
+- * 5) <crash/power failure>
+- * 6) mount fs and trigger log replay
+- *
+- * Will give us a file with a size of 256Kb, the first 64Kb of data match what
+- * the file had in its first 64Kb of data at step 1 and the last 192Kb of the
+- * file correspond to a hole. The presence of explicit holes in a log tree is
+- * what guarantees that log replay will remove/adjust file extent items in the
+- * fs/subvol tree.
+- *
+- * Here we do not need to care about holes between extents, that is already done
+- * by copy_items(). We also only need to do this in the full sync path, where we
+- * lookup for extents from the fs/subvol tree only. In the fast path case, we
+- * lookup the list of modified extent maps and if any represents a hole, we
+- * insert a corresponding extent representing a hole in the log tree.
++ * When using the NO_HOLES feature if we punched a hole that causes the
++ * deletion of entire leafs or all the extent items of the first leaf (the one
++ * that contains the inode item and references) we may end up not processing
++ * any extents, because there are no leafs with a generation matching the
++ * current transaction that have extent items for our inode. So we need to find
++ * if any holes exist and then log them. We also need to log holes after any
++ * truncate operation that changes the inode's size.
+ */
+-static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root,
+- struct btrfs_inode *inode,
+- struct btrfs_path *path)
++static int btrfs_log_holes(struct btrfs_trans_handle *trans,
++ struct btrfs_root *root,
++ struct btrfs_inode *inode,
++ struct btrfs_path *path)
+ {
+ struct btrfs_fs_info *fs_info = root->fs_info;
+- int ret;
+ struct btrfs_key key;
+- u64 hole_start;
+- u64 hole_size;
+- struct extent_buffer *leaf;
+- struct btrfs_root *log = root->log_root;
+ const u64 ino = btrfs_ino(inode);
+ const u64 i_size = i_size_read(&inode->vfs_inode);
++ u64 prev_extent_end = 0;
++ int ret;
+
+- if (!btrfs_fs_incompat(fs_info, NO_HOLES))
++ if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0)
+ return 0;
+
+ key.objectid = ino;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+- key.offset = (u64)-1;
++ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+- ASSERT(ret != 0);
+ if (ret < 0)
+ return ret;
+
+- ASSERT(path->slots[0] > 0);
+- path->slots[0]--;
+- leaf = path->nodes[0];
+- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+-
+- if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) {
+- /* inode does not have any extents */
+- hole_start = 0;
+- hole_size = i_size;
+- } else {
++ while (true) {
+ struct btrfs_file_extent_item *extent;
++ struct extent_buffer *leaf = path->nodes[0];
+ u64 len;
+
+- /*
+- * If there's an extent beyond i_size, an explicit hole was
+- * already inserted by copy_items().
+- */
+- if (key.offset >= i_size)
+- return 0;
++ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
++ ret = btrfs_next_leaf(root, path);
++ if (ret < 0)
++ return ret;
++ if (ret > 0) {
++ ret = 0;
++ break;
++ }
++ leaf = path->nodes[0];
++ }
++
++ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
++ if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY)
++ break;
++
++ /* We have a hole, log it. */
++ if (prev_extent_end < key.offset) {
++ const u64 hole_len = key.offset - prev_extent_end;
++
++ /*
++ * Release the path to avoid deadlocks with other code
++ * paths that search the root while holding locks on
++ * leafs from the log root.
++ */
++ btrfs_release_path(path);
++ ret = btrfs_insert_file_extent(trans, root->log_root,
++ ino, prev_extent_end, 0,
++ 0, hole_len, 0, hole_len,
++ 0, 0, 0);
++ if (ret < 0)
++ return ret;
++
++ /*
++ * Search for the same key again in the root. Since it's
++ * an extent item and we are holding the inode lock, the
++ * key must still exist. If it doesn't just emit warning
++ * and return an error to fall back to a transaction
++ * commit.
++ */
++ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
++ if (ret < 0)
++ return ret;
++ if (WARN_ON(ret > 0))
++ return -ENOENT;
++ leaf = path->nodes[0];
++ }
+
+ extent = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+-
+ if (btrfs_file_extent_type(leaf, extent) ==
+- BTRFS_FILE_EXTENT_INLINE)
+- return 0;
++ BTRFS_FILE_EXTENT_INLINE) {
++ len = btrfs_file_extent_ram_bytes(leaf, extent);
++ prev_extent_end = ALIGN(key.offset + len,
++ fs_info->sectorsize);
++ } else {
++ len = btrfs_file_extent_num_bytes(leaf, extent);
++ prev_extent_end = key.offset + len;
++ }
+
+- len = btrfs_file_extent_num_bytes(leaf, extent);
+- /* Last extent goes beyond i_size, no need to log a hole. */
+- if (key.offset + len > i_size)
+- return 0;
+- hole_start = key.offset + len;
+- hole_size = i_size - hole_start;
++ path->slots[0]++;
++ cond_resched();
+ }
+- btrfs_release_path(path);
+
+- /* Last extent ends at i_size. */
+- if (hole_size == 0)
+- return 0;
++ if (prev_extent_end < i_size) {
++ u64 hole_len;
+
+- hole_size = ALIGN(hole_size, fs_info->sectorsize);
+- ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0,
+- hole_size, 0, hole_size, 0, 0, 0);
+- return ret;
++ btrfs_release_path(path);
++ hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize);
++ ret = btrfs_insert_file_extent(trans, root->log_root,
++ ino, prev_extent_end, 0, 0,
++ hole_len, 0, hole_len,
++ 0, 0, 0);
++ if (ret < 0)
++ return ret;
++ }
++
++ return 0;
+ }
+
+ /*
+@@ -5129,7 +4953,6 @@ static int btrfs_log_inode(struct btrfs_
+ struct btrfs_key min_key;
+ struct btrfs_key max_key;
+ struct btrfs_root *log = root->log_root;
+- u64 last_extent = 0;
+ int err = 0;
+ int ret;
+ int nritems;
+@@ -5307,7 +5130,7 @@ again:
+ ins_start_slot = path->slots[0];
+ }
+ ret = copy_items(trans, inode, dst_path, path,
+- &last_extent, ins_start_slot,
++ ins_start_slot,
+ ins_nr, inode_only,
+ logged_isize);
+ if (ret < 0) {
+@@ -5330,17 +5153,13 @@ again:
+ if (ins_nr == 0)
+ goto next_slot;
+ ret = copy_items(trans, inode, dst_path, path,
+- &last_extent, ins_start_slot,
++ ins_start_slot,
+ ins_nr, inode_only, logged_isize);
+ if (ret < 0) {
+ err = ret;
+ goto out_unlock;
+ }
+ ins_nr = 0;
+- if (ret) {
+- btrfs_release_path(path);
+- continue;
+- }
+ goto next_slot;
+ }
+
+@@ -5353,18 +5172,13 @@ again:
+ goto next_slot;
+ }
+
+- ret = copy_items(trans, inode, dst_path, path, &last_extent,
++ ret = copy_items(trans, inode, dst_path, path,
+ ins_start_slot, ins_nr, inode_only,
+ logged_isize);
+ if (ret < 0) {
+ err = ret;
+ goto out_unlock;
+ }
+- if (ret) {
+- ins_nr = 0;
+- btrfs_release_path(path);
+- continue;
+- }
+ ins_nr = 1;
+ ins_start_slot = path->slots[0];
+ next_slot:
+@@ -5378,13 +5192,12 @@ next_slot:
+ }
+ if (ins_nr) {
+ ret = copy_items(trans, inode, dst_path, path,
+- &last_extent, ins_start_slot,
++ ins_start_slot,
+ ins_nr, inode_only, logged_isize);
+ if (ret < 0) {
+ err = ret;
+ goto out_unlock;
+ }
+- ret = 0;
+ ins_nr = 0;
+ }
+ btrfs_release_path(path);
+@@ -5399,14 +5212,13 @@ next_key:
+ }
+ }
+ if (ins_nr) {
+- ret = copy_items(trans, inode, dst_path, path, &last_extent,
++ ret = copy_items(trans, inode, dst_path, path,
+ ins_start_slot, ins_nr, inode_only,
+ logged_isize);
+ if (ret < 0) {
+ err = ret;
+ goto out_unlock;
+ }
+- ret = 0;
+ ins_nr = 0;
+ }
+
+@@ -5419,7 +5231,7 @@ next_key:
+ if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
+ btrfs_release_path(path);
+ btrfs_release_path(dst_path);
+- err = btrfs_log_trailing_hole(trans, root, inode, path);
++ err = btrfs_log_holes(trans, root, inode, path);
+ if (err)
+ goto out_unlock;
+ }
--- /dev/null
+From 7227ff4de55d931bbdc156c8ef0ce4f100c78a5b Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 22 Jan 2020 12:23:20 +0000
+Subject: Btrfs: fix race between adding and putting tree mod seq elements and nodes
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 7227ff4de55d931bbdc156c8ef0ce4f100c78a5b upstream.
+
+There is a race between adding and removing elements to the tree mod log
+list and rbtree that can lead to use-after-free problems.
+
+Consider the following example that explains how/why the problem happens:
+
+1) Task A has mod log element with sequence number 200. It currently is
+ the only element in the mod log list;
+
+2) Task A calls btrfs_put_tree_mod_seq() because it no longer needs to
+   access the tree mod log. When it enters the function, it initializes
+   'min_seq' to (u64)-1. Then it acquires the lock 'tree_mod_seq_lock'
+   before checking if there are other elements in the mod seq list.
+   Since the list is empty, 'min_seq' remains set to (u64)-1. Then it
+   unlocks the lock 'tree_mod_seq_lock';
+
+3) Before task A acquires the lock 'tree_mod_log_lock', task B adds
+ itself to the mod seq list through btrfs_get_tree_mod_seq() and gets a
+ sequence number of 201;
+
+4) Some other task, call it task C, modifies a btree and, because there
+   are elements in the mod seq list, it adds a tree mod elem to the tree
+   mod log rbtree. That node added to the mod log rbtree is assigned
+   a sequence number of 202;
+
+5) Task B, which is doing fiemap and resolving indirect back references,
+   calls btrfs' get_old_root(), with 'time_seq' == 201, which in turn
+   calls tree_mod_log_search() - the search returns the mod log node
+   from the rbtree with sequence number 202, created by task C;
+
+6) Task A now acquires the lock 'tree_mod_log_lock', starts iterating
+ the mod log rbtree and finds the node with sequence number 202. Since
+ 202 is less than the previously computed 'min_seq', (u64)-1, it
+ removes the node and frees it;
+
+7) Task B still has a pointer to the node with sequence number 202, and
+   it dereferences that pointer, both directly and through the call to
+   __tree_mod_log_rewind(), resulting in a use-after-free problem.
+
+This issue can be triggered sporadically with the test case generic/561
+from fstests, and it happens more frequently with a higher number of
+duperemove processes. When it happens to me, it either freezes the VM or
+it produces a trace like the following before crashing:
+
+ [ 1245.321140] general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI
+ [ 1245.321200] CPU: 1 PID: 26997 Comm: pool Not tainted 5.5.0-rc6-btrfs-next-52 #1
+ [ 1245.321235] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014
+ [ 1245.321287] RIP: 0010:rb_next+0x16/0x50
+ [ 1245.321307] Code: ....
+ [ 1245.321372] RSP: 0018:ffffa151c4d039b0 EFLAGS: 00010202
+ [ 1245.321388] RAX: 6b6b6b6b6b6b6b6b RBX: ffff8ae221363c80 RCX: 6b6b6b6b6b6b6b6b
+ [ 1245.321409] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff8ae221363c80
+ [ 1245.321439] RBP: ffff8ae20fcc4688 R08: 0000000000000002 R09: 0000000000000000
+ [ 1245.321475] R10: ffff8ae20b120910 R11: 00000000243f8bb1 R12: 0000000000000038
+ [ 1245.321506] R13: ffff8ae221363c80 R14: 000000000000075f R15: ffff8ae223f762b8
+ [ 1245.321539] FS: 00007fdee1ec7700(0000) GS:ffff8ae236c80000(0000) knlGS:0000000000000000
+ [ 1245.321591] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ [ 1245.321614] CR2: 00007fded4030c48 CR3: 000000021da16003 CR4: 00000000003606e0
+ [ 1245.321642] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ [ 1245.321668] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ [ 1245.321706] Call Trace:
+ [ 1245.321798] __tree_mod_log_rewind+0xbf/0x280 [btrfs]
+ [ 1245.321841] btrfs_search_old_slot+0x105/0xd00 [btrfs]
+ [ 1245.321877] resolve_indirect_refs+0x1eb/0xc60 [btrfs]
+ [ 1245.321912] find_parent_nodes+0x3dc/0x11b0 [btrfs]
+ [ 1245.321947] btrfs_check_shared+0x115/0x1c0 [btrfs]
+ [ 1245.321980] ? extent_fiemap+0x59d/0x6d0 [btrfs]
+ [ 1245.322029] extent_fiemap+0x59d/0x6d0 [btrfs]
+ [ 1245.322066] do_vfs_ioctl+0x45a/0x750
+ [ 1245.322081] ksys_ioctl+0x70/0x80
+ [ 1245.322092] ? trace_hardirqs_off_thunk+0x1a/0x1c
+ [ 1245.322113] __x64_sys_ioctl+0x16/0x20
+ [ 1245.322126] do_syscall_64+0x5c/0x280
+ [ 1245.322139] entry_SYSCALL_64_after_hwframe+0x49/0xbe
+ [ 1245.322155] RIP: 0033:0x7fdee3942dd7
+ [ 1245.322177] Code: ....
+ [ 1245.322258] RSP: 002b:00007fdee1ec6c88 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+ [ 1245.322294] RAX: ffffffffffffffda RBX: 00007fded40210d8 RCX: 00007fdee3942dd7
+ [ 1245.322314] RDX: 00007fded40210d8 RSI: 00000000c020660b RDI: 0000000000000004
+ [ 1245.322337] RBP: 0000562aa89e7510 R08: 0000000000000000 R09: 00007fdee1ec6d44
+ [ 1245.322369] R10: 0000000000000073 R11: 0000000000000246 R12: 00007fdee1ec6d48
+ [ 1245.322390] R13: 00007fdee1ec6d40 R14: 00007fded40210d0 R15: 00007fdee1ec6d50
+ [ 1245.322423] Modules linked in: ....
+ [ 1245.323443] ---[ end trace 01de1e9ec5dff3cd ]---
+
+Fix this by ensuring that btrfs_put_tree_mod_seq() computes the minimum
+sequence number and iterates the rbtree while holding the lock
+'tree_mod_log_lock' in write mode. Also get rid of the 'tree_mod_seq_lock'
+lock, since it is now redundant.
+
+Fixes: bd989ba359f2ac ("Btrfs: add tree modification log functions")
+Fixes: 097b8a7c9e48e2 ("Btrfs: join tree mod log code with the code holding back delayed refs")
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.c | 8 ++------
+ fs/btrfs/ctree.h | 6 ++----
+ fs/btrfs/delayed-ref.c | 8 ++++----
+ fs/btrfs/disk-io.c | 1 -
+ fs/btrfs/tests/btrfs-tests.c | 1 -
+ 5 files changed, 8 insertions(+), 16 deletions(-)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -330,12 +330,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_
+ struct seq_list *elem)
+ {
+ write_lock(&fs_info->tree_mod_log_lock);
+- spin_lock(&fs_info->tree_mod_seq_lock);
+ if (!elem->seq) {
+ elem->seq = btrfs_inc_tree_mod_seq(fs_info);
+ list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
+ }
+- spin_unlock(&fs_info->tree_mod_seq_lock);
+ write_unlock(&fs_info->tree_mod_log_lock);
+
+ return elem->seq;
+@@ -355,7 +353,7 @@ void btrfs_put_tree_mod_seq(struct btrfs
+ if (!seq_putting)
+ return;
+
+- spin_lock(&fs_info->tree_mod_seq_lock);
++ write_lock(&fs_info->tree_mod_log_lock);
+ list_del(&elem->list);
+ elem->seq = 0;
+
+@@ -366,19 +364,17 @@ void btrfs_put_tree_mod_seq(struct btrfs
+ * blocker with lower sequence number exists, we
+ * cannot remove anything from the log
+ */
+- spin_unlock(&fs_info->tree_mod_seq_lock);
++ write_unlock(&fs_info->tree_mod_log_lock);
+ return;
+ }
+ min_seq = cur_elem->seq;
+ }
+ }
+- spin_unlock(&fs_info->tree_mod_seq_lock);
+
+ /*
+ * anything that's lower than the lowest existing (read: blocked)
+ * sequence number can be removed from the tree.
+ */
+- write_lock(&fs_info->tree_mod_log_lock);
+ tm_root = &fs_info->tree_mod_log;
+ for (node = rb_first(tm_root); node; node = next) {
+ next = rb_next(node);
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -671,14 +671,12 @@ struct btrfs_fs_info {
+ atomic_t nr_delayed_iputs;
+ wait_queue_head_t delayed_iputs_wait;
+
+- /* this protects tree_mod_seq_list */
+- spinlock_t tree_mod_seq_lock;
+ atomic64_t tree_mod_seq;
+- struct list_head tree_mod_seq_list;
+
+- /* this protects tree_mod_log */
++ /* this protects tree_mod_log and tree_mod_seq_list */
+ rwlock_t tree_mod_log_lock;
+ struct rb_root tree_mod_log;
++ struct list_head tree_mod_seq_list;
+
+ atomic_t async_delalloc_pages;
+
+--- a/fs/btrfs/delayed-ref.c
++++ b/fs/btrfs/delayed-ref.c
+@@ -492,7 +492,7 @@ void btrfs_merge_delayed_refs(struct btr
+ if (head->is_data)
+ return;
+
+- spin_lock(&fs_info->tree_mod_seq_lock);
++ read_lock(&fs_info->tree_mod_log_lock);
+ if (!list_empty(&fs_info->tree_mod_seq_list)) {
+ struct seq_list *elem;
+
+@@ -500,7 +500,7 @@ void btrfs_merge_delayed_refs(struct btr
+ struct seq_list, list);
+ seq = elem->seq;
+ }
+- spin_unlock(&fs_info->tree_mod_seq_lock);
++ read_unlock(&fs_info->tree_mod_log_lock);
+
+ again:
+ for (node = rb_first_cached(&head->ref_tree); node;
+@@ -518,7 +518,7 @@ int btrfs_check_delayed_seq(struct btrfs
+ struct seq_list *elem;
+ int ret = 0;
+
+- spin_lock(&fs_info->tree_mod_seq_lock);
++ read_lock(&fs_info->tree_mod_log_lock);
+ if (!list_empty(&fs_info->tree_mod_seq_list)) {
+ elem = list_first_entry(&fs_info->tree_mod_seq_list,
+ struct seq_list, list);
+@@ -531,7 +531,7 @@ int btrfs_check_delayed_seq(struct btrfs
+ }
+ }
+
+- spin_unlock(&fs_info->tree_mod_seq_lock);
++ read_unlock(&fs_info->tree_mod_log_lock);
+ return ret;
+ }
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -2652,7 +2652,6 @@ int open_ctree(struct super_block *sb,
+ spin_lock_init(&fs_info->fs_roots_radix_lock);
+ spin_lock_init(&fs_info->delayed_iput_lock);
+ spin_lock_init(&fs_info->defrag_inodes_lock);
+- spin_lock_init(&fs_info->tree_mod_seq_lock);
+ spin_lock_init(&fs_info->super_lock);
+ spin_lock_init(&fs_info->buffer_lock);
+ spin_lock_init(&fs_info->unused_bgs_lock);
+--- a/fs/btrfs/tests/btrfs-tests.c
++++ b/fs/btrfs/tests/btrfs-tests.c
+@@ -121,7 +121,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_
+ spin_lock_init(&fs_info->qgroup_lock);
+ spin_lock_init(&fs_info->super_lock);
+ spin_lock_init(&fs_info->fs_roots_radix_lock);
+- spin_lock_init(&fs_info->tree_mod_seq_lock);
+ mutex_init(&fs_info->qgroup_ioctl_lock);
+ mutex_init(&fs_info->qgroup_rescan_lock);
+ rwlock_init(&fs_info->tree_mod_log_lock);
--- /dev/null
+From 42ffb0bf584ae5b6b38f72259af1e0ee417ac77f Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Thu, 23 Jan 2020 15:33:02 -0500
+Subject: btrfs: flush write bio if we loop in extent_write_cache_pages
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 42ffb0bf584ae5b6b38f72259af1e0ee417ac77f upstream.
+
+There exists a deadlock with range_cyclic that has existed forever. If
+we loop around with a bio already built, we could deadlock with a writer
+that has locked the page we're attempting to write but is waiting on a
+page in our bio to be written out. The task traces are as follows
+
+ PID: 1329874 TASK: ffff889ebcdf3800 CPU: 33 COMMAND: "kworker/u113:5"
+ #0 [ffffc900297bb658] __schedule at ffffffff81a4c33f
+ #1 [ffffc900297bb6e0] schedule at ffffffff81a4c6e3
+ #2 [ffffc900297bb6f8] io_schedule at ffffffff81a4ca42
+ #3 [ffffc900297bb708] __lock_page at ffffffff811f145b
+ #4 [ffffc900297bb798] __process_pages_contig at ffffffff814bc502
+ #5 [ffffc900297bb8c8] lock_delalloc_pages at ffffffff814bc684
+ #6 [ffffc900297bb900] find_lock_delalloc_range at ffffffff814be9ff
+ #7 [ffffc900297bb9a0] writepage_delalloc at ffffffff814bebd0
+ #8 [ffffc900297bba18] __extent_writepage at ffffffff814bfbf2
+ #9 [ffffc900297bba98] extent_write_cache_pages at ffffffff814bffbd
+
+ PID: 2167901 TASK: ffff889dc6a59c00 CPU: 14 COMMAND:
+ "aio-dio-invalid"
+ #0 [ffffc9003b50bb18] __schedule at ffffffff81a4c33f
+ #1 [ffffc9003b50bba0] schedule at ffffffff81a4c6e3
+ #2 [ffffc9003b50bbb8] io_schedule at ffffffff81a4ca42
+ #3 [ffffc9003b50bbc8] wait_on_page_bit at ffffffff811f24d6
+ #4 [ffffc9003b50bc60] prepare_pages at ffffffff814b05a7
+ #5 [ffffc9003b50bcd8] btrfs_buffered_write at ffffffff814b1359
+ #6 [ffffc9003b50bdb0] btrfs_file_write_iter at ffffffff814b5933
+ #7 [ffffc9003b50be38] new_sync_write at ffffffff8128f6a8
+ #8 [ffffc9003b50bec8] vfs_write at ffffffff81292b9d
+ #9 [ffffc9003b50bf00] ksys_pwrite64 at ffffffff81293032
+
+I used drgn to find the respective pages we were stuck on
+
+page_entry.page 0xffffea00fbfc7500 index 8148 bit 15 pid 2167901
+page_entry.page 0xffffea00f9bb7400 index 7680 bit 0 pid 1329874
+
+As you can see the kworker is waiting for bit 0 (PG_locked) on index
+7680, and aio-dio-invalid is waiting for bit 15 (PG_writeback) on index
+8148. aio-dio-invalid has page 7680 locked, and the kworker epd looks
+like the following
+
+ crash> struct extent_page_data ffffc900297bbbb0
+ struct extent_page_data {
+ bio = 0xffff889f747ed830,
+ tree = 0xffff889eed6ba448,
+ extent_locked = 0,
+ sync_io = 0
+ }
+
+Probably worth mentioning as well that it waits for writeback of the
+page to complete while holding a lock on it (at prepare_pages()).
+
+Using drgn I walked the bio pages looking for page
+0xffffea00fbfc7500 which is the one we're waiting for writeback on
+
+ bio = Object(prog, 'struct bio', address=0xffff889f747ed830)
+ for i in range(0, bio.bi_vcnt.value_()):
+ bv = bio.bi_io_vec[i]
+ if bv.bv_page.value_() == 0xffffea00fbfc7500:
+ print("FOUND IT")
+
+which validated what I suspected.
+
+The fix for this is simple, flush the epd before we loop back around to
+the beginning of the file during writeout.
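+
+As a sketch of the resulting control flow (abbreviated from the diff
+below, not a complete function), the retry path now flushes any
+partially built bio before rescanning from index 0:
+
+	/* range_cyclic wrapped around: rescan from the start */
+	scanned = 1;
+	index = 0;
+
+	/*
+	 * Flush the partially built bio first, so a writer blocked on
+	 * one of its pages can finish and unlock the page we are about
+	 * to wait on.
+	 */
+	ret = flush_write_bio(epd);
+	if (!ret)
+		goto retry;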
+
+Fixes: b293f02e1423 ("Btrfs: Add writepages support")
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent_io.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -4185,7 +4185,16 @@ retry:
+ */
+ scanned = 1;
+ index = 0;
+- goto retry;
++
++ /*
++ * If we're looping we could run into a page that is locked by a
++ * writer and that writer could be waiting on writeback for a
++ * page in our current bio, and thus deadlock, so flush the
++ * write bio here.
++ */
++ ret = flush_write_bio(epd);
++ if (!ret)
++ goto retry;
+ }
+
+ if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
--- /dev/null
+From 831d2fa25ab8e27592b1b0268dae6f2dfaf7cc43 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 16 Dec 2019 18:26:56 +0000
+Subject: Btrfs: make deduplication with range including the last block work
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 831d2fa25ab8e27592b1b0268dae6f2dfaf7cc43 upstream.
+
+Since btrfs was migrated to use the generic VFS helpers for clone and
+deduplication, it stopped allowing for the last block of a file to be
+deduplicated when the source file size is not sector size aligned (when
+eof is somewhere in the middle of the last block). There are two reasons
+for that:
+
+1) The generic code always rounds down, to a multiple of the block size,
+ the range's length for deduplications. This means we end up never
+ deduplicating the last block when the eof is not block size aligned,
+ even for the safe case where the destination range's end offset matches
+ the destination file's size. That rounding down operation is done at
+ generic_remap_check_len();
+
+2) Because of that, the btrfs specific code no longer expects any
+   non-aligned range lengths for deduplication and therefore does not
+   work if such a non-aligned length is given.
+
+This patch addresses that second part, and it depends on a patch that
+fixes generic_remap_check_len(), in the VFS, which was submitted earlier
+and has the following subject:
+
+ "fs: allow deduplication of eof block into the end of the destination file"
+
+These two patches address reports from users that started seeing lower
+deduplication rates due to the last block never being deduplicated when
+the file size is not aligned to the filesystem's block size.
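+
+For illustration, ALIGN() rounds the length up to the next block
+boundary, so a dedup request covering a partial tail block is extended
+to the full block internally. A minimal sketch, assuming the classic
+power-of-two round-up macro and a 4096-byte block size:
+
+	#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))
+
+	/* len = 10000, bs = 4096 -> ALIGN(len, bs) == 12288 */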
+
+Link: https://lore.kernel.org/linux-btrfs/2019-1576167349.500456@svIo.N5dq.dFFD/
+CC: stable@vger.kernel.org # 5.1+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ioctl.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -3244,6 +3244,7 @@ static void btrfs_double_extent_lock(str
+ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
+ struct inode *dst, u64 dst_loff)
+ {
++ const u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
+ int ret;
+
+ /*
+@@ -3251,7 +3252,7 @@ static int btrfs_extent_same_range(struc
+ * source range to serialize with relocation.
+ */
+ btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
+- ret = btrfs_clone(src, dst, loff, len, len, dst_loff, 1);
++ ret = btrfs_clone(src, dst, loff, len, ALIGN(len, bs), dst_loff, 1);
+ btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
+
+ return ret;
--- /dev/null
+From d62b23c94952e78211a383b7d90ef0afbd9a3717 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Fri, 17 Jan 2020 08:57:51 -0500
+Subject: btrfs: set trans->dirty in btrfs_commit_transaction
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit d62b23c94952e78211a383b7d90ef0afbd9a3717 upstream.
+
+If we abort a transaction we have the following sequence
+
+if (!trans->dirty && list_empty(&trans->new_bgs))
+ return;
+WRITE_ONCE(trans->transaction->aborted, err);
+
+The idea being if we didn't modify anything with our trans handle then
+we don't really need to abort the whole transaction, maybe the other
+trans handles are fine and we can carry on.
+
+However in the case of create_snapshot we add a pending_snapshot object
+to our transaction and then commit the transaction. We don't actually
+modify anything. sync() behaves the same way: it attaches to an
+existing transaction and commits it. This means that if we hit an IO
+error in the right place we could abort the committing transaction
+with trans->dirty not set, and thus never set transaction->aborted.
+
+This is a problem because in the create_snapshot() case we depend on
+pending->error being set to something, or btrfs_commit_transaction
+returning an error.
+
+If we are not the trans handle that gets to commit the transaction, and
+we're waiting on the commit to happen we get our return value from
+cur_trans->aborted. If this was not set to anything because sync() hit
+an error in the transaction commit before it could modify anything then
+cur_trans->aborted would be 0. Thus we'd return 0 from
+btrfs_commit_transaction() in create_snapshot.
+
+This is a problem because we then try to do things with
+pending_snapshot->snap, which will be NULL because we didn't create the
+snapshot, and then we'll get a NULL pointer dereference like the
+following
+
+"BUG: kernel NULL pointer dereference, address: 00000000000001f0"
+RIP: 0010:btrfs_orphan_cleanup+0x2d/0x330
+Call Trace:
+ ? btrfs_mksubvol.isra.31+0x3f2/0x510
+ btrfs_mksubvol.isra.31+0x4bc/0x510
+ ? __sb_start_write+0xfa/0x200
+ ? mnt_want_write_file+0x24/0x50
+ btrfs_ioctl_snap_create_transid+0x16c/0x1a0
+ btrfs_ioctl_snap_create_v2+0x11e/0x1a0
+ btrfs_ioctl+0x1534/0x2c10
+ ? free_debug_processing+0x262/0x2a3
+ do_vfs_ioctl+0xa6/0x6b0
+ ? do_sys_open+0x188/0x220
+ ? syscall_trace_enter+0x1f8/0x330
+ ksys_ioctl+0x60/0x90
+ __x64_sys_ioctl+0x16/0x20
+ do_syscall_64+0x4a/0x1b0
+
+In order to fix this we need to make sure anybody who calls
+commit_transaction has trans->dirty set, so that on failure the
+trans->transaction->aborted value is properly set and any waiters
+know bad things happened.
+
+This was found while I was running generic/475 with my modified
+fsstress, it reproduced within a few runs. I ran with this patch all
+night and didn't see the problem again.
+
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/transaction.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -1949,6 +1949,14 @@ int btrfs_commit_transaction(struct btrf
+ struct btrfs_transaction *prev_trans = NULL;
+ int ret;
+
++ /*
++ * Some places just start a transaction to commit it. We need to make
++ * sure that if this commit fails that the abort code actually marks the
++ * transaction as failed, so set trans->dirty to make the abort code do
++ * the right thing.
++ */
++ trans->dirty = true;
++
+ /* Stop the commit early if ->aborted is set */
+ if (unlikely(READ_ONCE(cur_trans->aborted))) {
+ ret = cur_trans->aborted;
--- /dev/null
+From 2c1fb9d86f6820abbfaa38a6836157c76ccb4e7b Mon Sep 17 00:00:00 2001
+From: Claudiu Beznea <claudiu.beznea@microchip.com>
+Date: Wed, 18 Dec 2019 14:28:25 +0200
+Subject: drm: atmel-hlcdc: enable clock before configuring timing engine
+
+From: Claudiu Beznea <claudiu.beznea@microchip.com>
+
+commit 2c1fb9d86f6820abbfaa38a6836157c76ccb4e7b upstream.
+
+Changing the pixel clock source without having this clock source
+enabled will block the timing engine and the operations that follow
+(in this case, setting the ATMEL_HLCDC_CFG(5) settings in
+atmel_hlcdc_crtc_mode_set_nofb() will fail). It is recommended
+(although not stated in the datasheet) to actually enable the pixel
+clock source before making any changes to the timing engine (only the
+SAM9X60 datasheet specifies that the peripheral clock and pixel clock
+must be enabled before using the LCD controller).
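+
+The resulting pattern is the usual clk API bracket around the timing
+engine setup (a condensed sketch of the mode_set path, not the full
+driver code):
+
+	ret = clk_prepare_enable(crtc->dc->hlcdc->sys_clk);
+	if (ret)
+		return;
+
+	/* ... program the timing engine, select the pixel clock ... */
+
+	clk_disable_unprepare(crtc->dc->hlcdc->sys_clk);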
+
+Fixes: 1a396789f65a ("drm: add Atmel HLCDC Display Controller support")
+Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
+Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
+Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
+Cc: <stable@vger.kernel.org> # v4.0+
+Link: https://patchwork.freedesktop.org/patch/msgid/1576672109-22707-3-git-send-email-claudiu.beznea@microchip.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
++++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
+@@ -73,7 +73,11 @@ static void atmel_hlcdc_crtc_mode_set_no
+ unsigned long prate;
+ unsigned int mask = ATMEL_HLCDC_CLKDIV_MASK | ATMEL_HLCDC_CLKPOL;
+ unsigned int cfg = 0;
+- int div;
++ int div, ret;
++
++ ret = clk_prepare_enable(crtc->dc->hlcdc->sys_clk);
++ if (ret)
++ return;
+
+ vm.vfront_porch = adj->crtc_vsync_start - adj->crtc_vdisplay;
+ vm.vback_porch = adj->crtc_vtotal - adj->crtc_vsync_end;
+@@ -147,6 +151,8 @@ static void atmel_hlcdc_crtc_mode_set_no
+ ATMEL_HLCDC_VSPSU | ATMEL_HLCDC_VSPHO |
+ ATMEL_HLCDC_GUARDTIME_MASK | ATMEL_HLCDC_MODE_MASK,
+ cfg);
++
++ clk_disable_unprepare(crtc->dc->hlcdc->sys_clk);
+ }
+
+ static enum drm_mode_status
--- /dev/null
+From 51a19d150b520f6cb42143f3bdffacd3c33d7ac5 Mon Sep 17 00:00:00 2001
+From: Peter Rosin <peda@axentia.se>
+Date: Wed, 18 Dec 2019 14:28:28 +0200
+Subject: drm: atmel-hlcdc: prefer a lower pixel-clock than requested
+
+From: Peter Rosin <peda@axentia.se>
+
+commit 51a19d150b520f6cb42143f3bdffacd3c33d7ac5 upstream.
+
+The intention was to only select a higher pixel-clock rate than the
+requested one if a slight overclocking would result in a rate
+significantly closer to the requested rate than the conservative
+lower pixel-clock rate. The patch being fixed has the logic the other
+way around and actually prefers the higher frequency. Fix that.
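+
+With div = DIV_ROUND_UP(prate, mode_rate) giving the conservative
+(lower) rate and div_low = prate / mode_rate giving the overclocked
+(higher) rate, the higher rate should only win when its error is at
+least ten times smaller (illustrative arithmetic matching the fixed
+condition):
+
+	/* overclock only if it is at least 10x closer to the target */
+	if (div_low >= 2 &&
+	    10 * (prate / div_low - mode_rate) < (mode_rate - prate / div))
+		div = div_low;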
+
+Signed-off-by: Peter Rosin <peda@axentia.se>
+Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
+Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
+Fixes: 9946a3a9dbed ("drm/atmel-hlcdc: allow selecting a higher pixel-clock than requested")
+Reported-by: Claudiu Beznea <claudiu.beznea@microchip.com>
+Tested-by: Claudiu Beznea <claudiu.beznea@microchip.com>
+Cc: Boris Brezillon <boris.brezillon@bootlin.com>
+Cc: <stable@vger.kernel.org> # v4.20+
+Link: https://patchwork.freedesktop.org/patch/msgid/1576672109-22707-6-git-send-email-claudiu.beznea@microchip.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
++++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
+@@ -121,8 +121,8 @@ static void atmel_hlcdc_crtc_mode_set_no
+ int div_low = prate / mode_rate;
+
+ if (div_low >= 2 &&
+- ((prate / div_low - mode_rate) <
+- 10 * (mode_rate - prate / div)))
++ (10 * (prate / div_low - mode_rate) <
++ (mode_rate - prate / div)))
+ /*
+ * At least 10 times better when using a higher
+ * frequency than requested, instead of a lower.
--- /dev/null
+From 07acf4bafe81dd37eff3fbcfbbdbc48084bc202b Mon Sep 17 00:00:00 2001
+From: Claudiu Beznea <claudiu.beznea@microchip.com>
+Date: Wed, 18 Dec 2019 14:28:24 +0200
+Subject: drm: atmel-hlcdc: use double rate for pixel clock only if supported
+
+From: Claudiu Beznea <claudiu.beznea@microchip.com>
+
+commit 07acf4bafe81dd37eff3fbcfbbdbc48084bc202b upstream.
+
+The doubled system clock should be used as the pixel clock source
+only if this is supported, as indicated by the value of
+atmel_hlcdc_crtc::dc::desc::fixed_clksrc.
+
+Fixes: a6eca2abdd42 ("drm: atmel-hlcdc: add config option for clock selection")
+Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
+Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
+Cc: Boris Brezillon <bbrezillon@kernel.org>
+Cc: <stable@vger.kernel.org> # v5.3+
+Link: https://patchwork.freedesktop.org/patch/msgid/1576672109-22707-2-git-send-email-claudiu.beznea@microchip.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
++++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
+@@ -95,14 +95,14 @@ static void atmel_hlcdc_crtc_mode_set_no
+ (adj->crtc_hdisplay - 1) |
+ ((adj->crtc_vdisplay - 1) << 16));
+
++ prate = clk_get_rate(crtc->dc->hlcdc->sys_clk);
++ mode_rate = adj->crtc_clock * 1000;
+ if (!crtc->dc->desc->fixed_clksrc) {
++ prate *= 2;
+ cfg |= ATMEL_HLCDC_CLKSEL;
+ mask |= ATMEL_HLCDC_CLKSEL;
+ }
+
+- prate = 2 * clk_get_rate(crtc->dc->hlcdc->sys_clk);
+- mode_rate = adj->crtc_clock * 1000;
+-
+ div = DIV_ROUND_UP(prate, mode_rate);
+ if (div < 2) {
+ div = 2;
--- /dev/null
+From 433480c1afd44f3e1e664b85063d98cefeefa0ed Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
+Date: Fri, 22 Nov 2019 19:56:20 +0200
+Subject: drm/rect: Avoid division by zero
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ville Syrjälä <ville.syrjala@linux.intel.com>
+
+commit 433480c1afd44f3e1e664b85063d98cefeefa0ed upstream.
+
+Check for zero width/height destination rectangle in
+drm_rect_clip_scaled() to avoid a division by zero.
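+
+The helper scales the remaining destination span back into source
+coordinates, which divides by the destination size; a zero-sized
+destination now short-circuits to zero. A simplified sketch (the real
+helper also picks the rounding direction to preserve up/downscaling):
+
+	static u32 clip_scaled(u32 src, u32 dst, u32 clip)
+	{
+		if (dst == 0)
+			return 0;	/* fully clipped: no source left */
+
+		return mul_u32_u32(src, dst - clip) / dst;
+	}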
+
+Cc: stable@vger.kernel.org
+Fixes: f96bdf564f3e ("drm/rect: Handle rounding errors in drm_rect_clip_scaled, v3.")
+Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
+Cc: Benjamin Gaignard <benjamin.gaignard@st.com>
+Cc: Daniel Vetter <daniel@ffwll.ch>
+Testcase: igt/kms_selftest/drm_rect_clip_scaled_div_by_zero
+Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20191122175623.13565-2-ville.syrjala@linux.intel.com
+Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Reviewed-by: Benjamin Gaignard <benjamin.gaignard@st.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/drm_rect.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/drm_rect.c
++++ b/drivers/gpu/drm/drm_rect.c
+@@ -54,7 +54,12 @@ EXPORT_SYMBOL(drm_rect_intersect);
+
+ static u32 clip_scaled(u32 src, u32 dst, u32 clip)
+ {
+- u64 tmp = mul_u32_u32(src, dst - clip);
++ u64 tmp;
++
++ if (dst == 0)
++ return 0;
++
++ tmp = mul_u32_u32(src, dst - clip);
+
+ /*
+ * Round toward 1.0 when clipping so that we don't accidentally
--- /dev/null
+From b5e683d5cab8cd433b06ae178621f083cabd4f63 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Sun, 2 Feb 2020 08:23:03 -0700
+Subject: eventfd: track eventfd_signal() recursion depth
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit b5e683d5cab8cd433b06ae178621f083cabd4f63 upstream.
+
+eventfd use cases from aio and io_uring can deadlock due to circular
+or recursive calling, when eventfd_signal() tries to grab the waitqueue
+lock. On top of that, it's also possible to construct notification
+chains that are deep enough that we could blow the stack.
+
+Add a percpu counter that tracks the per-cpu recursion depth, and warn
+if we recurse. The counter is also exposed so that users of eventfd_signal()
+can do the right thing if it's non-zero in the context where it is
+called.
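+
+The guard itself is a small per-cpu counter bracketing the wakeup (a
+condensed sketch of the mechanism; see the diff below for the real
+placement inside eventfd_signal()):
+
+	DEFINE_PER_CPU(int, eventfd_wake_count);
+
+	if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
+		return 0;	/* recursing - caller must defer */
+
+	this_cpu_inc(eventfd_wake_count);
+	wake_up_locked_poll(&ctx->wqh, EPOLLIN);
+	this_cpu_dec(eventfd_wake_count);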
+
+Cc: stable@vger.kernel.org # 4.19+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/eventfd.c | 15 +++++++++++++++
+ include/linux/eventfd.h | 14 ++++++++++++++
+ 2 files changed, 29 insertions(+)
+
+--- a/fs/eventfd.c
++++ b/fs/eventfd.c
+@@ -24,6 +24,8 @@
+ #include <linux/seq_file.h>
+ #include <linux/idr.h>
+
++DEFINE_PER_CPU(int, eventfd_wake_count);
++
+ static DEFINE_IDA(eventfd_ida);
+
+ struct eventfd_ctx {
+@@ -60,12 +62,25 @@ __u64 eventfd_signal(struct eventfd_ctx
+ {
+ unsigned long flags;
+
++ /*
++ * Deadlock or stack overflow issues can happen if we recurse here
++ * through waitqueue wakeup handlers. If the caller uses potentially
++ * nested waitqueues with custom wakeup handlers, then it should
++ * check eventfd_signal_count() before calling this function. If
++ * it returns true, the eventfd_signal() call should be deferred to a
++ * safe context.
++ */
++ if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
++ return 0;
++
+ spin_lock_irqsave(&ctx->wqh.lock, flags);
++ this_cpu_inc(eventfd_wake_count);
+ if (ULLONG_MAX - ctx->count < n)
+ n = ULLONG_MAX - ctx->count;
+ ctx->count += n;
+ if (waitqueue_active(&ctx->wqh))
+ wake_up_locked_poll(&ctx->wqh, EPOLLIN);
++ this_cpu_dec(eventfd_wake_count);
+ spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+
+ return n;
+--- a/include/linux/eventfd.h
++++ b/include/linux/eventfd.h
+@@ -12,6 +12,8 @@
+ #include <linux/fcntl.h>
+ #include <linux/wait.h>
+ #include <linux/err.h>
++#include <linux/percpu-defs.h>
++#include <linux/percpu.h>
+
+ /*
+ * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
+@@ -40,6 +42,13 @@ __u64 eventfd_signal(struct eventfd_ctx
+ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
+ __u64 *cnt);
+
++DECLARE_PER_CPU(int, eventfd_wake_count);
++
++static inline bool eventfd_signal_count(void)
++{
++ return this_cpu_read(eventfd_wake_count);
++}
++
+ #else /* CONFIG_EVENTFD */
+
+ /*
+@@ -68,6 +77,11 @@ static inline int eventfd_ctx_remove_wai
+ return -ENOSYS;
+ }
+
++static inline bool eventfd_signal_count(void)
++{
++ return false;
++}
++
+ #endif
+
+ #endif /* _LINUX_EVENTFD_H */
--- /dev/null
+From 547c556f4db7c09447ecf5f833ab6aaae0c5ab58 Mon Sep 17 00:00:00 2001
+From: Eric Biggers <ebiggers@google.com>
+Date: Tue, 31 Dec 2019 12:11:49 -0600
+Subject: ext4: fix deadlock allocating crypto bounce page from mempool
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit 547c556f4db7c09447ecf5f833ab6aaae0c5ab58 upstream.
+
+ext4_writepages() on an encrypted file has to encrypt the data, but it
+can't modify the pagecache pages in-place, so it encrypts the data into
+bounce pages and writes those instead. All bounce pages are allocated
+from a mempool using GFP_NOFS.
+
+This is not correct use of a mempool, and it can deadlock. This is
+because GFP_NOFS includes __GFP_DIRECT_RECLAIM, which enables the "never
+fail" mode for mempool_alloc() where a failed allocation will fall back
+to waiting for one of the preallocated elements in the pool.
+
+But since this mode is used for all of a bio's pages and not just the
+first, it can deadlock waiting for pages already in the bio to be freed.
+
+This deadlock can be reproduced by patching mempool_alloc() to pretend
+that pool->alloc() always fails (so that it always falls back to the
+preallocations), and then creating an encrypted file of size > 128 KiB.
+
+Fix it by only using GFP_NOFS for the first page in the bio. For
+subsequent pages just use GFP_NOWAIT, and if any of those fail, just
+submit the bio and start a new one.
+
+This will need to be fixed in f2fs too, but that's less straightforward.
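+
+The allocation policy boils down to: a guaranteed allocation only
+while the bio is empty, opportunistic afterwards (a condensed sketch
+of the logic, not the full ext4_bio_write_page()):
+
+	gfp_t gfp_flags = GFP_NOFS;
+
+	/* only the bio's first bounce page may block on the mempool */
+	if (io->io_bio)
+		gfp_flags = GFP_NOWAIT | __GFP_NOWARN;
+
+	/* on -ENOMEM: submit the current bio, retry with GFP_NOFS */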
+
+Fixes: c9af28fdd449 ("ext4 crypto: don't let data integrity writebacks fail with ENOMEM")
+Cc: stable@vger.kernel.org
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Link: https://lore.kernel.org/r/20191231181149.47619-1-ebiggers@kernel.org
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/page-io.c | 19 ++++++++++++++-----
+ 1 file changed, 14 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/page-io.c
++++ b/fs/ext4/page-io.c
+@@ -478,17 +478,26 @@ int ext4_bio_write_page(struct ext4_io_s
+ gfp_t gfp_flags = GFP_NOFS;
+ unsigned int enc_bytes = round_up(len, i_blocksize(inode));
+
++ /*
++ * Since bounce page allocation uses a mempool, we can only use
++ * a waiting mask (i.e. request guaranteed allocation) on the
++ * first page of the bio. Otherwise it can deadlock.
++ */
++ if (io->io_bio)
++ gfp_flags = GFP_NOWAIT | __GFP_NOWARN;
+ retry_encrypt:
+ bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes,
+ 0, gfp_flags);
+ if (IS_ERR(bounce_page)) {
+ ret = PTR_ERR(bounce_page);
+- if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
+- if (io->io_bio) {
++ if (ret == -ENOMEM &&
++ (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) {
++ gfp_flags = GFP_NOFS;
++ if (io->io_bio)
+ ext4_io_submit(io);
+- congestion_wait(BLK_RW_ASYNC, HZ/50);
+- }
+- gfp_flags |= __GFP_NOFAIL;
++ else
++ gfp_flags |= __GFP_NOFAIL;
++ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto retry_encrypt;
+ }
+ bounce_page = NULL;
--- /dev/null
+From ec772f01307a2c06ebf6cdd221e6b518a71ddae7 Mon Sep 17 00:00:00 2001
+From: Eric Biggers <ebiggers@google.com>
+Date: Thu, 23 Jan 2020 20:12:34 -0800
+Subject: ext4: fix race conditions in ->d_compare() and ->d_hash()
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit ec772f01307a2c06ebf6cdd221e6b518a71ddae7 upstream.
+
+Since ->d_compare() and ->d_hash() can be called in RCU-walk mode,
+->d_parent and ->d_inode can be concurrently modified, and in
+particular, ->d_inode may be changed to NULL. For ext4_d_hash() this
+resulted in a reproducible NULL dereference if a lookup is done in a
+directory being deleted, e.g. with:
+
+ int main()
+ {
+ if (fork()) {
+ for (;;) {
+ mkdir("subdir", 0700);
+ rmdir("subdir");
+ }
+ } else {
+ for (;;)
+ access("subdir/file", 0);
+ }
+ }
+
+... or by running the 't_encrypted_d_revalidate' program from xfstests.
+Both repros work in any directory on a filesystem with the encoding
+feature, even if the directory doesn't actually have the casefold flag.
+
+I couldn't reproduce a crash in ext4_d_compare(), but it appears that a
+similar crash is possible there.
+
+Fix these bugs by reading ->d_parent and ->d_inode using READ_ONCE() and
+falling back to the case sensitive behavior if the inode is NULL.
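+
+The pattern for RCU-walk-safe helpers is to snapshot the racy
+pointers once and then only use the snapshots (abbreviated from the
+fixed ext4_d_compare() below):
+
+	const struct dentry *parent = READ_ONCE(dentry->d_parent);
+	const struct inode *inode = READ_ONCE(parent->d_inode);
+
+	if (!inode || !IS_CASEFOLDED(inode) ||
+	    !EXT4_SB(inode->i_sb)->s_encoding) {
+		/* fall back to the case-sensitive comparison */
+	}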
+
+Reported-by: Al Viro <viro@zeniv.linux.org.uk>
+Fixes: b886ee3e778e ("ext4: Support case-insensitive file name lookups")
+Cc: <stable@vger.kernel.org> # v5.2+
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Link: https://lore.kernel.org/r/20200124041234.159740-1-ebiggers@kernel.org
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/dir.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/dir.c
++++ b/fs/ext4/dir.c
+@@ -673,9 +673,11 @@ static int ext4_d_compare(const struct d
+ const char *str, const struct qstr *name)
+ {
+ struct qstr qstr = {.name = str, .len = len };
+- struct inode *inode = dentry->d_parent->d_inode;
++ const struct dentry *parent = READ_ONCE(dentry->d_parent);
++ const struct inode *inode = READ_ONCE(parent->d_inode);
+
+- if (!IS_CASEFOLDED(inode) || !EXT4_SB(inode->i_sb)->s_encoding) {
++ if (!inode || !IS_CASEFOLDED(inode) ||
++ !EXT4_SB(inode->i_sb)->s_encoding) {
+ if (len != name->len)
+ return -1;
+ return memcmp(str, name->name, len);
+@@ -688,10 +690,11 @@ static int ext4_d_hash(const struct dent
+ {
+ const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb);
+ const struct unicode_map *um = sbi->s_encoding;
++ const struct inode *inode = READ_ONCE(dentry->d_inode);
+ unsigned char *norm;
+ int len, ret = 0;
+
+- if (!IS_CASEFOLDED(dentry->d_inode) || !um)
++ if (!inode || !IS_CASEFOLDED(inode) || !um)
+ return 0;
+
+ norm = kmalloc(PATH_MAX, GFP_ATOMIC);
--- /dev/null
+From 7582026f6f3588ecebd281965c8a71aff6fb6158 Mon Sep 17 00:00:00 2001
+From: Abhi Das <adas@redhat.com>
+Date: Tue, 4 Feb 2020 14:14:56 -0600
+Subject: gfs2: fix gfs2_find_jhead that returns uninitialized jhead with seq 0
+
+From: Abhi Das <adas@redhat.com>
+
+commit 7582026f6f3588ecebd281965c8a71aff6fb6158 upstream.
+
+When the first log header in a journal happens to have a sequence
+number of 0, a bug in gfs2_find_jhead() causes it to prematurely exit,
+and return an uninitialized jhead with seq 0. This can cause failures
+in the caller. For instance, a mount fails in one test case.
+
+The correct behavior is for it to continue searching through the journal
+to find the correct journal head with the highest sequence number.
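+
+With the head seeded at sequence 0, a strict '>' comparison silently
+ignores a genuine log header whose sequence is also 0, while '>='
+records it (an abbreviated sketch of the page-scan loop, with the
+early exit condensed to a return):
+
+	if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
+		if (lh.lh_sequence >= head->lh_sequence)
+			*head = lh;	/* newest header seen so far */
+		else
+			return true;	/* sequence dropped: head found */
+	}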
+
+Fixes: f4686c26ecc3 ("gfs2: read journal in large chunks")
+Cc: stable@vger.kernel.org # v5.2+
+Signed-off-by: Abhi Das <adas@redhat.com>
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/gfs2/lops.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/gfs2/lops.c
++++ b/fs/gfs2/lops.c
+@@ -421,7 +421,7 @@ static bool gfs2_jhead_pg_srch(struct gf
+
+ for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
+ if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
+- if (lh.lh_sequence > head->lh_sequence)
++ if (lh.lh_sequence >= head->lh_sequence)
+ *head = lh;
+ else {
+ ret = true;
--- /dev/null
+From 6e5e41e2dc4e4413296d5a4af54ac92d7cd52317 Mon Sep 17 00:00:00 2001
+From: Andreas Gruenbacher <agruenba@redhat.com>
+Date: Tue, 14 Jan 2020 17:12:18 +0100
+Subject: gfs2: fix O_SYNC write handling
+
+From: Andreas Gruenbacher <agruenba@redhat.com>
+
+commit 6e5e41e2dc4e4413296d5a4af54ac92d7cd52317 upstream.
+
+In gfs2_file_write_iter, for direct writes, the error checking in the buffered
+write fallback case is incomplete. This can cause inode write errors to go
+undetected. Fix and clean up gfs2_file_write_iter along the way.
+
+Based on a proposed fix by Christoph Hellwig <hch@lst.de>.
+
+Fixes: 967bcc91b044 ("gfs2: iomap direct I/O support")
+Cc: stable@vger.kernel.org # v4.19+
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/gfs2/file.c | 51 +++++++++++++++++++++------------------------------
+ 1 file changed, 21 insertions(+), 30 deletions(-)
+
+--- a/fs/gfs2/file.c
++++ b/fs/gfs2/file.c
+@@ -833,7 +833,7 @@ static ssize_t gfs2_file_write_iter(stru
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ struct gfs2_inode *ip = GFS2_I(inode);
+- ssize_t written = 0, ret;
++ ssize_t ret;
+
+ ret = gfs2_rsqa_alloc(ip);
+ if (ret)
+@@ -865,55 +865,46 @@ static ssize_t gfs2_file_write_iter(stru
+
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ struct address_space *mapping = file->f_mapping;
+- loff_t pos, endbyte;
+- ssize_t buffered;
++ ssize_t buffered, ret2;
+
+- written = gfs2_file_direct_write(iocb, from);
+- if (written < 0 || !iov_iter_count(from))
++ ret = gfs2_file_direct_write(iocb, from);
++ if (ret < 0 || !iov_iter_count(from))
+ goto out_unlock;
+
++ iocb->ki_flags |= IOCB_DSYNC;
+ current->backing_dev_info = inode_to_bdi(inode);
+- ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
++ buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+ current->backing_dev_info = NULL;
+- if (unlikely(ret < 0))
++ if (unlikely(buffered <= 0))
+ goto out_unlock;
+- buffered = ret;
+
+ /*
+ * We need to ensure that the page cache pages are written to
+ * disk and invalidated to preserve the expected O_DIRECT
+- * semantics.
++ * semantics. If the writeback or invalidate fails, only report
++ * the direct I/O range as we don't know if the buffered pages
++ * made it to disk.
+ */
+- pos = iocb->ki_pos;
+- endbyte = pos + buffered - 1;
+- ret = filemap_write_and_wait_range(mapping, pos, endbyte);
+- if (!ret) {
+- iocb->ki_pos += buffered;
+- written += buffered;
+- invalidate_mapping_pages(mapping,
+- pos >> PAGE_SHIFT,
+- endbyte >> PAGE_SHIFT);
+- } else {
+- /*
+- * We don't know how much we wrote, so just return
+- * the number of bytes which were direct-written
+- */
+- }
++ iocb->ki_pos += buffered;
++ ret2 = generic_write_sync(iocb, buffered);
++ invalidate_mapping_pages(mapping,
++ (iocb->ki_pos - buffered) >> PAGE_SHIFT,
++ (iocb->ki_pos - 1) >> PAGE_SHIFT);
++ if (!ret || ret2 > 0)
++ ret += ret2;
+ } else {
+ current->backing_dev_info = inode_to_bdi(inode);
+ ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+ current->backing_dev_info = NULL;
+- if (likely(ret > 0))
++ if (likely(ret > 0)) {
+ iocb->ki_pos += ret;
++ ret = generic_write_sync(iocb, ret);
++ }
+ }
+
+ out_unlock:
+ inode_unlock(inode);
+- if (likely(ret > 0)) {
+- /* Handle various SYNC-type writes */
+- ret = generic_write_sync(iocb, ret);
+- }
+- return written ? written : ret;
++ return ret;
+ }
+
+ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
--- /dev/null
+From 4c0e8dda608a51855225c611b5c6b442f95fbc56 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Wed, 15 Jan 2020 16:38:29 +0100
+Subject: gfs2: move setting current->backing_dev_info
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 4c0e8dda608a51855225c611b5c6b442f95fbc56 upstream.
+
+Set current->backing_dev_info just around the buffered write calls to
+prepare for the next fix.
+
+Fixes: 967bcc91b044 ("gfs2: iomap direct I/O support")
+Cc: stable@vger.kernel.org # v4.19+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/gfs2/file.c | 21 ++++++++++-----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+--- a/fs/gfs2/file.c
++++ b/fs/gfs2/file.c
+@@ -853,18 +853,15 @@ static ssize_t gfs2_file_write_iter(stru
+ inode_lock(inode);
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+- goto out;
+-
+- /* We can write back this queue in page reclaim */
+- current->backing_dev_info = inode_to_bdi(inode);
++ goto out_unlock;
+
+ ret = file_remove_privs(file);
+ if (ret)
+- goto out2;
++ goto out_unlock;
+
+ ret = file_update_time(file);
+ if (ret)
+- goto out2;
++ goto out_unlock;
+
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ struct address_space *mapping = file->f_mapping;
+@@ -873,11 +870,13 @@ static ssize_t gfs2_file_write_iter(stru
+
+ written = gfs2_file_direct_write(iocb, from);
+ if (written < 0 || !iov_iter_count(from))
+- goto out2;
++ goto out_unlock;
+
++ current->backing_dev_info = inode_to_bdi(inode);
+ ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
++ current->backing_dev_info = NULL;
+ if (unlikely(ret < 0))
+- goto out2;
++ goto out_unlock;
+ buffered = ret;
+
+ /*
+@@ -901,14 +900,14 @@ static ssize_t gfs2_file_write_iter(stru
+ */
+ }
+ } else {
++ current->backing_dev_info = inode_to_bdi(inode);
+ ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
++ current->backing_dev_info = NULL;
+ if (likely(ret > 0))
+ iocb->ki_pos += ret;
+ }
+
+-out2:
+- current->backing_dev_info = NULL;
+-out:
++out_unlock:
+ inode_unlock(inode);
+ if (likely(ret > 0)) {
+ /* Handle various SYNC-type writes */
--- /dev/null
+From 197288d5ba8a5289f22d3aeb4fca3824bfd9b4af Mon Sep 17 00:00:00 2001
+From: Luca Coelho <luciano.coelho@intel.com>
+Date: Fri, 31 Jan 2020 15:45:25 +0200
+Subject: iwlwifi: don't throw error when trying to remove IGTK
+
+From: Luca Coelho <luciano.coelho@intel.com>
+
+commit 197288d5ba8a5289f22d3aeb4fca3824bfd9b4af upstream.
+
+The IGTK keys are only removed by mac80211 after it has already
+removed the AP station. This causes the driver to throw an error
+because mac80211 is trying to remove the IGTK when the station doesn't
+exist anymore.
+
+The firmware is aware that the station has been removed and can deal
+with it the next time we try to add an IGTK for a station, so we
+shouldn't try to remove the key if the station ID is
+IWL_MVM_INVALID_STA. Do this by removing the check for mvm_sta before
+calling iwl_mvm_send_sta_igtk() and returning gracefully from that
+function if the station ID is invalid.
+
+Cc: stable@vger.kernel.org # 4.12+
+Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+@@ -3321,6 +3321,10 @@ static int iwl_mvm_send_sta_igtk(struct
+ igtk_cmd.sta_id = cpu_to_le32(sta_id);
+
+ if (remove_key) {
++ /* This is a valid situation for IGTK */
++ if (sta_id == IWL_MVM_INVALID_STA)
++ return 0;
++
+ igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_NOT_VALID);
+ } else {
+ struct ieee80211_key_seq seq;
+@@ -3575,9 +3579,9 @@ int iwl_mvm_remove_sta_key(struct iwl_mv
+ IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n",
+ keyconf->keyidx, sta_id);
+
+- if (mvm_sta && (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC ||
+- keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 ||
+- keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256))
++ if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC ||
++ keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 ||
++ keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256)
+ return iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, true);
+
+ if (!__test_and_clear_bit(keyconf->hw_key_idx, mvm->fw_key_table)) {
--- /dev/null
+From 1a8e9cf40c9a6a2e40b1e924b13ed303aeea4418 Mon Sep 17 00:00:00 2001
+From: Vasily Averin <vvs@virtuozzo.com>
+Date: Thu, 23 Jan 2020 12:05:10 +0300
+Subject: jbd2_seq_info_next should increase position index
+
+From: Vasily Averin <vvs@virtuozzo.com>
+
+commit 1a8e9cf40c9a6a2e40b1e924b13ed303aeea4418 upstream.
+
+If a seq_file .next function does not change the position index, a
+read after some lseek can generate unexpected output.
+
+The script below generates endless output:
+ $ q=;while read -r r;do echo "$((++q)) $r";done </proc/fs/jbd2/DEV/info
+
+https://bugzilla.kernel.org/show_bug.cgi?id=206283
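+
+The seq_file contract is that .next must advance *pos even when there
+is nothing more to emit; for this single-record file the compliant
+iterator is simply (mirroring the diff below):
+
+	static void *jbd2_seq_info_next(struct seq_file *seq, void *v,
+					loff_t *pos)
+	{
+		(*pos)++;	/* always advance, even at EOF */
+		return NULL;	/* only one record to show */
+	}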
+
+Fixes: 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code and interface")
+Cc: stable@kernel.org
+Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/d13805e5-695e-8ac3-b678-26ca2313629f@virtuozzo.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/jbd2/journal.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -981,6 +981,7 @@ static void *jbd2_seq_info_start(struct
+
+ static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
++ (*pos)++;
+ return NULL;
+ }
+
--- /dev/null
+From 1a978d9d3e72ddfa40ac60d26301b154247ee0bc Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+Date: Wed, 18 Dec 2019 13:54:46 -0800
+Subject: KVM: PPC: Book3S HV: Uninit vCPU if vcore creation fails
+
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+
+commit 1a978d9d3e72ddfa40ac60d26301b154247ee0bc upstream.
+
+Call kvm_vcpu_uninit() if vcore creation fails to avoid leaking any
+resources allocated by kvm_vcpu_init(), i.e. the vcpu->run page.
+
+Fixes: 371fefd6f2dc4 ("KVM: PPC: Allow book3s_hv guests to use SMT processor modes")
+Cc: stable@vger.kernel.org
+Reviewed-by: Greg Kurz <groug@kaod.org>
+Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Acked-by: Paul Mackerras <paulus@ozlabs.org>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kvm/book3s_hv.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -2354,7 +2354,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu
+ mutex_unlock(&kvm->lock);
+
+ if (!vcore)
+- goto free_vcpu;
++ goto uninit_vcpu;
+
+ spin_lock(&vcore->lock);
+ ++vcore->num_threads;
+@@ -2371,6 +2371,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu
+
+ return vcpu;
+
++uninit_vcpu:
++ kvm_vcpu_uninit(vcpu);
+ free_vcpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+ out:
--- /dev/null
+From cb10bf9194f4d2c5d830eddca861f7ca0fecdbb4 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+Date: Wed, 18 Dec 2019 13:54:47 -0800
+Subject: KVM: PPC: Book3S PR: Free shared page if mmu initialization fails
+
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+
+commit cb10bf9194f4d2c5d830eddca861f7ca0fecdbb4 upstream.
+
+Explicitly free the shared page if kvmppc_mmu_init() fails during
+kvmppc_core_vcpu_create(), as the page is freed only in
+kvmppc_core_vcpu_free(), which is not reached via kvm_vcpu_uninit().
+
+Fixes: 96bc451a15329 ("KVM: PPC: Introduce shared page")
+Cc: stable@vger.kernel.org
+Reviewed-by: Greg Kurz <groug@kaod.org>
+Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Acked-by: Paul Mackerras <paulus@ozlabs.org>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kvm/book3s_pr.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/kvm/book3s_pr.c
++++ b/arch/powerpc/kvm/book3s_pr.c
+@@ -1769,10 +1769,12 @@ static struct kvm_vcpu *kvmppc_core_vcpu
+
+ err = kvmppc_mmu_init(vcpu);
+ if (err < 0)
+- goto uninit_vcpu;
++ goto free_shared_page;
+
+ return vcpu;
+
++free_shared_page:
++ free_page((unsigned long)vcpu->arch.shared);
+ uninit_vcpu:
+ kvm_vcpu_uninit(vcpu);
+ free_shadow_vcpu:
--- /dev/null
+From a47970ed74a535b1accb4bc73643fd5a93993c3e Mon Sep 17 00:00:00 2001
+From: John Allen <john.allen@amd.com>
+Date: Thu, 19 Dec 2019 14:17:59 -0600
+Subject: kvm/svm: PKU not currently supported
+
+From: John Allen <john.allen@amd.com>
+
+commit a47970ed74a535b1accb4bc73643fd5a93993c3e upstream.
+
+The current SVM implementation does not have support for handling PKU.
+Guests running on a host with future AMD cpus that support the feature
+will read garbage from the PKRU register and will hit segmentation
+faults on boot, as memory that should not be protected is getting
+marked as protected. Ensure that cpuid from SVM does not advertise the
+feature.
+
+Signed-off-by: John Allen <john.allen@amd.com>
+Cc: stable@vger.kernel.org
+Fixes: 0556cbdc2fbc ("x86/pkeys: Don't check if PKRU is zero before writing it")
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/kvm_host.h | 1 +
+ arch/x86/kvm/cpuid.c | 4 +++-
+ arch/x86/kvm/svm.c | 6 ++++++
+ arch/x86/kvm/vmx/capabilities.h | 5 +++++
+ arch/x86/kvm/vmx/vmx.c | 1 +
+ 5 files changed, 16 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1128,6 +1128,7 @@ struct kvm_x86_ops {
+ bool (*xsaves_supported)(void);
+ bool (*umip_emulated)(void);
+ bool (*pt_supported)(void);
++ bool (*pku_supported)(void);
+
+ int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
+ void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -352,6 +352,7 @@ static inline void do_cpuid_7_mask(struc
+ unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
+ unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
+ unsigned f_la57;
++ unsigned f_pku = kvm_x86_ops->pku_supported() ? F(PKU) : 0;
+
+ /* cpuid 7.0.ebx */
+ const u32 kvm_cpuid_7_0_ebx_x86_features =
+@@ -363,7 +364,7 @@ static inline void do_cpuid_7_mask(struc
+
+ /* cpuid 7.0.ecx*/
+ const u32 kvm_cpuid_7_0_ecx_x86_features =
+- F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
++ F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
+ F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
+ F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
+ F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
+@@ -392,6 +393,7 @@ static inline void do_cpuid_7_mask(struc
+ /* Set LA57 based on hardware capability. */
+ entry->ecx |= f_la57;
+ entry->ecx |= f_umip;
++ entry->ecx |= f_pku;
+ /* PKU is not yet implemented for shadow paging. */
+ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
+ entry->ecx &= ~F(PKU);
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -5986,6 +5986,11 @@ static bool svm_has_wbinvd_exit(void)
+ return true;
+ }
+
++static bool svm_pku_supported(void)
++{
++ return false;
++}
++
+ #define PRE_EX(exit) { .exit_code = (exit), \
+ .stage = X86_ICPT_PRE_EXCEPT, }
+ #define POST_EX(exit) { .exit_code = (exit), \
+@@ -7278,6 +7283,7 @@ static struct kvm_x86_ops svm_x86_ops __
+ .xsaves_supported = svm_xsaves_supported,
+ .umip_emulated = svm_umip_emulated,
+ .pt_supported = svm_pt_supported,
++ .pku_supported = svm_pku_supported,
+
+ .set_supported_cpuid = svm_set_supported_cpuid,
+
+--- a/arch/x86/kvm/vmx/capabilities.h
++++ b/arch/x86/kvm/vmx/capabilities.h
+@@ -145,6 +145,11 @@ static inline bool vmx_umip_emulated(voi
+ SECONDARY_EXEC_DESC;
+ }
+
++static inline bool vmx_pku_supported(void)
++{
++ return boot_cpu_has(X86_FEATURE_PKU);
++}
++
+ static inline bool cpu_has_vmx_rdtscp(void)
+ {
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -7865,6 +7865,7 @@ static struct kvm_x86_ops vmx_x86_ops __
+ .xsaves_supported = vmx_xsaves_supported,
+ .umip_emulated = vmx_umip_emulated,
+ .pt_supported = vmx_pt_supported,
++ .pku_supported = vmx_pku_supported,
+
+ .request_immediate_exit = vmx_request_immediate_exit,
+
--- /dev/null
+From f958bd2314d117f8c29f4821401bc1925bc2e5ef Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+Date: Mon, 9 Dec 2019 12:19:31 -0800
+Subject: KVM: x86: Fix potential put_fpu() w/o load_fpu() on MPX platform
+
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+
+commit f958bd2314d117f8c29f4821401bc1925bc2e5ef upstream.
+
+Unlike most state managed by XSAVE, MPX is initialized to zero on INIT.
+Because INITs are usually recognized in the context of a VCPU_RUN call,
+kvm_vcpu_reset() puts the guest's FPU so that the FPU state is resident
+in memory, zeros the MPX state, and reloads FPU state to hardware. But,
+in the unlikely event that an INIT is recognized during
+kvm_arch_vcpu_ioctl_get_mpstate() via kvm_apic_accept_events(),
+kvm_vcpu_reset() will call kvm_put_guest_fpu() without a preceding
+kvm_load_guest_fpu() and corrupt the guest's FPU state (and possibly
+userspace's FPU state as well).
+
+Given that MPX is being removed from the kernel[*], fix the bug with the
+simple-but-ugly approach of loading the guest's FPU during
+KVM_GET_MP_STATE.
+
+[*] See commit f240652b6032b ("x86/mpx: Remove MPX APIs").
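+
+The fix brackets the mp_state read with a guest-FPU load/put pair on
+MPX-capable hosts (condensed from the diff below):
+
+	vcpu_load(vcpu);
+	if (kvm_mpx_supported())
+		kvm_load_guest_fpu(vcpu);
+
+	kvm_apic_accept_events(vcpu);	/* may recognize an INIT */
+	/* ... read mp_state ... */
+
+	if (kvm_mpx_supported())
+		kvm_put_guest_fpu(vcpu);
+	vcpu_put(vcpu);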
+
+Fixes: f775b13eedee2 ("x86,kvm: move qemu/guest FPU switching out to vcpu_run")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/x86.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -8698,6 +8698,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(stru
+ struct kvm_mp_state *mp_state)
+ {
+ vcpu_load(vcpu);
++ if (kvm_mpx_supported())
++ kvm_load_guest_fpu(vcpu);
+
+ kvm_apic_accept_events(vcpu);
+ if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
+@@ -8706,6 +8708,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(stru
+ else
+ mp_state->mp_state = vcpu->arch.mp_state;
+
++ if (kvm_mpx_supported())
++ kvm_put_guest_fpu(vcpu);
+ vcpu_put(vcpu);
+ return 0;
+ }
--- /dev/null
+From ea740059ecb37807ba47b84b33d1447435a8d868 Mon Sep 17 00:00:00 2001
+From: Marios Pomonis <pomonis@google.com>
+Date: Wed, 11 Dec 2019 12:47:52 -0800
+Subject: KVM: x86: Protect DR-based index computations from Spectre-v1/L1TF attacks
+
+From: Marios Pomonis <pomonis@google.com>
+
+commit ea740059ecb37807ba47b84b33d1447435a8d868 upstream.
+
+This fixes a Spectre-v1/L1TF vulnerability in __kvm_set_dr() and
+kvm_get_dr().
+Both kvm_get_dr() and kvm_set_dr() (a wrapper of __kvm_set_dr()) are
+exported symbols so KVM should treat them conservatively from a security
+perspective.
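+
+The mitigation pattern used throughout this series is to clamp the
+index with array_index_nospec() after the bounds check, so it cannot
+be used speculatively out of range (sketch of the fixed debug-register
+write, from the diff below):
+
+	size_t size = ARRAY_SIZE(vcpu->arch.db);
+
+	switch (dr) {
+	case 0 ... 3:
+		vcpu->arch.db[array_index_nospec(dr, size)] = val;
+		break;
+	/* ... */
+	}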
+
+Fixes: 020df0794f57 ("KVM: move DR register access handling into generic code")
+
+Signed-off-by: Nick Finco <nifi@google.com>
+Signed-off-by: Marios Pomonis <pomonis@google.com>
+Reviewed-by: Andrew Honig <ahonig@google.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/x86.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1054,9 +1054,11 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu
+
+ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
+ {
++ size_t size = ARRAY_SIZE(vcpu->arch.db);
++
+ switch (dr) {
+ case 0 ... 3:
+- vcpu->arch.db[dr] = val;
++ vcpu->arch.db[array_index_nospec(dr, size)] = val;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+ vcpu->arch.eff_db[dr] = val;
+ break;
+@@ -1093,9 +1095,11 @@ EXPORT_SYMBOL_GPL(kvm_set_dr);
+
+ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
+ {
++ size_t size = ARRAY_SIZE(vcpu->arch.db);
++
+ switch (dr) {
+ case 0 ... 3:
+- *val = vcpu->arch.db[dr];
++ *val = vcpu->arch.db[array_index_nospec(dr, size)];
+ break;
+ case 4:
+ /* fall through */
--- /dev/null
+From 8c86405f606ca8508b8d9280680166ca26723695 Mon Sep 17 00:00:00 2001
+From: Marios Pomonis <pomonis@google.com>
+Date: Wed, 11 Dec 2019 12:47:44 -0800
+Subject: KVM: x86: Protect ioapic_read_indirect() from Spectre-v1/L1TF attacks
+
+From: Marios Pomonis <pomonis@google.com>
+
+commit 8c86405f606ca8508b8d9280680166ca26723695 upstream.
+
+This fixes a Spectre-v1/L1TF vulnerability in ioapic_read_indirect().
+This function contains index computations based on the
+(attacker-controlled) IOREGSEL register.
+
+Fixes: a2c118bfab8b ("KVM: Fix bounds checking in ioapic indirect register reads (CVE-2013-1798)")
+
+Signed-off-by: Nick Finco <nifi@google.com>
+Signed-off-by: Marios Pomonis <pomonis@google.com>
+Reviewed-by: Andrew Honig <ahonig@google.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/ioapic.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kvm/ioapic.c
++++ b/arch/x86/kvm/ioapic.c
+@@ -36,6 +36,7 @@
+ #include <linux/io.h>
+ #include <linux/slab.h>
+ #include <linux/export.h>
++#include <linux/nospec.h>
+ #include <asm/processor.h>
+ #include <asm/page.h>
+ #include <asm/current.h>
+@@ -68,13 +69,14 @@ static unsigned long ioapic_read_indirec
+ default:
+ {
+ u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
+- u64 redir_content;
++ u64 redir_content = ~0ULL;
+
+- if (redir_index < IOAPIC_NUM_PINS)
+- redir_content =
+- ioapic->redirtbl[redir_index].bits;
+- else
+- redir_content = ~0ULL;
++ if (redir_index < IOAPIC_NUM_PINS) {
++ u32 index = array_index_nospec(
++ redir_index, IOAPIC_NUM_PINS);
++
++ redir_content = ioapic->redirtbl[index].bits;
++ }
+
+ result = (ioapic->ioregsel & 0x1) ?
+ (redir_content >> 32) & 0xffffffff :
--- /dev/null
+From 670564559ca35b439c8d8861fc399451ddf95137 Mon Sep 17 00:00:00 2001
+From: Marios Pomonis <pomonis@google.com>
+Date: Wed, 11 Dec 2019 12:47:45 -0800
+Subject: KVM: x86: Protect ioapic_write_indirect() from Spectre-v1/L1TF attacks
+
+From: Marios Pomonis <pomonis@google.com>
+
+commit 670564559ca35b439c8d8861fc399451ddf95137 upstream.
+
+This fixes a Spectre-v1/L1TF vulnerability in ioapic_write_indirect().
+This function contains index computations based on the
+(attacker-controlled) IOREGSEL register.
+
+This patch depends on patch
+"KVM: x86: Protect ioapic_read_indirect() from Spectre-v1/L1TF attacks".
+
+Fixes: 70f93dae32ac ("KVM: Use temporary variable to shorten lines.")
+
+Signed-off-by: Nick Finco <nifi@google.com>
+Signed-off-by: Marios Pomonis <pomonis@google.com>
+Reviewed-by: Andrew Honig <ahonig@google.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/ioapic.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kvm/ioapic.c
++++ b/arch/x86/kvm/ioapic.c
+@@ -291,6 +291,7 @@ static void ioapic_write_indirect(struct
+
+ if (index >= IOAPIC_NUM_PINS)
+ return;
++ index = array_index_nospec(index, IOAPIC_NUM_PINS);
+ e = &ioapic->redirtbl[index];
+ mask_before = e->fields.mask;
+ /* Preserve read-only fields */
--- /dev/null
+From 8618793750071d66028584a83ed0b4fa7eb4f607 Mon Sep 17 00:00:00 2001
+From: Marios Pomonis <pomonis@google.com>
+Date: Wed, 11 Dec 2019 12:47:42 -0800
+Subject: KVM: x86: Protect kvm_hv_msr_[get|set]_crash_data() from Spectre-v1/L1TF attacks
+
+From: Marios Pomonis <pomonis@google.com>
+
+commit 8618793750071d66028584a83ed0b4fa7eb4f607 upstream.
+
+This fixes Spectre-v1/L1TF vulnerabilities in kvm_hv_msr_get_crash_data()
+and kvm_hv_msr_set_crash_data().
+These functions contain index computations that use the
+(attacker-controlled) MSR number.
+
+Fixes: e7d9513b60e8 ("kvm/x86: added hyper-v crash msrs into kvm hyperv context")
+
+Signed-off-by: Nick Finco <nifi@google.com>
+Signed-off-by: Marios Pomonis <pomonis@google.com>
+Reviewed-by: Andrew Honig <ahonig@google.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/hyperv.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/hyperv.c
++++ b/arch/x86/kvm/hyperv.c
+@@ -809,11 +809,12 @@ static int kvm_hv_msr_get_crash_data(str
+ u32 index, u64 *pdata)
+ {
+ struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
++ size_t size = ARRAY_SIZE(hv->hv_crash_param);
+
+- if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
++ if (WARN_ON_ONCE(index >= size))
+ return -EINVAL;
+
+- *pdata = hv->hv_crash_param[index];
++ *pdata = hv->hv_crash_param[array_index_nospec(index, size)];
+ return 0;
+ }
+
+@@ -852,11 +853,12 @@ static int kvm_hv_msr_set_crash_data(str
+ u32 index, u64 data)
+ {
+ struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
++ size_t size = ARRAY_SIZE(hv->hv_crash_param);
+
+- if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
++ if (WARN_ON_ONCE(index >= size))
+ return -EINVAL;
+
+- hv->hv_crash_param[index] = data;
++ hv->hv_crash_param[array_index_nospec(index, size)] = data;
+ return 0;
+ }
+
--- /dev/null
+From 4bf79cb089f6b1c6c632492c0271054ce52ad766 Mon Sep 17 00:00:00 2001
+From: Marios Pomonis <pomonis@google.com>
+Date: Wed, 11 Dec 2019 12:47:46 -0800
+Subject: KVM: x86: Protect kvm_lapic_reg_write() from Spectre-v1/L1TF attacks
+
+From: Marios Pomonis <pomonis@google.com>
+
+commit 4bf79cb089f6b1c6c632492c0271054ce52ad766 upstream.
+
+This fixes a Spectre-v1/L1TF vulnerability in kvm_lapic_reg_write().
+This function contains index computations based on the
+(attacker-controlled) MSR number.
+
+Fixes: 0105d1a52640 ("KVM: x2apic interface to lapic")
+
+Signed-off-by: Nick Finco <nifi@google.com>
+Signed-off-by: Marios Pomonis <pomonis@google.com>
+Reviewed-by: Andrew Honig <ahonig@google.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/lapic.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -1926,15 +1926,20 @@ int kvm_lapic_reg_write(struct kvm_lapic
+ case APIC_LVTTHMR:
+ case APIC_LVTPC:
+ case APIC_LVT1:
+- case APIC_LVTERR:
++ case APIC_LVTERR: {
+ /* TODO: Check vector */
++ size_t size;
++ u32 index;
++
+ if (!kvm_apic_sw_enabled(apic))
+ val |= APIC_LVT_MASKED;
+-
+- val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
++ size = ARRAY_SIZE(apic_lvt_mask);
++ index = array_index_nospec(
++ (reg - APIC_LVTT) >> 4, size);
++ val &= apic_lvt_mask[index];
+ kvm_lapic_set_reg(apic, reg, val);
+-
+ break;
++ }
+
+ case APIC_LVTT:
+ if (!kvm_apic_sw_enabled(apic))
--- /dev/null
+From 6ec4c5eee1750d5d17951c4e1960d953376a0dda Mon Sep 17 00:00:00 2001
+From: Marios Pomonis <pomonis@google.com>
+Date: Wed, 11 Dec 2019 12:47:49 -0800
+Subject: KVM: x86: Protect MSR-based index computations from Spectre-v1/L1TF attacks in x86.c
+
+From: Marios Pomonis <pomonis@google.com>
+
+commit 6ec4c5eee1750d5d17951c4e1960d953376a0dda upstream.
+
+This fixes a Spectre-v1/L1TF vulnerability in set_msr_mce() and
+get_msr_mce().
+Both functions contain index computations based on the
+(attacker-controlled) MSR number.
+
+Fixes: 890ca9aefa78 ("KVM: Add MCE support")
+
+Signed-off-by: Nick Finco <nifi@google.com>
+Signed-off-by: Marios Pomonis <pomonis@google.com>
+Reviewed-by: Andrew Honig <ahonig@google.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/x86.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -2494,7 +2494,10 @@ static int set_msr_mce(struct kvm_vcpu *
+ default:
+ if (msr >= MSR_IA32_MC0_CTL &&
+ msr < MSR_IA32_MCx_CTL(bank_num)) {
+- u32 offset = msr - MSR_IA32_MC0_CTL;
++ u32 offset = array_index_nospec(
++ msr - MSR_IA32_MC0_CTL,
++ MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
++
+ /* only 0 or all 1s can be written to IA32_MCi_CTL
+ * some Linux kernels though clear bit 10 in bank 4 to
+ * workaround a BIOS/GART TBL issue on AMD K8s, ignore
+@@ -2921,7 +2924,10 @@ static int get_msr_mce(struct kvm_vcpu *
+ default:
+ if (msr >= MSR_IA32_MC0_CTL &&
+ msr < MSR_IA32_MCx_CTL(bank_num)) {
+- u32 offset = msr - MSR_IA32_MC0_CTL;
++ u32 offset = array_index_nospec(
++ msr - MSR_IA32_MC0_CTL,
++ MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
++
+ data = vcpu->arch.mce_banks[offset];
+ break;
+ }
--- /dev/null
+From 25a5edea71b7c154b6a0b8cec14c711cafa31d26 Mon Sep 17 00:00:00 2001
+From: Marios Pomonis <pomonis@google.com>
+Date: Wed, 11 Dec 2019 12:47:47 -0800
+Subject: KVM: x86: Protect MSR-based index computations in fixed_msr_to_seg_unit() from Spectre-v1/L1TF attacks
+
+From: Marios Pomonis <pomonis@google.com>
+
+commit 25a5edea71b7c154b6a0b8cec14c711cafa31d26 upstream.
+
+This fixes a Spectre-v1/L1TF vulnerability in fixed_msr_to_seg_unit().
+This function contains index computations based on the
+(attacker-controlled) MSR number.
+
+Fixes: de9aef5e1ad6 ("KVM: MTRR: introduce fixed_mtrr_segment table")
+
+Signed-off-by: Nick Finco <nifi@google.com>
+Signed-off-by: Marios Pomonis <pomonis@google.com>
+Reviewed-by: Andrew Honig <ahonig@google.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/mtrr.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/mtrr.c
++++ b/arch/x86/kvm/mtrr.c
+@@ -192,11 +192,15 @@ static bool fixed_msr_to_seg_unit(u32 ms
+ break;
+ case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000:
+ *seg = 1;
+- *unit = msr - MSR_MTRRfix16K_80000;
++ *unit = array_index_nospec(
++ msr - MSR_MTRRfix16K_80000,
++ MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1);
+ break;
+ case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
+ *seg = 2;
+- *unit = msr - MSR_MTRRfix4K_C0000;
++ *unit = array_index_nospec(
++ msr - MSR_MTRRfix4K_C0000,
++ MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1);
+ break;
+ default:
+ return false;
--- /dev/null
+From 13c5183a4e643cc2b03a22d0e582c8e17bb7457d Mon Sep 17 00:00:00 2001
+From: Marios Pomonis <pomonis@google.com>
+Date: Wed, 11 Dec 2019 12:47:48 -0800
+Subject: KVM: x86: Protect MSR-based index computations in pmu.h from Spectre-v1/L1TF attacks
+
+From: Marios Pomonis <pomonis@google.com>
+
+commit 13c5183a4e643cc2b03a22d0e582c8e17bb7457d upstream.
+
+This fixes a Spectre-v1/L1TF vulnerability in the get_gp_pmc() and
+get_fixed_pmc() functions.
+They both contain index computations based on the (attacker-controlled)
+MSR number.
+
+Fixes: 25462f7f5295 ("KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch")
+
+Signed-off-by: Nick Finco <nifi@google.com>
+Signed-off-by: Marios Pomonis <pomonis@google.com>
+Reviewed-by: Andrew Honig <ahonig@google.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/pmu.h | 18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/pmu.h
++++ b/arch/x86/kvm/pmu.h
+@@ -2,6 +2,8 @@
+ #ifndef __KVM_X86_PMU_H
+ #define __KVM_X86_PMU_H
+
++#include <linux/nospec.h>
++
+ #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu)
+ #define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu))
+ #define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu)
+@@ -86,8 +88,12 @@ static inline bool pmc_is_enabled(struct
+ static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
+ u32 base)
+ {
+- if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
+- return &pmu->gp_counters[msr - base];
++ if (msr >= base && msr < base + pmu->nr_arch_gp_counters) {
++ u32 index = array_index_nospec(msr - base,
++ pmu->nr_arch_gp_counters);
++
++ return &pmu->gp_counters[index];
++ }
+
+ return NULL;
+ }
+@@ -97,8 +103,12 @@ static inline struct kvm_pmc *get_fixed_
+ {
+ int base = MSR_CORE_PERF_FIXED_CTR0;
+
+- if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
+- return &pmu->fixed_counters[msr - base];
++ if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) {
++ u32 index = array_index_nospec(msr - base,
++ pmu->nr_arch_fixed_counters);
++
++ return &pmu->fixed_counters[index];
++ }
+
+ return NULL;
+ }
--- /dev/null
+From 66061740f1a487f4ed54fde75e724709f805da53 Mon Sep 17 00:00:00 2001
+From: Marios Pomonis <pomonis@google.com>
+Date: Wed, 11 Dec 2019 12:47:53 -0800
+Subject: KVM: x86: Protect pmu_intel.c from Spectre-v1/L1TF attacks
+
+From: Marios Pomonis <pomonis@google.com>
+
+commit 66061740f1a487f4ed54fde75e724709f805da53 upstream.
+
+This fixes Spectre-v1/L1TF vulnerabilities in intel_find_fixed_event()
+and intel_rdpmc_ecx_to_pmc().
+kvm_rdpmc() (ancestor of intel_find_fixed_event()) and
+reprogram_fixed_counter() (ancestor of intel_rdpmc_ecx_to_pmc()) are
+exported symbols so KVM should treat them conservatively from a security
+perspective.
+
+Fixes: 25462f7f5295 ("KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch")
+
+Signed-off-by: Nick Finco <nifi@google.com>
+Signed-off-by: Marios Pomonis <pomonis@google.com>
+Reviewed-by: Andrew Honig <ahonig@google.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx/pmu_intel.c | 24 ++++++++++++++++--------
+ 1 file changed, 16 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kvm/vmx/pmu_intel.c
++++ b/arch/x86/kvm/vmx/pmu_intel.c
+@@ -84,10 +84,14 @@ static unsigned intel_find_arch_event(st
+
+ static unsigned intel_find_fixed_event(int idx)
+ {
+- if (idx >= ARRAY_SIZE(fixed_pmc_events))
++ u32 event;
++ size_t size = ARRAY_SIZE(fixed_pmc_events);
++
++ if (idx >= size)
+ return PERF_COUNT_HW_MAX;
+
+- return intel_arch_events[fixed_pmc_events[idx]].event_type;
++ event = fixed_pmc_events[array_index_nospec(idx, size)];
++ return intel_arch_events[event].event_type;
+ }
+
+ /* check if a PMC is enabled by comparing it with globl_ctrl bits. */
+@@ -128,16 +132,20 @@ static struct kvm_pmc *intel_msr_idx_to_
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ bool fixed = idx & (1u << 30);
+ struct kvm_pmc *counters;
++ unsigned int num_counters;
+
+ idx &= ~(3u << 30);
+- if (!fixed && idx >= pmu->nr_arch_gp_counters)
+- return NULL;
+- if (fixed && idx >= pmu->nr_arch_fixed_counters)
++ if (fixed) {
++ counters = pmu->fixed_counters;
++ num_counters = pmu->nr_arch_fixed_counters;
++ } else {
++ counters = pmu->gp_counters;
++ num_counters = pmu->nr_arch_gp_counters;
++ }
++ if (idx >= num_counters)
+ return NULL;
+- counters = fixed ? pmu->fixed_counters : pmu->gp_counters;
+ *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
+-
+- return &counters[idx];
++ return &counters[array_index_nospec(idx, num_counters)];
+ }
+
+ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
--- /dev/null
+From 3c9053a2cae7ba2ba73766a34cea41baa70f57f7 Mon Sep 17 00:00:00 2001
+From: Marios Pomonis <pomonis@google.com>
+Date: Wed, 11 Dec 2019 12:47:41 -0800
+Subject: KVM: x86: Protect x86_decode_insn from Spectre-v1/L1TF attacks
+
+From: Marios Pomonis <pomonis@google.com>
+
+commit 3c9053a2cae7ba2ba73766a34cea41baa70f57f7 upstream.
+
+This fixes a Spectre-v1/L1TF vulnerability in x86_decode_insn().
+kvm_emulate_instruction() (an ancestor of x86_decode_insn()) is an exported
+symbol, so KVM should treat it conservatively from a security perspective.
+
+Fixes: 045a282ca415 ("KVM: emulator: implement fninit, fnstsw, fnstcw")
+
+Signed-off-by: Nick Finco <nifi@google.com>
+Signed-off-by: Marios Pomonis <pomonis@google.com>
+Reviewed-by: Andrew Honig <ahonig@google.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/emulate.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -5317,10 +5317,15 @@ done_prefixes:
+ }
+ break;
+ case Escape:
+- if (ctxt->modrm > 0xbf)
+- opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
+- else
++ if (ctxt->modrm > 0xbf) {
++ size_t size = ARRAY_SIZE(opcode.u.esc->high);
++ u32 index = array_index_nospec(
++ ctxt->modrm - 0xc0, size);
++
++ opcode = opcode.u.esc->high[index];
++ } else {
+ opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
++ }
+ break;
+ case InstrDual:
+ if ((ctxt->modrm >> 6) == 3)
--- /dev/null
+From 14e32321f3606e4b0970200b6e5e47ee6f1e6410 Mon Sep 17 00:00:00 2001
+From: Marios Pomonis <pomonis@google.com>
+Date: Wed, 11 Dec 2019 12:47:43 -0800
+Subject: KVM: x86: Refactor picdev_write() to prevent Spectre-v1/L1TF attacks
+
+From: Marios Pomonis <pomonis@google.com>
+
+commit 14e32321f3606e4b0970200b6e5e47ee6f1e6410 upstream.
+
+This fixes a Spectre-v1/L1TF vulnerability in picdev_write().
+It replaces index computations based on the (attacker-controlled) port
+number with constants through a minor refactoring.
+
+Fixes: 85f455f7ddbe ("KVM: Add support for in-kernel PIC emulation")
+
+Signed-off-by: Nick Finco <nifi@google.com>
+Signed-off-by: Marios Pomonis <pomonis@google.com>
+Reviewed-by: Andrew Honig <ahonig@google.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
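+
+A small standalone demonstration of why the old code indexed with
+addr >> 7: the master PIC ports 0x20/0x21 shift to 0 and the slave PIC
+ports 0xa0/0xa1 shift to 1. Under Spectre-v1 speculation, however, a
+mispredicted switch could execute the indexing with an out-of-range
+port, so the refactoring replaces the computation with the constants
+0 and 1.
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		unsigned int ports[] = { 0x20, 0x21, 0xa0, 0xa1 };
+		int i;
+
+		for (i = 0; i < 4; i++)
+			printf("port 0x%02x -> pics[%u]\n", ports[i], ports[i] >> 7);
+		/* pics[0] for 0x20/0x21, pics[1] for 0xa0/0xa1 */
+		return 0;
+	}
+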
+ arch/x86/kvm/i8259.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/i8259.c
++++ b/arch/x86/kvm/i8259.c
+@@ -460,10 +460,14 @@ static int picdev_write(struct kvm_pic *
+ switch (addr) {
+ case 0x20:
+ case 0x21:
++ pic_lock(s);
++ pic_ioport_write(&s->pics[0], addr, data);
++ pic_unlock(s);
++ break;
+ case 0xa0:
+ case 0xa1:
+ pic_lock(s);
+- pic_ioport_write(&s->pics[addr >> 7], addr, data);
++ pic_ioport_write(&s->pics[1], addr, data);
+ pic_unlock(s);
+ break;
+ case 0x4d0:
--- /dev/null
+From 125ffc5e0a56a3eded608dc51e09d5ebf72cf652 Mon Sep 17 00:00:00 2001
+From: Marios Pomonis <pomonis@google.com>
+Date: Wed, 11 Dec 2019 12:47:50 -0800
+Subject: KVM: x86: Refactor prefix decoding to prevent Spectre-v1/L1TF attacks
+
+From: Marios Pomonis <pomonis@google.com>
+
+commit 125ffc5e0a56a3eded608dc51e09d5ebf72cf652 upstream.
+
+This fixes Spectre-v1/L1TF vulnerabilities in
+vmx_read_guest_seg_selector(), vmx_read_guest_seg_base(),
+vmx_read_guest_seg_limit() and vmx_read_guest_seg_ar(). When
+invoked from emulation, these functions contain index computations
+based on the (attacker-influenced) segment value. Using constants
+prevents the attack.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
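+
+For reference, a standalone check of the arithmetic the old code used:
+(b >> 3) & 3 maps the legacy override prefixes 0x26/0x2e/0x36/0x3e to
+segment indices 0..3 (ES/CS/SS/DS), and b & 7 maps 0x64/0x65 to 4/5
+(FS/GS). The refactoring replaces these computations with the
+VCPU_SREG_* constants so no attacker-influenced index survives into
+later lookups.
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		unsigned int legacy[] = { 0x26, 0x2e, 0x36, 0x3e };	/* ES CS SS DS */
+		unsigned int fsgs[] = { 0x64, 0x65 };			/* FS GS */
+		int i;
+
+		for (i = 0; i < 4; i++)
+			printf("prefix 0x%02x -> seg %u\n", legacy[i], (legacy[i] >> 3) & 3);
+		for (i = 0; i < 2; i++)
+			printf("prefix 0x%02x -> seg %u\n", fsgs[i], fsgs[i] & 7);
+		return 0;
+	}
+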
+ arch/x86/kvm/emulate.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -5212,16 +5212,28 @@ int x86_decode_insn(struct x86_emulate_c
+ ctxt->ad_bytes = def_ad_bytes ^ 6;
+ break;
+ case 0x26: /* ES override */
++ has_seg_override = true;
++ ctxt->seg_override = VCPU_SREG_ES;
++ break;
+ case 0x2e: /* CS override */
++ has_seg_override = true;
++ ctxt->seg_override = VCPU_SREG_CS;
++ break;
+ case 0x36: /* SS override */
++ has_seg_override = true;
++ ctxt->seg_override = VCPU_SREG_SS;
++ break;
+ case 0x3e: /* DS override */
+ has_seg_override = true;
+- ctxt->seg_override = (ctxt->b >> 3) & 3;
++ ctxt->seg_override = VCPU_SREG_DS;
+ break;
+ case 0x64: /* FS override */
++ has_seg_override = true;
++ ctxt->seg_override = VCPU_SREG_FS;
++ break;
+ case 0x65: /* GS override */
+ has_seg_override = true;
+- ctxt->seg_override = ctxt->b & 7;
++ ctxt->seg_override = VCPU_SREG_GS;
+ break;
+ case 0x40 ... 0x4f: /* REX */
+ if (mode != X86EMUL_MODE_PROT64)
--- /dev/null
+From 1b257870a78b0a9ce98fdfb052c58542022ffb5b Mon Sep 17 00:00:00 2001
+From: Johan Hovold <johan@kernel.org>
+Date: Fri, 3 Jan 2020 17:35:13 +0100
+Subject: media: iguanair: fix endpoint sanity check
+
+From: Johan Hovold <johan@kernel.org>
+
+commit 1b257870a78b0a9ce98fdfb052c58542022ffb5b upstream.
+
+Make sure to use the current alternate setting, which need not be the
+first one by index, when verifying the endpoint descriptors and
+initialising the URBs.
+
+Failing to do so could cause the driver to misbehave or trigger a WARN()
+in usb_submit_urb() that kernels with panic_on_warn set would choke on.
+
+Fixes: 26ff63137c45 ("[media] Add support for the IguanaWorks USB IR Transceiver")
+Fixes: ab1cbdf159be ("media: iguanair: add sanity checks")
+Cc: stable <stable@vger.kernel.org> # 3.6
+Cc: Oliver Neukum <oneukum@suse.com>
+Signed-off-by: Johan Hovold <johan@kernel.org>
+Signed-off-by: Sean Young <sean@mess.org>
+Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
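+
+For context, a hedged sketch of the pattern (not the driver itself):
+intf->altsetting is the array of all alternate settings, so it points at
+whichever setting happens to be first by index, while
+intf->cur_altsetting is the setting currently selected on the interface;
+endpoint sanity checks and URB setup must use the latter.
+
+	static int check_endpoints(struct usb_interface *intf)
+	{
+		struct usb_host_interface *alt = intf->cur_altsetting;
+
+		if (alt->desc.bNumEndpoints < 2)	/* active setting, not [0] */
+			return -ENODEV;
+		return 0;
+	}
+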
+ drivers/media/rc/iguanair.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/media/rc/iguanair.c
++++ b/drivers/media/rc/iguanair.c
+@@ -413,7 +413,7 @@ static int iguanair_probe(struct usb_int
+ int ret, pipein, pipeout;
+ struct usb_host_interface *idesc;
+
+- idesc = intf->altsetting;
++ idesc = intf->cur_altsetting;
+ if (idesc->desc.bNumEndpoints < 2)
+ return -ENODEV;
+
--- /dev/null
+From 080d89f522e2baddb4fbbd1af4b67b5f92537ef8 Mon Sep 17 00:00:00 2001
+From: Sean Young <sean@mess.org>
+Date: Thu, 21 Nov 2019 11:10:47 +0100
+Subject: media: rc: ensure lirc is initialized before registering input device
+
+From: Sean Young <sean@mess.org>
+
+commit 080d89f522e2baddb4fbbd1af4b67b5f92537ef8 upstream.
+
+Once rc_open is called on the input device, lirc events can be delivered.
+Ensure lirc is ready to do so; otherwise we might get this:
+
+Registered IR keymap rc-hauppauge
+rc rc0: Hauppauge WinTV PVR-350 as
+/devices/pci0000:00/0000:00:1e.0/0000:04:00.0/i2c-0/0-0018/rc/rc0
+input: Hauppauge WinTV PVR-350 as
+/devices/pci0000:00/0000:00:1e.0/0000:04:00.0/i2c-0/0-0018/rc/rc0/input9
+BUG: kernel NULL pointer dereference, address: 0000000000000038
+PGD 0 P4D 0
+Oops: 0000 [#1] SMP PTI
+CPU: 1 PID: 17 Comm: kworker/1:0 Not tainted 5.3.11-300.fc31.x86_64 #1
+Hardware name: /DG43NB, BIOS NBG4310H.86A.0096.2009.0903.1845 09/03/2009
+Workqueue: events ir_work [ir_kbd_i2c]
+RIP: 0010:ir_lirc_scancode_event+0x3d/0xb0
+Code: a6 b4 07 00 00 49 81 c6 b8 07 00 00 55 53 e8 ba a7 9d ff 4c 89
+e7 49 89 45 00 e8 5e 7a 25 00 49 8b 1e 48 89 c5 4c 39 f3 74 58 <8b> 43
+38 8b 53 40 89 c1 2b 4b 3c 39 ca 72 41 21 d0 49 8b 7d 00 49
+RSP: 0018:ffffaae2000b3d88 EFLAGS: 00010017
+RAX: 0000000000000002 RBX: 0000000000000000 RCX: 0000000000000019
+RDX: 0000000000000001 RSI: 006e801b1f26ce6a RDI: ffff9e39797c37b4
+RBP: 0000000000000002 R08: 0000000000000001 R09: 0000000000000001
+R10: 0000000000000001 R11: 0000000000000001 R12: ffff9e39797c37b4
+R13: ffffaae2000b3db8 R14: ffff9e39797c37b8 R15: ffff9e39797c33d8
+FS: 0000000000000000(0000) GS:ffff9e397b680000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000000038 CR3: 0000000035844000 CR4: 00000000000006e0
+Call Trace:
+ir_do_keydown+0x8e/0x2b0
+rc_keydown+0x52/0xc0
+ir_work+0xb8/0x130 [ir_kbd_i2c]
+process_one_work+0x19d/0x340
+worker_thread+0x50/0x3b0
+kthread+0xfb/0x130
+? process_one_work+0x340/0x340
+? kthread_park+0x80/0x80
+ret_from_fork+0x35/0x40
+Modules linked in: rc_hauppauge tuner msp3400 saa7127 saa7115 ivtv(+)
+tveeprom cx2341x v4l2_common videodev mc i2c_algo_bit ir_kbd_i2c
+ip_tables firewire_ohci e1000e serio_raw firewire_core ata_generic
+crc_itu_t pata_acpi pata_jmicron fuse
+CR2: 0000000000000038
+---[ end trace c67c2697a99fa74b ]---
+RIP: 0010:ir_lirc_scancode_event+0x3d/0xb0
+Code: a6 b4 07 00 00 49 81 c6 b8 07 00 00 55 53 e8 ba a7 9d ff 4c 89
+e7 49 89 45 00 e8 5e 7a 25 00 49 8b 1e 48 89 c5 4c 39 f3 74 58 <8b> 43
+38 8b 53 40 89 c1 2b 4b 3c 39 ca 72 41 21 d0 49 8b 7d 00 49
+RSP: 0018:ffffaae2000b3d88 EFLAGS: 00010017
+RAX: 0000000000000002 RBX: 0000000000000000 RCX: 0000000000000019
+RDX: 0000000000000001 RSI: 006e801b1f26ce6a RDI: ffff9e39797c37b4
+RBP: 0000000000000002 R08: 0000000000000001 R09: 0000000000000001
+R10: 0000000000000001 R11: 0000000000000001 R12: ffff9e39797c37b4
+R13: ffffaae2000b3db8 R14: ffff9e39797c37b8 R15: ffff9e39797c33d8
+FS: 0000000000000000(0000) GS:ffff9e397b680000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000000038 CR3: 0000000035844000 CR4: 00000000000006e0
+rc rc0: lirc_dev: driver ir_kbd_i2c registered at minor = 0, scancode
+receiver, no transmitter
+tuner-simple 0-0061: creating new instance
+tuner-simple 0-0061: type set to 2 (Philips NTSC (FI1236,FM1236 and
+compatibles))
+ivtv0: Registered device video0 for encoder MPG (4096 kB)
+ivtv0: Registered device video32 for encoder YUV (2048 kB)
+ivtv0: Registered device vbi0 for encoder VBI (1024 kB)
+ivtv0: Registered device video24 for encoder PCM (320 kB)
+ivtv0: Registered device radio0 for encoder radio
+ivtv0: Registered device video16 for decoder MPG (1024 kB)
+ivtv0: Registered device vbi8 for decoder VBI (64 kB)
+ivtv0: Registered device vbi16 for decoder VOUT
+ivtv0: Registered device video48 for decoder YUV (1024 kB)
+
+Cc: stable@vger.kernel.org
+Tested-by: Nick French <nickfrench@gmail.com>
+Reported-by: Nick French <nickfrench@gmail.com>
+Signed-off-by: Sean Young <sean@mess.org>
+Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
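+
+The ordering rule the patch enforces, as a minimal sketch with made-up
+names (this is not the rc-core API): if registering B can immediately
+trigger callbacks into A, then A must be fully set up first, and the
+error path must unwind in the reverse order of registration.
+
+	static int register_pipeline(struct rc_dev *dev)
+	{
+		int rc;
+
+		rc = setup_consumer(dev);	/* lirc: must be ready first */
+		if (rc)
+			return rc;
+
+		rc = setup_producer(dev);	/* input: may deliver events now */
+		if (rc) {
+			teardown_consumer(dev);	/* unwind in reverse */
+			return rc;
+		}
+		return 0;
+	}
+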
+ drivers/media/rc/rc-main.c | 27 ++++++++++++++++-----------
+ 1 file changed, 16 insertions(+), 11 deletions(-)
+
+--- a/drivers/media/rc/rc-main.c
++++ b/drivers/media/rc/rc-main.c
+@@ -1891,23 +1891,28 @@ int rc_register_device(struct rc_dev *de
+
+ dev->registered = true;
+
+- if (dev->driver_type != RC_DRIVER_IR_RAW_TX) {
+- rc = rc_setup_rx_device(dev);
+- if (rc)
+- goto out_dev;
+- }
+-
+- /* Ensure that the lirc kfifo is setup before we start the thread */
++ /*
++	 * once the input device is registered in rc_setup_rx_device,
++ * userspace can open the input device and rc_open() will be called
++ * as a result. This results in driver code being allowed to submit
++ * keycodes with rc_keydown, so lirc must be registered first.
++ */
+ if (dev->allowed_protocols != RC_PROTO_BIT_CEC) {
+ rc = ir_lirc_register(dev);
+ if (rc < 0)
+- goto out_rx;
++ goto out_dev;
++ }
++
++ if (dev->driver_type != RC_DRIVER_IR_RAW_TX) {
++ rc = rc_setup_rx_device(dev);
++ if (rc)
++ goto out_lirc;
+ }
+
+ if (dev->driver_type == RC_DRIVER_IR_RAW) {
+ rc = ir_raw_event_register(dev);
+ if (rc < 0)
+- goto out_lirc;
++ goto out_rx;
+ }
+
+ dev_dbg(&dev->dev, "Registered rc%u (driver: %s)\n", dev->minor,
+@@ -1915,11 +1920,11 @@ int rc_register_device(struct rc_dev *de
+
+ return 0;
+
++out_rx:
++ rc_free_rx_device(dev);
+ out_lirc:
+ if (dev->allowed_protocols != RC_PROTO_BIT_CEC)
+ ir_lirc_unregister(dev);
+-out_rx:
+- rc_free_rx_device(dev);
+ out_dev:
+ device_del(&dev->dev);
+ out_rx_free:
--- /dev/null
+From 65b1aae0d9d5962faccc06bdb8e91a2a0b09451c Mon Sep 17 00:00:00 2001
+From: Brian Norris <briannorris@chromium.org>
+Date: Mon, 6 Jan 2020 14:42:12 -0800
+Subject: mwifiex: fix unbalanced locking in mwifiex_process_country_ie()
+
+From: Brian Norris <briannorris@chromium.org>
+
+commit 65b1aae0d9d5962faccc06bdb8e91a2a0b09451c upstream.
+
+We called rcu_read_lock(), so we need to call rcu_read_unlock() before
+we return.
+
+Fixes: 3d94a4a8373b ("mwifiex: fix possible heap overflow in mwifiex_process_country_ie()")
+Cc: stable@vger.kernel.org
+Cc: huangwen <huangwenabc@gmail.com>
+Cc: Ganapathi Bhat <ganapathi.bhat@nxp.com>
+Signed-off-by: Brian Norris <briannorris@chromium.org>
+Acked-by: Ganapathi Bhat <ganapathi.bhat@nxp.com>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
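+
+The rule being restored, as a minimal sketch with illustrative names
+(not the mwifiex structures): once rcu_read_lock() has been taken,
+every return path out of the section must pair it with
+rcu_read_unlock(), including early error returns.
+
+	static int parse_country_ie(size_t ie_len, size_t max_len)
+	{
+		rcu_read_lock();
+		if (ie_len > max_len) {
+			rcu_read_unlock();	/* the unlock this fix adds */
+			return -EINVAL;
+		}
+		/* ... walk RCU-protected data ... */
+		rcu_read_unlock();
+		return 0;
+	}
+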
+ drivers/net/wireless/marvell/mwifiex/sta_ioctl.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
++++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
+@@ -232,6 +232,7 @@ static int mwifiex_process_country_ie(st
+
+ if (country_ie_len >
+ (IEEE80211_COUNTRY_STRING_LEN + MWIFIEX_MAX_TRIPLET_802_11D)) {
++ rcu_read_unlock();
+ mwifiex_dbg(priv->adapter, ERROR,
+ "11D: country_ie_len overflow!, deauth AP\n");
+ return -EINVAL;
--- /dev/null
+From 114de38225d9b300f027e2aec9afbb6e0def154b Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trondmy@gmail.com>
+Date: Sun, 2 Feb 2020 17:53:54 -0500
+Subject: NFS: Directory page cache pages need to be locked when read
+
+From: Trond Myklebust <trondmy@gmail.com>
+
+commit 114de38225d9b300f027e2aec9afbb6e0def154b upstream.
+
+When a NFS directory page cache page is removed from the page cache,
+its contents are freed through a call to nfs_readdir_clear_array().
+To prevent the removal of the page cache entry until after we've
+finished reading it, we must take the page lock.
+
+Fixes: 11de3b11e08c ("NFS: Fix a memory leak in nfs_readdir")
+Cc: stable@vger.kernel.org # v2.6.37+
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/dir.c | 30 +++++++++++++++++++-----------
+ 1 file changed, 19 insertions(+), 11 deletions(-)
+
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -702,8 +702,6 @@ int nfs_readdir_filler(void *data, struc
+ static
+ void cache_page_release(nfs_readdir_descriptor_t *desc)
+ {
+- if (!desc->page->mapping)
+- nfs_readdir_clear_array(desc->page);
+ put_page(desc->page);
+ desc->page = NULL;
+ }
+@@ -717,19 +715,28 @@ struct page *get_cache_page(nfs_readdir_
+
+ /*
+ * Returns 0 if desc->dir_cookie was found on page desc->page_index
++ * and locks the page to prevent removal from the page cache.
+ */
+ static
+-int find_cache_page(nfs_readdir_descriptor_t *desc)
++int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc)
+ {
+ int res;
+
+ desc->page = get_cache_page(desc);
+ if (IS_ERR(desc->page))
+ return PTR_ERR(desc->page);
+-
+- res = nfs_readdir_search_array(desc);
++ res = lock_page_killable(desc->page);
+ if (res != 0)
+- cache_page_release(desc);
++ goto error;
++ res = -EAGAIN;
++ if (desc->page->mapping != NULL) {
++ res = nfs_readdir_search_array(desc);
++ if (res == 0)
++ return 0;
++ }
++ unlock_page(desc->page);
++error:
++ cache_page_release(desc);
+ return res;
+ }
+
+@@ -744,7 +751,7 @@ int readdir_search_pagecache(nfs_readdir
+ desc->last_cookie = 0;
+ }
+ do {
+- res = find_cache_page(desc);
++ res = find_and_lock_cache_page(desc);
+ } while (res == -EAGAIN);
+ return res;
+ }
+@@ -783,7 +790,6 @@ int nfs_do_filldir(nfs_readdir_descripto
+ desc->eof = true;
+
+ kunmap(desc->page);
+- cache_page_release(desc);
+ dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
+ (unsigned long long)*desc->dir_cookie, res);
+ return res;
+@@ -829,13 +835,13 @@ int uncached_readdir(nfs_readdir_descrip
+
+ status = nfs_do_filldir(desc);
+
++ out_release:
++ nfs_readdir_clear_array(desc->page);
++ cache_page_release(desc);
+ out:
+ dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
+ __func__, status);
+ return status;
+- out_release:
+- cache_page_release(desc);
+- goto out;
+ }
+
+ /* The file offset position represents the dirent entry number. A
+@@ -900,6 +906,8 @@ static int nfs_readdir(struct file *file
+ break;
+
+ res = nfs_do_filldir(desc);
++ unlock_page(desc->page);
++ cache_page_release(desc);
+ if (res < 0)
+ break;
+ } while (!desc->eof);
--- /dev/null
+From 4b310319c6a8ce708f1033d57145e2aa027a883c Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trondmy@gmail.com>
+Date: Sun, 2 Feb 2020 17:53:53 -0500
+Subject: NFS: Fix memory leaks and corruption in readdir
+
+From: Trond Myklebust <trondmy@gmail.com>
+
+commit 4b310319c6a8ce708f1033d57145e2aa027a883c upstream.
+
+nfs_readdir_xdr_to_array() must not exit without having initialised
+the array, so that the page cache deletion routines can safely
+call nfs_readdir_clear_array().
+Furthermore, we should ensure that if we exit nfs_readdir_filler()
+with an error, we free up any page contents to prevent a leak
+if we try to fill the page again.
+
+Fixes: 11de3b11e08c ("NFS: Fix a memory leak in nfs_readdir")
+Cc: stable@vger.kernel.org # v2.6.37+
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/dir.c | 17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -162,6 +162,17 @@ typedef struct {
+ bool eof;
+ } nfs_readdir_descriptor_t;
+
++static
++void nfs_readdir_init_array(struct page *page)
++{
++ struct nfs_cache_array *array;
++
++ array = kmap_atomic(page);
++ memset(array, 0, sizeof(struct nfs_cache_array));
++ array->eof_index = -1;
++ kunmap_atomic(array);
++}
++
+ /*
+ * we are freeing strings created by nfs_add_to_readdir_array()
+ */
+@@ -174,6 +185,7 @@ void nfs_readdir_clear_array(struct page
+ array = kmap_atomic(page);
+ for (i = 0; i < array->size; i++)
+ kfree(array->array[i].string.name);
++ array->size = 0;
+ kunmap_atomic(array);
+ }
+
+@@ -610,6 +622,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir
+ int status = -ENOMEM;
+ unsigned int array_size = ARRAY_SIZE(pages);
+
++ nfs_readdir_init_array(page);
++
+ entry.prev_cookie = 0;
+ entry.cookie = desc->last_cookie;
+ entry.eof = 0;
+@@ -626,8 +640,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir
+ }
+
+ array = kmap(page);
+- memset(array, 0, sizeof(struct nfs_cache_array));
+- array->eof_index = -1;
+
+ status = nfs_readdir_alloc_pages(pages, array_size);
+ if (status < 0)
+@@ -682,6 +694,7 @@ int nfs_readdir_filler(void *data, struc
+ unlock_page(page);
+ return 0;
+ error:
++ nfs_readdir_clear_array(page);
+ unlock_page(page);
+ return ret;
+ }
--- /dev/null
+From 28c7d86bb6172ffbb1a1237c6388e77f9fe5f181 Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trondmy@gmail.com>
+Date: Mon, 6 Jan 2020 13:18:03 -0500
+Subject: nfsd: fix filecache lookup
+
+From: Trond Myklebust <trondmy@gmail.com>
+
+commit 28c7d86bb6172ffbb1a1237c6388e77f9fe5f181 upstream.
+
+If the lookup keeps finding an nfsd_file with an unhashed open file,
+then retry once only.
+
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Cc: stable@vger.kernel.org
+Fixes: 65294c1f2c5e "nfsd: add a new struct file caching facility to nfsd"
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfsd/filecache.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -791,6 +791,7 @@ nfsd_file_acquire(struct svc_rqst *rqstp
+ struct nfsd_file *nf, *new;
+ struct inode *inode;
+ unsigned int hashval;
++ bool retry = true;
+
+ /* FIXME: skip this if fh_dentry is already set? */
+ status = fh_verify(rqstp, fhp, S_IFREG,
+@@ -826,6 +827,11 @@ wait_for_construction:
+
+ /* Did construction of this file fail? */
+ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
++ if (!retry) {
++ status = nfserr_jukebox;
++ goto out;
++ }
++ retry = false;
+ nfsd_file_put_noref(nf);
+ goto retry;
+ }
--- /dev/null
+From 9dc086f1e9ef39dd823bd27954b884b2062f9e70 Mon Sep 17 00:00:00 2001
+From: Michael Ellerman <mpe@ellerman.id.au>
+Date: Fri, 7 Feb 2020 22:15:46 +1100
+Subject: powerpc/futex: Fix incorrect user access blocking
+
+From: Michael Ellerman <mpe@ellerman.id.au>
+
+commit 9dc086f1e9ef39dd823bd27954b884b2062f9e70 upstream.
+
+The early versions of our kernel user access prevention (KUAP) were
+written by Russell and Christophe, and didn't have separate
+read/write access.
+
+At some point I picked up the series and added the read/write access,
+but I failed to update the usages in futex.h to correctly allow read
+and write.
+
+However we didn't notice because of another bug which was causing the
+low-level code to always enable read and write. That bug was fixed
+recently in commit 1d8f739b07bd ("powerpc/kuap: Fix set direction in
+allow/prevent_user_access()").
+
+futex_atomic_cmpxchg_inatomic() is passed the user address as %3 and
+does:
+
+ 1: lwarx %1, 0, %3
+ cmpw 0, %1, %4
+ bne- 3f
+ 2: stwcx. %5, 0, %3
+
+Which clearly loads and stores from/to %3. The logic in
+arch_futex_atomic_op_inuser() is similar, so fix both of them to use
+allow_read_write_user().
+
+Without this fix, and with PPC_KUAP_DEBUG=y, we see eg:
+
+ Bug: Read fault blocked by AMR!
+ WARNING: CPU: 94 PID: 149215 at arch/powerpc/include/asm/book3s/64/kup-radix.h:126 __do_page_fault+0x600/0xf30
+ CPU: 94 PID: 149215 Comm: futex_requeue_p Tainted: G W 5.5.0-rc7-gcc9x-g4c25df5640ae #1
+ ...
+ NIP [c000000000070680] __do_page_fault+0x600/0xf30
+ LR [c00000000007067c] __do_page_fault+0x5fc/0xf30
+ Call Trace:
+ [c00020138e5637e0] [c00000000007067c] __do_page_fault+0x5fc/0xf30 (unreliable)
+ [c00020138e5638c0] [c00000000000ada8] handle_page_fault+0x10/0x30
+ --- interrupt: 301 at cmpxchg_futex_value_locked+0x68/0xd0
+ LR = futex_lock_pi_atomic+0xe0/0x1f0
+ [c00020138e563bc0] [c000000000217b50] futex_lock_pi_atomic+0x80/0x1f0 (unreliable)
+ [c00020138e563c30] [c00000000021b668] futex_requeue+0x438/0xb60
+ [c00020138e563d60] [c00000000021c6cc] do_futex+0x1ec/0x2b0
+ [c00020138e563d90] [c00000000021c8b8] sys_futex+0x128/0x200
+ [c00020138e563e20] [c00000000000b7ac] system_call+0x5c/0x68
+
+Fixes: de78a9c42a79 ("powerpc: Add a framework for Kernel Userspace Access Protection")
+Cc: stable@vger.kernel.org # v5.2+
+Reported-by: syzbot+e808452bad7c375cbee6@syzkaller-ppc64.appspotmail.com
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Reviewed-by: Christophe Leroy <christophe.leroy@c-s.fr>
+Link: https://lore.kernel.org/r/20200207122145.11928-1-mpe@ellerman.id.au
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/futex.h | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/arch/powerpc/include/asm/futex.h
++++ b/arch/powerpc/include/asm/futex.h
+@@ -35,7 +35,7 @@ static inline int arch_futex_atomic_op_i
+ {
+ int oldval = 0, ret;
+
+- allow_write_to_user(uaddr, sizeof(*uaddr));
++ allow_read_write_user(uaddr, uaddr, sizeof(*uaddr));
+ pagefault_disable();
+
+ switch (op) {
+@@ -62,7 +62,7 @@ static inline int arch_futex_atomic_op_i
+
+ *oval = oldval;
+
+- prevent_write_to_user(uaddr, sizeof(*uaddr));
++ prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr));
+ return ret;
+ }
+
+@@ -76,7 +76,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
+ if (!access_ok(uaddr, sizeof(u32)))
+ return -EFAULT;
+
+- allow_write_to_user(uaddr, sizeof(*uaddr));
++ allow_read_write_user(uaddr, uaddr, sizeof(*uaddr));
++
+ __asm__ __volatile__ (
+ PPC_ATOMIC_ENTRY_BARRIER
+ "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
+@@ -97,7 +98,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
+ : "cc", "memory");
+
+ *uval = prev;
+- prevent_write_to_user(uaddr, sizeof(*uaddr));
++ prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr));
++
+ return ret;
+ }
+
--- /dev/null
+From 00fe717ee1ea3c2979db4f94b1533c57aed8dea9 Mon Sep 17 00:00:00 2001
+From: Arun Easi <aeasi@marvell.com>
+Date: Thu, 23 Jan 2020 20:50:14 -0800
+Subject: scsi: qla2xxx: Fix unbound NVME response length
+
+From: Arun Easi <aeasi@marvell.com>
+
+commit 00fe717ee1ea3c2979db4f94b1533c57aed8dea9 upstream.
+
+In certain cases, when the response length is less than 32, NVME
+response data is supplied inline in the IOCB. This is indicated by some
+combination of state flags. There was an instance when a high, and
+incorrect, response length was indicated, causing the driver to overrun
+buffers. Fix this by checking and limiting the response payload length.
+
+Fixes: 7401bc18d1ee3 ("scsi: qla2xxx: Add FC-NVMe command handling")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20200124045014.23554-1-hmadhani@marvell.com
+Signed-off-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Himanshu Madhani <hmadhani@marvell.com>
+Reviewed-by: Ewan D. Milne <emilne@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
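+
+The general pattern, as a hedged standalone sketch (names are made up,
+not the qla2xxx structures): a length reported by the hardware is
+untrusted input and must be clamped to the destination buffer size
+before any copy.
+
+	#include <string.h>
+
+	static void copy_rsp_payload(unsigned char *dst, size_t dst_size,
+				     const unsigned char *src, size_t hw_len)
+	{
+		size_t len = hw_len;	/* length claimed by the adapter */
+
+		if (len > dst_size)	/* the bound this fix adds */
+			len = dst_size;
+		memcpy(dst, src, len);
+	}
+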
+ drivers/scsi/qla2xxx/qla_dbg.c | 6 ------
+ drivers/scsi/qla2xxx/qla_dbg.h | 6 ++++++
+ drivers/scsi/qla2xxx/qla_isr.c | 12 ++++++++++++
+ 3 files changed, 18 insertions(+), 6 deletions(-)
+
+--- a/drivers/scsi/qla2xxx/qla_dbg.c
++++ b/drivers/scsi/qla2xxx/qla_dbg.c
+@@ -2519,12 +2519,6 @@ qla83xx_fw_dump_failed:
+ /* Driver Debug Functions. */
+ /****************************************************************************/
+
+-static inline int
+-ql_mask_match(uint level)
+-{
+- return (level & ql2xextended_error_logging) == level;
+-}
+-
+ /*
+ * This function is for formatting and logging debug information.
+ * It is to be used when vha is available. It formats the message
+--- a/drivers/scsi/qla2xxx/qla_dbg.h
++++ b/drivers/scsi/qla2xxx/qla_dbg.h
+@@ -374,3 +374,9 @@ extern int qla24xx_dump_ram(struct qla_h
+ extern void qla24xx_pause_risc(struct device_reg_24xx __iomem *,
+ struct qla_hw_data *);
+ extern int qla24xx_soft_reset(struct qla_hw_data *);
++
++static inline int
++ql_mask_match(uint level)
++{
++ return (level & ql2xextended_error_logging) == level;
++}
+--- a/drivers/scsi/qla2xxx/qla_isr.c
++++ b/drivers/scsi/qla2xxx/qla_isr.c
+@@ -1897,6 +1897,18 @@ static void qla24xx_nvme_iocb_entry(scsi
+ inbuf = (uint32_t *)&sts->nvme_ersp_data;
+ outbuf = (uint32_t *)fd->rspaddr;
+ iocb->u.nvme.rsp_pyld_len = le16_to_cpu(sts->nvme_rsp_pyld_len);
++ if (unlikely(iocb->u.nvme.rsp_pyld_len >
++ sizeof(struct nvme_fc_ersp_iu))) {
++ if (ql_mask_match(ql_dbg_io)) {
++ WARN_ONCE(1, "Unexpected response payload length %u.\n",
++ iocb->u.nvme.rsp_pyld_len);
++ ql_log(ql_log_warn, fcport->vha, 0x5100,
++ "Unexpected response payload length %u.\n",
++ iocb->u.nvme.rsp_pyld_len);
++ }
++ iocb->u.nvme.rsp_pyld_len =
++ sizeof(struct nvme_fc_ersp_iu);
++ }
+ iter = iocb->u.nvme.rsp_pyld_len >> 2;
+ for (; iter; iter--)
+ *outbuf++ = swab32(*inbuf++);
crypto-atmel-aes-fix-counter-overflow-in-ctr-mode.patch
crypto-api-fix-race-condition-in-crypto_spawn_alg.patch
crypto-picoxcell-adjust-the-position-of-tasklet_init-and-fix-missed-tasklet_kill.patch
+powerpc-futex-fix-incorrect-user-access-blocking.patch
+scsi-qla2xxx-fix-unbound-nvme-response-length.patch
+nfs-fix-memory-leaks-and-corruption-in-readdir.patch
+nfs-directory-page-cache-pages-need-to-be-locked-when-read.patch
+nfsd-fix-filecache-lookup.patch
+jbd2_seq_info_next-should-increase-position-index.patch
+ext4-fix-deadlock-allocating-crypto-bounce-page-from-mempool.patch
+ext4-fix-race-conditions-in-d_compare-and-d_hash.patch
+btrfs-fix-missing-hole-after-hole-punching-and-fsync-when-using-no_holes.patch
+btrfs-make-deduplication-with-range-including-the-last-block-work.patch
+btrfs-fix-infinite-loop-during-fsync-after-rename-operations.patch
+btrfs-set-trans-drity-in-btrfs_commit_transaction.patch
+btrfs-drop-log-root-for-dropped-roots.patch
+btrfs-fix-race-between-adding-and-putting-tree-mod-seq-elements-and-nodes.patch
+btrfs-flush-write-bio-if-we-loop-in-extent_write_cache_pages.patch
+btrfs-correctly-handle-empty-trees-in-find_first_clear_extent_bit.patch
+arm-tegra-enable-pllp-bypass-during-tegra124-lp1.patch
+iwlwifi-don-t-throw-error-when-trying-to-remove-igtk.patch
+mwifiex-fix-unbalanced-locking-in-mwifiex_process_country_ie.patch
+sunrpc-expiry_time-should-be-seconds-not-timeval.patch
+gfs2-fix-gfs2_find_jhead-that-returns-uninitialized-jhead-with-seq-0.patch
+gfs2-move-setting-current-backing_dev_info.patch
+gfs2-fix-o_sync-write-handling.patch
+drm-atmel-hlcdc-use-double-rate-for-pixel-clock-only-if-supported.patch
+drm-atmel-hlcdc-enable-clock-before-configuring-timing-engine.patch
+drm-atmel-hlcdc-prefer-a-lower-pixel-clock-than-requested.patch
+drm-rect-avoid-division-by-zero.patch
+media-iguanair-fix-endpoint-sanity-check.patch
+media-rc-ensure-lirc-is-initialized-before-registering-input-device.patch
+tools-kvm_stat-fix-kvm_exit-filter-name.patch
+xen-balloon-support-xend-based-toolstack-take-two.patch
+watchdog-fix-uaf-in-reboot-notifier-handling-in-watchdog-core-code.patch
+bcache-add-readahead-cache-policy-options-via-sysfs-interface.patch
+eventfd-track-eventfd_signal-recursion-depth.patch
+aio-prevent-potential-eventfd-recursion-on-poll.patch
+kvm-x86-refactor-picdev_write-to-prevent-spectre-v1-l1tf-attacks.patch
+kvm-x86-refactor-prefix-decoding-to-prevent-spectre-v1-l1tf-attacks.patch
+kvm-x86-protect-pmu_intel.c-from-spectre-v1-l1tf-attacks.patch
+kvm-x86-protect-dr-based-index-computations-from-spectre-v1-l1tf-attacks.patch
+kvm-x86-protect-kvm_lapic_reg_write-from-spectre-v1-l1tf-attacks.patch
+kvm-x86-protect-kvm_hv_msr__crash_data-from-spectre-v1-l1tf-attacks.patch
+kvm-x86-protect-ioapic_write_indirect-from-spectre-v1-l1tf-attacks.patch
+kvm-x86-protect-msr-based-index-computations-in-pmu.h-from-spectre-v1-l1tf-attacks.patch
+kvm-x86-protect-ioapic_read_indirect-from-spectre-v1-l1tf-attacks.patch
+kvm-x86-protect-msr-based-index-computations-from-spectre-v1-l1tf-attacks-in-x86.c.patch
+kvm-x86-protect-x86_decode_insn-from-spectre-v1-l1tf-attacks.patch
+kvm-x86-protect-msr-based-index-computations-in-fixed_msr_to_seg_unit-from-spectre-v1-l1tf-attacks.patch
+kvm-x86-fix-potential-put_fpu-w-o-load_fpu-on-mpx-platform.patch
+kvm-ppc-book3s-hv-uninit-vcpu-if-vcore-creation-fails.patch
+kvm-ppc-book3s-pr-free-shared-page-if-mmu-initialization-fails.patch
+kvm-svm-pku-not-currently-supported.patch
--- /dev/null
+From 3d96208c30f84d6edf9ab4fac813306ac0d20c10 Mon Sep 17 00:00:00 2001
+From: Roberto Bergantinos Corpas <rbergant@redhat.com>
+Date: Tue, 4 Feb 2020 11:32:56 +0100
+Subject: sunrpc: expiry_time should be seconds not timeval
+
+From: Roberto Bergantinos Corpas <rbergant@redhat.com>
+
+commit 3d96208c30f84d6edf9ab4fac813306ac0d20c10 upstream.
+
+When upcalling gssproxy, cache_head.expiry_time is set as a
+timeval, not seconds since boot. As such, RPC cache expiry
+logic will not clean expired objects created under
+auth.rpcsec.context cache.
+
+This has proven to cause kernel memory leaks in the field. Fix it by
+using the 64-bit variants of getboottime/timespec to convert the expiry
+time to seconds since boot.
+
+Expiration times have worked this way since 2010's c5b29f885afe "sunrpc:
+use seconds since boot in expiry cache". The gssproxy code introduced
+in 2012 added gss_proxy_save_rsc and introduced the bug. That's a while
+for this to lurk, but it required a bit of an extreme case to make it
+obvious.
+
+Signed-off-by: Roberto Bergantinos Corpas <rbergant@redhat.com>
+Cc: stable@vger.kernel.org
+Fixes: 030d794bf498 "SUNRPC: Use gssproxy upcall for server..."
+Tested-By: Frank Sorenson <sorenson@redhat.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
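+
+A minimal kernel-style sketch of the conversion (illustrative, not the
+svcauth_gss code verbatim): the RPC cache compares expiry_time against
+seconds since boot, while gssproxy hands back wall-clock seconds, so
+the wall-clock time of boot must be subtracted.
+
+	static time64_t wallclock_to_boot_relative(time64_t wall_expiry)
+	{
+		struct timespec64 boot;
+
+		getboottime64(&boot);		/* wall-clock time of boot */
+		return wall_expiry - boot.tv_sec;
+	}
+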
+ net/sunrpc/auth_gss/svcauth_gss.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/sunrpc/auth_gss/svcauth_gss.c
++++ b/net/sunrpc/auth_gss/svcauth_gss.c
+@@ -1245,6 +1245,7 @@ static int gss_proxy_save_rsc(struct cac
+ dprintk("RPC: No creds found!\n");
+ goto out;
+ } else {
++ struct timespec64 boot;
+
+ /* steal creds */
+ rsci.cred = ud->creds;
+@@ -1265,6 +1266,9 @@ static int gss_proxy_save_rsc(struct cac
+ &expiry, GFP_KERNEL);
+ if (status)
+ goto out;
++
++ getboottime64(&boot);
++ expiry -= boot.tv_sec;
+ }
+
+ rsci.h.expiry_time = expiry;
--- /dev/null
+From 5fcf3a55a62afb0760ccb6f391d62f20bce4a42f Mon Sep 17 00:00:00 2001
+From: Gavin Shan <gshan@redhat.com>
+Date: Tue, 10 Dec 2019 15:48:29 +1100
+Subject: tools/kvm_stat: Fix kvm_exit filter name
+
+From: Gavin Shan <gshan@redhat.com>
+
+commit 5fcf3a55a62afb0760ccb6f391d62f20bce4a42f upstream.
+
+The filter name is fixed to "exit_reason" for some kvm_exit events, no
+matter what architecture we are on. Actually, the filter name
+("exit_reason") is only applicable to x86, meaning it's broken on other
+architectures, including aarch64.
+
+This fixes the issue by providing per-architecture kvm_exit filter
+names. The appropriate filter name is then picked and applied through
+ioctl(fd, SET_FILTER).
+
+Reported-by: Andrew Jones <drjones@redhat.com>
+Signed-off-by: Gavin Shan <gshan@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/kvm/kvm_stat/kvm_stat | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/tools/kvm/kvm_stat/kvm_stat
++++ b/tools/kvm/kvm_stat/kvm_stat
+@@ -270,6 +270,7 @@ class ArchX86(Arch):
+ def __init__(self, exit_reasons):
+ self.sc_perf_evt_open = 298
+ self.ioctl_numbers = IOCTL_NUMBERS
++ self.exit_reason_field = 'exit_reason'
+ self.exit_reasons = exit_reasons
+
+ def debugfs_is_child(self, field):
+@@ -289,6 +290,7 @@ class ArchPPC(Arch):
+ # numbers depend on the wordsize.
+ char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
+ self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
++ self.exit_reason_field = 'exit_nr'
+ self.exit_reasons = {}
+
+ def debugfs_is_child(self, field):
+@@ -300,6 +302,7 @@ class ArchA64(Arch):
+ def __init__(self):
+ self.sc_perf_evt_open = 241
+ self.ioctl_numbers = IOCTL_NUMBERS
++ self.exit_reason_field = 'esr_ec'
+ self.exit_reasons = AARCH64_EXIT_REASONS
+
+ def debugfs_is_child(self, field):
+@@ -311,6 +314,7 @@ class ArchS390(Arch):
+ def __init__(self):
+ self.sc_perf_evt_open = 331
+ self.ioctl_numbers = IOCTL_NUMBERS
++ self.exit_reason_field = None
+ self.exit_reasons = None
+
+ def debugfs_is_child(self, field):
+@@ -541,8 +545,8 @@ class TracepointProvider(Provider):
+ """
+ filters = {}
+ filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
+- if ARCH.exit_reasons:
+- filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
++ if ARCH.exit_reason_field and ARCH.exit_reasons:
++ filters['kvm_exit'] = (ARCH.exit_reason_field, ARCH.exit_reasons)
+ return filters
+
+ def _get_available_fields(self):
--- /dev/null
+From 69503e585192fdd84b240f18a0873d20e18a2e0a Mon Sep 17 00:00:00 2001
+From: Vladis Dronov <vdronov@redhat.com>
+Date: Wed, 8 Jan 2020 13:53:47 +0100
+Subject: watchdog: fix UAF in reboot notifier handling in watchdog core code
+
+From: Vladis Dronov <vdronov@redhat.com>
+
+commit 69503e585192fdd84b240f18a0873d20e18a2e0a upstream.
+
+After the commit 44ea39420fc9 ("drivers/watchdog: make use of
+devm_register_reboot_notifier()") the struct notifier_block reboot_nb in
+the struct watchdog_device is removed from the reboot notifiers chain at
+the time watchdog's chardev is closed. But at least in i6300esb.c case
+reboot_nb is embedded in the struct esb_dev which can be freed on its
+device removal and before the chardev is closed, thus UAF at reboot:
+
+[ 7.728581] esb_probe: esb_dev.watchdog_device ffff91316f91ab28
+ts# uname -r note the address ^^^
+5.5.0-rc5-ae6088-wdog
+ts# ./openwdog0 &
+[1] 696
+ts# opened /dev/watchdog0, sleeping 10s...
+ts# echo 1 > /sys/devices/pci0000\:00/0000\:00\:09.0/remove
+[ 178.086079] devres:rel_nodes: dev ffff91317668a0b0 data ffff91316f91ab28
+ esb_dev.watchdog_device.reboot_nb memory is freed here ^^^
+ts# ...woken up
+[ 181.459010] devres:rel_nodes: dev ffff913171781000 data ffff913174a1dae8
+[ 181.460195] devm_unreg_reboot_notifier: res ffff913174a1dae8 nb ffff91316f91ab78
+ attempt to use memory already freed ^^^
+[ 181.461063] devm_unreg_reboot_notifier: nb->call 6b6b6b6b6b6b6b6b
+[ 181.461243] devm_unreg_reboot_notifier: nb->next 6b6b6b6b6b6b6b6b
+ freed memory is filled with a slub poison ^^^
+[1]+ Done ./openwdog0
+ts# reboot
+[ 229.921862] systemd-shutdown[1]: Rebooting.
+[ 229.939265] notifier_call_chain: nb ffffffff9c6c2f20 nb->next ffffffff9c6d50c0
+[ 229.943080] notifier_call_chain: nb ffffffff9c6d50c0 nb->next 6b6b6b6b6b6b6b6b
+[ 229.946054] notifier_call_chain: nb 6b6b6b6b6b6b6b6b INVAL
+[ 229.957584] general protection fault: 0000 [#1] SMP
+[ 229.958770] CPU: 0 PID: 1 Comm: systemd-shutdow Not tainted 5.5.0-rc5-ae6088-wdog
+[ 229.960224] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ...
+[ 229.963288] RIP: 0010:notifier_call_chain+0x66/0xd0
+[ 229.969082] RSP: 0018:ffffb20dc0013d88 EFLAGS: 00010246
+[ 229.970812] RAX: 000000000000002e RBX: 6b6b6b6b6b6b6b6b RCX: 00000000000008b3
+[ 229.972929] RDX: 0000000000000000 RSI: 0000000000000096 RDI: ffffffff9ccc46ac
+[ 229.975028] RBP: 0000000000000001 R08: 0000000000000000 R09: 00000000000008b3
+[ 229.977039] R10: 0000000000000001 R11: ffffffff9c26c740 R12: 0000000000000000
+[ 229.979155] R13: 6b6b6b6b6b6b6b6b R14: 0000000000000000 R15: 00000000fffffffa
+... slub_debug=FZP poison ^^^
+[ 229.989089] Call Trace:
+[ 229.990157] blocking_notifier_call_chain+0x43/0x59
+[ 229.991401] kernel_restart_prepare+0x14/0x30
+[ 229.992607] kernel_restart+0x9/0x30
+[ 229.993800] __do_sys_reboot+0x1d2/0x210
+[ 230.000149] do_syscall_64+0x3d/0x130
+[ 230.001277] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[ 230.002639] RIP: 0033:0x7f5461bdd177
+[ 230.016402] Modules linked in: i6300esb
+[ 230.050261] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
+
+Fix the crash by reverting 44ea39420fc9 so unregister_reboot_notifier()
+is called when the watchdog device is removed. This also unifies
+handling of the reboot notifier with that of the restart handler, which
+is freed with unregister_restart_handler() in the same place.
+
+Fixes: 44ea39420fc9 ("drivers/watchdog: make use of devm_register_reboot_notifier()")
+Cc: stable@vger.kernel.org # v4.15+
+Signed-off-by: Vladis Dronov <vdronov@redhat.com>
+Reviewed-by: Guenter Roeck <linux@roeck-us.net>
+Link: https://lore.kernel.org/r/20200108125347.6067-1-vdronov@redhat.com
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
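+
+The lifetime rule at the heart of the bug, as an illustrative sketch
+(my_dev and its remove path are made-up names): a notifier_block
+embedded in a structure must be removed from its chain no later than
+that structure is freed, so its unregistration cannot be tied to a
+different, longer-lived object such as the chardev.
+
+	struct my_dev {
+		struct notifier_block reboot_nb;	/* embedded */
+		/* ... */
+	};
+
+	static void my_dev_remove(struct my_dev *d)
+	{
+		/* must happen before the free below */
+		unregister_reboot_notifier(&d->reboot_nb);
+		kfree(d);
+	}
+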
+ drivers/watchdog/watchdog_core.c | 35 +++++++++++++++++++++++++++++++++++
+ drivers/watchdog/watchdog_dev.c | 36 +-----------------------------------
+ 2 files changed, 36 insertions(+), 35 deletions(-)
+
+--- a/drivers/watchdog/watchdog_core.c
++++ b/drivers/watchdog/watchdog_core.c
+@@ -147,6 +147,25 @@ int watchdog_init_timeout(struct watchdo
+ }
+ EXPORT_SYMBOL_GPL(watchdog_init_timeout);
+
++static int watchdog_reboot_notifier(struct notifier_block *nb,
++ unsigned long code, void *data)
++{
++ struct watchdog_device *wdd;
++
++ wdd = container_of(nb, struct watchdog_device, reboot_nb);
++ if (code == SYS_DOWN || code == SYS_HALT) {
++ if (watchdog_active(wdd)) {
++ int ret;
++
++ ret = wdd->ops->stop(wdd);
++ if (ret)
++ return NOTIFY_BAD;
++ }
++ }
++
++ return NOTIFY_DONE;
++}
++
+ static int watchdog_restart_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+ {
+@@ -235,6 +254,19 @@ static int __watchdog_register_device(st
+ }
+ }
+
++ if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) {
++ wdd->reboot_nb.notifier_call = watchdog_reboot_notifier;
++
++ ret = register_reboot_notifier(&wdd->reboot_nb);
++ if (ret) {
++ pr_err("watchdog%d: Cannot register reboot notifier (%d)\n",
++ wdd->id, ret);
++ watchdog_dev_unregister(wdd);
++ ida_simple_remove(&watchdog_ida, id);
++ return ret;
++ }
++ }
++
+ if (wdd->ops->restart) {
+ wdd->restart_nb.notifier_call = watchdog_restart_notifier;
+
+@@ -289,6 +321,9 @@ static void __watchdog_unregister_device
+ if (wdd->ops->restart)
+ unregister_restart_handler(&wdd->restart_nb);
+
++ if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status))
++ unregister_reboot_notifier(&wdd->reboot_nb);
++
+ watchdog_dev_unregister(wdd);
+ ida_simple_remove(&watchdog_ida, wdd->id);
+ }
+--- a/drivers/watchdog/watchdog_dev.c
++++ b/drivers/watchdog/watchdog_dev.c
+@@ -38,7 +38,6 @@
+ #include <linux/miscdevice.h> /* For handling misc devices */
+ #include <linux/module.h> /* For module stuff/... */
+ #include <linux/mutex.h> /* For mutexes */
+-#include <linux/reboot.h> /* For reboot notifier */
+ #include <linux/slab.h> /* For memory functions */
+ #include <linux/types.h> /* For standard types (like size_t) */
+ #include <linux/watchdog.h> /* For watchdog specific items */
+@@ -1077,25 +1076,6 @@ static void watchdog_cdev_unregister(str
+ put_device(&wd_data->dev);
+ }
+
+-static int watchdog_reboot_notifier(struct notifier_block *nb,
+- unsigned long code, void *data)
+-{
+- struct watchdog_device *wdd;
+-
+- wdd = container_of(nb, struct watchdog_device, reboot_nb);
+- if (code == SYS_DOWN || code == SYS_HALT) {
+- if (watchdog_active(wdd)) {
+- int ret;
+-
+- ret = wdd->ops->stop(wdd);
+- if (ret)
+- return NOTIFY_BAD;
+- }
+- }
+-
+- return NOTIFY_DONE;
+-}
+-
+ /*
+ * watchdog_dev_register: register a watchdog device
+ * @wdd: watchdog device
+@@ -1114,22 +1094,8 @@ int watchdog_dev_register(struct watchdo
+ return ret;
+
+ ret = watchdog_register_pretimeout(wdd);
+- if (ret) {
++ if (ret)
+ watchdog_cdev_unregister(wdd);
+- return ret;
+- }
+-
+- if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) {
+- wdd->reboot_nb.notifier_call = watchdog_reboot_notifier;
+-
+- ret = devm_register_reboot_notifier(&wdd->wd_data->dev,
+- &wdd->reboot_nb);
+- if (ret) {
+- pr_err("watchdog%d: Cannot register reboot notifier (%d)\n",
+- wdd->id, ret);
+- watchdog_dev_unregister(wdd);
+- }
+- }
+
+ return ret;
+ }
--- /dev/null
+From eda4eabf86fd6806eaabc23fb90dd056fdac037b Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Fri, 17 Jan 2020 14:49:31 +0100
+Subject: xen/balloon: Support xend-based toolstack take two
+
+From: Juergen Gross <jgross@suse.com>
+
+commit eda4eabf86fd6806eaabc23fb90dd056fdac037b upstream.
+
+Commit 3aa6c19d2f38be ("xen/balloon: Support xend-based toolstack")
+tried to fix a regression with running on rather ancient Xen versions.
+Unfortunately the fix was based on the assumption that xend would
+just use another Xenstore node, but in reality only some downstream
+versions of xend are doing that. The upstream xend does not write
+that Xenstore node at all, so the problem must be fixed in another
+way.
+
+The easiest way to achieve that is to fall back to the behavior
+before commit 96edd61dcf4436 ("xen/balloon: don't online new memory
+initially") in case the static memory maximum can't be read.
+
+This is achieved by setting static_max to the current number of
+memory pages known by the system resulting in target_diff becoming
+zero.
+
+Fixes: 3aa6c19d2f38be ("xen/balloon: Support xend-based toolstack")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: <stable@vger.kernel.org> # 4.13
+Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/xen/xen-balloon.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/xen/xen-balloon.c
++++ b/drivers/xen/xen-balloon.c
+@@ -94,7 +94,7 @@ static void watch_target(struct xenbus_w
+ "%llu", &static_max) == 1))
+ static_max >>= PAGE_SHIFT - 10;
+ else
+- static_max = new_target;
++ static_max = balloon_stats.current_pages;
+
+ target_diff = (xen_pv_domain() || xen_initial_domain()) ? 0
+ : static_max - balloon_stats.target_pages;