git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.19-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 14 Apr 2020 14:42:40 +0000 (16:42 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 14 Apr 2020 14:42:40 +0000 (16:42 +0200)
added patches:
btrfs-drop-block-from-cache-on-error-in-relocation.patch
btrfs-fix-crash-during-unmount-due-to-race-with-delayed-inode-workers.patch
btrfs-fix-missing-file-extent-item-for-hole-after-ranged-fsync.patch
btrfs-fix-missing-semaphore-unlock-in-btrfs_sync_file.patch
btrfs-set-update-the-uuid-generation-as-soon-as-possible.patch
cifs-fix-bug-which-the-return-value-by-asynchronous-read-is-error.patch
crypto-mxs-dcp-fix-scatterlist-linearization-for-hash.patch
kvm-nvmx-properly-handle-userspace-interrupt-window-request.patch
kvm-s390-vsie-fix-delivery-of-addressing-exceptions.patch
kvm-s390-vsie-fix-region-1-asce-sanity-shadow-address-checks.patch
kvm-vmx-always-vmclear-in-use-vmcses-during-crash-with-kexec-support.patch
kvm-vmx-fix-crash-cleanup-when-kvm-wasn-t-used.patch
kvm-x86-allocate-new-rmap-and-large-page-tracking-when-moving-memslot.patch
kvm-x86-gracefully-handle-__vmalloc-failure-during-vm-allocation.patch
mtd-spinand-do-not-erase-the-block-before-writing-a-bad-block-marker.patch
mtd-spinand-stop-using-spinand-oobbuf-for-buffering-bad-block-markers.patch

17 files changed:
queue-4.19/btrfs-drop-block-from-cache-on-error-in-relocation.patch [new file with mode: 0644]
queue-4.19/btrfs-fix-crash-during-unmount-due-to-race-with-delayed-inode-workers.patch [new file with mode: 0644]
queue-4.19/btrfs-fix-missing-file-extent-item-for-hole-after-ranged-fsync.patch [new file with mode: 0644]
queue-4.19/btrfs-fix-missing-semaphore-unlock-in-btrfs_sync_file.patch [new file with mode: 0644]
queue-4.19/btrfs-set-update-the-uuid-generation-as-soon-as-possible.patch [new file with mode: 0644]
queue-4.19/cifs-fix-bug-which-the-return-value-by-asynchronous-read-is-error.patch [new file with mode: 0644]
queue-4.19/crypto-mxs-dcp-fix-scatterlist-linearization-for-hash.patch [new file with mode: 0644]
queue-4.19/kvm-nvmx-properly-handle-userspace-interrupt-window-request.patch [new file with mode: 0644]
queue-4.19/kvm-s390-vsie-fix-delivery-of-addressing-exceptions.patch [new file with mode: 0644]
queue-4.19/kvm-s390-vsie-fix-region-1-asce-sanity-shadow-address-checks.patch [new file with mode: 0644]
queue-4.19/kvm-vmx-always-vmclear-in-use-vmcses-during-crash-with-kexec-support.patch [new file with mode: 0644]
queue-4.19/kvm-vmx-fix-crash-cleanup-when-kvm-wasn-t-used.patch [new file with mode: 0644]
queue-4.19/kvm-x86-allocate-new-rmap-and-large-page-tracking-when-moving-memslot.patch [new file with mode: 0644]
queue-4.19/kvm-x86-gracefully-handle-__vmalloc-failure-during-vm-allocation.patch [new file with mode: 0644]
queue-4.19/mtd-spinand-do-not-erase-the-block-before-writing-a-bad-block-marker.patch [new file with mode: 0644]
queue-4.19/mtd-spinand-stop-using-spinand-oobbuf-for-buffering-bad-block-markers.patch [new file with mode: 0644]
queue-4.19/series

diff --git a/queue-4.19/btrfs-drop-block-from-cache-on-error-in-relocation.patch b/queue-4.19/btrfs-drop-block-from-cache-on-error-in-relocation.patch
new file mode 100644 (file)
index 0000000..36dd4d4
--- /dev/null
@@ -0,0 +1,41 @@
+From 8e19c9732ad1d127b5575a10f4fbcacf740500ff Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Wed, 4 Mar 2020 11:18:23 -0500
+Subject: btrfs: drop block from cache on error in relocation
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 8e19c9732ad1d127b5575a10f4fbcacf740500ff upstream.
+
+If we have an error while building the backref tree in relocation we'll
+process all the pending edges and then free the node.  However if we
+integrated some edges into the cache we'll lose our link to those edges
+by simply freeing this node, which means we'll leak memory and
+references to any roots that we've found.
+
+Instead we need to use remove_backref_node(), which walks through all of
+the edges that are still linked to this node and frees them up and
+drops any root references we may be holding.
+
+CC: stable@vger.kernel.org # 4.9+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/relocation.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -1141,7 +1141,7 @@ out:
+                       free_backref_node(cache, lower);
+               }
+-              free_backref_node(cache, node);
++              remove_backref_node(cache, node);
+               return ERR_PTR(err);
+       }
+       ASSERT(!node || !node->detached);
diff --git a/queue-4.19/btrfs-fix-crash-during-unmount-due-to-race-with-delayed-inode-workers.patch b/queue-4.19/btrfs-fix-crash-during-unmount-due-to-race-with-delayed-inode-workers.patch
new file mode 100644 (file)
index 0000000..50fd02b
--- /dev/null
@@ -0,0 +1,222 @@
+From f0cc2cd70164efe8f75c5d99560f0f69969c72e4 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Fri, 28 Feb 2020 13:04:36 +0000
+Subject: Btrfs: fix crash during unmount due to race with delayed inode workers
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit f0cc2cd70164efe8f75c5d99560f0f69969c72e4 upstream.
+
+During unmount we can have a job from the delayed inode items work queue
+still running, that can lead to at least two bad things:
+
+1) A crash, because the worker can try to create a transaction just
+   after the fs roots were freed;
+
+2) A transaction leak, because the worker can create a transaction
+   before the fs roots are freed and just after we committed the last
+   transaction and after we stopped the transaction kthread.
+
+A stack trace example of the crash:
+
+ [79011.691214] kernel BUG at lib/radix-tree.c:982!
+ [79011.692056] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI
+ [79011.693180] CPU: 3 PID: 1394 Comm: kworker/u8:2 Tainted: G        W         5.6.0-rc2-btrfs-next-54 #2
+ (...)
+ [79011.696789] Workqueue: btrfs-delayed-meta btrfs_work_helper [btrfs]
+ [79011.697904] RIP: 0010:radix_tree_tag_set+0xe7/0x170
+ (...)
+ [79011.702014] RSP: 0018:ffffb3c84a317ca0 EFLAGS: 00010293
+ [79011.702949] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
+ [79011.704202] RDX: ffffb3c84a317cb0 RSI: ffffb3c84a317ca8 RDI: ffff8db3931340a0
+ [79011.705463] RBP: 0000000000000005 R08: 0000000000000005 R09: ffffffff974629d0
+ [79011.706756] R10: ffffb3c84a317bc0 R11: 0000000000000001 R12: ffff8db393134000
+ [79011.708010] R13: ffff8db3931340a0 R14: ffff8db393134068 R15: 0000000000000001
+ [79011.709270] FS:  0000000000000000(0000) GS:ffff8db3b6a00000(0000) knlGS:0000000000000000
+ [79011.710699] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ [79011.711710] CR2: 00007f22c2a0a000 CR3: 0000000232ad4005 CR4: 00000000003606e0
+ [79011.712958] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ [79011.714205] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ [79011.715448] Call Trace:
+ [79011.715925]  record_root_in_trans+0x72/0xf0 [btrfs]
+ [79011.716819]  btrfs_record_root_in_trans+0x4b/0x70 [btrfs]
+ [79011.717925]  start_transaction+0xdd/0x5c0 [btrfs]
+ [79011.718829]  btrfs_async_run_delayed_root+0x17e/0x2b0 [btrfs]
+ [79011.719915]  btrfs_work_helper+0xaa/0x720 [btrfs]
+ [79011.720773]  process_one_work+0x26d/0x6a0
+ [79011.721497]  worker_thread+0x4f/0x3e0
+ [79011.722153]  ? process_one_work+0x6a0/0x6a0
+ [79011.722901]  kthread+0x103/0x140
+ [79011.723481]  ? kthread_create_worker_on_cpu+0x70/0x70
+ [79011.724379]  ret_from_fork+0x3a/0x50
+ (...)
+
+The following diagram shows a sequence of steps that lead to the crash
+during unmount of the filesystem:
+
+        CPU 1                                             CPU 2                                CPU 3
+
+ btrfs_punch_hole()
+   btrfs_btree_balance_dirty()
+     btrfs_balance_delayed_items()
+       --> sees
+           fs_info->delayed_root->items
+           with value 200, which is greater
+           than
+           BTRFS_DELAYED_BACKGROUND (128)
+           and smaller than
+           BTRFS_DELAYED_WRITEBACK (512)
+       btrfs_wq_run_delayed_node()
+         --> queues a job for
+             fs_info->delayed_workers to run
+             btrfs_async_run_delayed_root()
+
+                                                                                            btrfs_async_run_delayed_root()
+                                                                                              --> job queued by CPU 1
+
+                                                                                              --> starts picking and running
+                                                                                                  delayed nodes from the
+                                                                                                  prepare_list list
+
+                                                 close_ctree()
+
+                                                   btrfs_delete_unused_bgs()
+
+                                                   btrfs_commit_super()
+
+                                                     btrfs_join_transaction()
+                                                       --> gets transaction N
+
+                                                     btrfs_commit_transaction(N)
+                                                       --> set transaction state
+                                                        to TRANS_STATE_COMMIT_START
+
+                                                                                             btrfs_first_prepared_delayed_node()
+                                                                                               --> picks delayed node X through
+                                                                                                   the prepare_list list
+
+                                                       btrfs_run_delayed_items()
+
+                                                         btrfs_first_delayed_node()
+                                                           --> also picks delayed node X
+                                                               but through the node_list
+                                                               list
+
+                                                         __btrfs_commit_inode_delayed_items()
+                                                            --> runs all delayed items from
+                                                                this node and drops the
+                                                                node's item count to 0
+                                                                through call to
+                                                                btrfs_release_delayed_inode()
+
+                                                         --> finishes running any remaining
+                                                             delayed nodes
+
+                                                       --> finishes transaction commit
+
+                                                   --> stops cleaner and transaction threads
+
+                                                   btrfs_free_fs_roots()
+                                                     --> frees all roots and removes them
+                                                         from the radix tree
+                                                         fs_info->fs_roots_radix
+
+                                                                                             btrfs_join_transaction()
+                                                                                               start_transaction()
+                                                                                                 btrfs_record_root_in_trans()
+                                                                                                   record_root_in_trans()
+                                                                                                     radix_tree_tag_set()
+                                                                                                       --> crashes because
+                                                                                                           the root is not in
+                                                                                                           the radix tree
+                                                                                                           anymore
+
+If the worker is able to call btrfs_join_transaction() before the unmount
+task frees the fs roots, we end up leaking a transaction and all its
+resources, since after the call to btrfs_commit_super() and stopping the
+transaction kthread, we don't expect to have any transaction open anymore.
+
+When this situation happens the worker has a delayed node that has no
+more items to run, since the task calling btrfs_run_delayed_items(),
+which is doing a transaction commit, picks the same node and runs all
+its items first.
+
+We can not wait for the worker to complete when running delayed items
+through btrfs_run_delayed_items(), because we call that function in
+several phases of a transaction commit, and that could cause a deadlock
+because the worker calls btrfs_join_transaction() and the task doing the
+transaction commit may have already set the transaction state to
+TRANS_STATE_COMMIT_DOING.
+
+Also it's not possible to get into a situation where only some of the
+items of a delayed node are added to the fs/subvolume tree in the current
+transaction and the remaining ones in the next transaction, because when
+running the items of a delayed inode we lock its mutex, effectively
+waiting for the worker if the worker is running the items of the delayed
+node already.
+
+Since this can only cause issues when unmounting a filesystem, fix it in
+a simple way by waiting for any jobs on the delayed workers queue before
+calling btrfs_commit_super() at close_ctree(). This works because at this
+point no one can call btrfs_btree_balance_dirty() or
+btrfs_balance_delayed_items(), and if we end up waiting for any worker to
+complete, btrfs_commit_super() will commit the transaction created by the
+worker.
+
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/async-thread.c |    8 ++++++++
+ fs/btrfs/async-thread.h |    1 +
+ fs/btrfs/disk-io.c      |   13 +++++++++++++
+ 3 files changed, 22 insertions(+)
+
+--- a/fs/btrfs/async-thread.c
++++ b/fs/btrfs/async-thread.c
+@@ -434,3 +434,11 @@ void btrfs_set_work_high_priority(struct
+ {
+       set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
+ }
++
++void btrfs_flush_workqueue(struct btrfs_workqueue *wq)
++{
++      if (wq->high)
++              flush_workqueue(wq->high->normal_wq);
++
++      flush_workqueue(wq->normal->normal_wq);
++}
+--- a/fs/btrfs/async-thread.h
++++ b/fs/btrfs/async-thread.h
+@@ -73,5 +73,6 @@ void btrfs_set_work_high_priority(struct
+ struct btrfs_fs_info *btrfs_work_owner(const struct btrfs_work *work);
+ struct btrfs_fs_info *btrfs_workqueue_owner(const struct __btrfs_workqueue *wq);
+ bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq);
++void btrfs_flush_workqueue(struct btrfs_workqueue *wq);
+ #endif
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -3949,6 +3949,19 @@ void close_ctree(struct btrfs_fs_info *f
+                */
+               btrfs_delete_unused_bgs(fs_info);
++              /*
++               * There might be existing delayed inode workers still running
++               * and holding an empty delayed inode item. We must wait for
++               * them to complete first because they can create a transaction.
++               * This happens when someone calls btrfs_balance_delayed_items()
++               * and then a transaction commit runs the same delayed nodes
++               * before any delayed worker has done something with the nodes.
++               * We must wait for any worker here and not at transaction
++               * commit time since that could cause a deadlock.
++               * This is a very rare case.
++               */
++              btrfs_flush_workqueue(fs_info->delayed_workers);
++
+               ret = btrfs_commit_super(fs_info);
+               if (ret)
+                       btrfs_err(fs_info, "commit super ret %d", ret);
diff --git a/queue-4.19/btrfs-fix-missing-file-extent-item-for-hole-after-ranged-fsync.patch b/queue-4.19/btrfs-fix-missing-file-extent-item-for-hole-after-ranged-fsync.patch
new file mode 100644 (file)
index 0000000..d1b2759
--- /dev/null
@@ -0,0 +1,103 @@
+From 95418ed1d10774cd9a49af6f39e216c1256f1eeb Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 9 Mar 2020 12:41:05 +0000
+Subject: btrfs: fix missing file extent item for hole after ranged fsync
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 95418ed1d10774cd9a49af6f39e216c1256f1eeb upstream.
+
+When doing a fast fsync for a range that starts at an offset greater than
+zero, we can end up with a log that when replayed causes the respective
+inode to miss a file extent item representing a hole if we are not using the
+NO_HOLES feature. This is because for fast fsyncs we don't log any extents
+that cover a range different from the one requested in the fsync.
+
+Example scenario to trigger it:
+
+  $ mkfs.btrfs -O ^no-holes -f /dev/sdd
+  $ mount /dev/sdd /mnt
+
+  # Create a file with a single 256K extent and fsync it to clear the full sync
+  # bit in the inode - we want the msync below to trigger a fast fsync.
+  $ xfs_io -f -c "pwrite -S 0xab 0 256K" -c "fsync" /mnt/foo
+
+  # Force a transaction commit and wipe out the log tree.
+  $ sync
+
+  # Dirty 768K of data, increasing the file size to 1Mb, and flush only
+  # the range from 256K to 512K without updating the log tree
+  # (sync_file_range() does not trigger fsync, it only starts writeback
+  # and waits for it to finish).
+
+  $ xfs_io -c "pwrite -S 0xcd 256K 768K" /mnt/foo
+  $ xfs_io -c "sync_range -abw 256K 256K" /mnt/foo
+
+  # Now dirty the range from 768K to 1M again and sync that range.
+  $ xfs_io -c "mmap -w 768K 256K"        \
+           -c "mwrite -S 0xef 768K 256K" \
+           -c "msync -s 768K 256K"       \
+           -c "munmap"                   \
+           /mnt/foo
+
+  <power fail>
+
+  # Mount to replay the log.
+  $ mount /dev/sdd /mnt
+  $ umount /mnt
+
+  $ btrfs check /dev/sdd
+  Opening filesystem to check...
+  Checking filesystem on /dev/sdd
+  UUID: 482fb574-b288-478e-a190-a9c44a78fca6
+  [1/7] checking root items
+  [2/7] checking extents
+  [3/7] checking free space cache
+  [4/7] checking fs roots
+  root 5 inode 257 errors 100, file extent discount
+  Found file extent holes:
+       start: 262144, len: 524288
+  ERROR: errors found in fs roots
+  found 720896 bytes used, error(s) found
+  total csum bytes: 512
+  total tree bytes: 131072
+  total fs tree bytes: 32768
+  total extent tree bytes: 16384
+  btree space waste bytes: 123514
+  file data blocks allocated: 589824
+    referenced 589824
+
+Fix this issue by setting the range to full (0 to LLONG_MAX) when the
+NO_HOLES feature is not enabled. This results in extra work being done
+but it gives the guarantee we don't end up with missing holes after
+replaying the log.
+
+CC: stable@vger.kernel.org # 4.19+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/file.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -2074,6 +2074,16 @@ int btrfs_sync_file(struct file *file, l
+       btrfs_init_log_ctx(&ctx, inode);
+       /*
++       * Set the range to full if the NO_HOLES feature is not enabled.
++       * This is to avoid missing file extent items representing holes after
++       * replaying the log.
++       */
++      if (!btrfs_fs_incompat(fs_info, NO_HOLES)) {
++              start = 0;
++              end = LLONG_MAX;
++      }
++
++      /*
+        * We write the dirty pages in the range and wait until they complete
+        * out of the ->i_mutex. If so, we can flush the dirty pages by
+        * multi-task, and make the performance up.  See
diff --git a/queue-4.19/btrfs-fix-missing-semaphore-unlock-in-btrfs_sync_file.patch b/queue-4.19/btrfs-fix-missing-semaphore-unlock-in-btrfs_sync_file.patch
new file mode 100644 (file)
index 0000000..9d81e38
--- /dev/null
@@ -0,0 +1,35 @@
+From 6ff06729c22ec0b7498d900d79cc88cfb8aceaeb Mon Sep 17 00:00:00 2001
+From: Robbie Ko <robbieko@synology.com>
+Date: Tue, 17 Mar 2020 14:31:02 +0800
+Subject: btrfs: fix missing semaphore unlock in btrfs_sync_file
+
+From: Robbie Ko <robbieko@synology.com>
+
+commit 6ff06729c22ec0b7498d900d79cc88cfb8aceaeb upstream.
+
+Ordered ops are started twice in sync file, once outside of inode mutex
+and once inside, taking the dio semaphore. There was one error path
+missing the semaphore unlock.
+
+Fixes: aab15e8ec2576 ("Btrfs: fix rare chances for data loss when doing a fast fsync")
+CC: stable@vger.kernel.org # 4.19+
+Signed-off-by: Robbie Ko <robbieko@synology.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+[ add changelog ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/file.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -2137,6 +2137,7 @@ int btrfs_sync_file(struct file *file, l
+        */
+       ret = start_ordered_ops(inode, start, end);
+       if (ret) {
++              up_write(&BTRFS_I(inode)->dio_sem);
+               inode_unlock(inode);
+               goto out;
+       }
diff --git a/queue-4.19/btrfs-set-update-the-uuid-generation-as-soon-as-possible.patch b/queue-4.19/btrfs-set-update-the-uuid-generation-as-soon-as-possible.patch
new file mode 100644 (file)
index 0000000..8b96667
--- /dev/null
@@ -0,0 +1,64 @@
+From 75ec1db8717a8f0a9d9c8d033e542fdaa7b73898 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Fri, 14 Feb 2020 15:22:06 -0500
+Subject: btrfs: set update the uuid generation as soon as possible
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 75ec1db8717a8f0a9d9c8d033e542fdaa7b73898 upstream.
+
+In my EIO stress testing I noticed I was getting forced to rescan the
+uuid tree pretty often, which was weird.  This is because my error
+injection stuff would sometimes inject an error after log replay but
+before we loaded the UUID tree.  If log replay committed the transaction
+it wouldn't have updated the uuid tree generation, but the tree was
+valid and didn't change, so there's no reason to not update the
+generation here.
+
+Fix this by setting the BTRFS_FS_UPDATE_UUID_TREE_GEN bit immediately
+after reading all the fs roots if the uuid tree generation matches the
+fs generation.  Then any transaction commits that happen during mount
+won't screw up our uuid tree state, forcing us to do needless uuid
+rescans.
+
+Fixes: 70f801754728 ("Btrfs: check UUID tree during mount if required")
+CC: stable@vger.kernel.org # 4.19+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/disk-io.c |   14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -3007,6 +3007,18 @@ retry_root_backup:
+       fs_info->generation = generation;
+       fs_info->last_trans_committed = generation;
++      /*
++       * If we have a uuid root and we're not being told to rescan we need to
++       * check the generation here so we can set the
++       * BTRFS_FS_UPDATE_UUID_TREE_GEN bit.  Otherwise we could commit the
++       * transaction during a balance or the log replay without updating the
++       * uuid generation, and then if we crash we would rescan the uuid tree,
++       * even though it was perfectly fine.
++       */
++      if (fs_info->uuid_root && !btrfs_test_opt(fs_info, RESCAN_UUID_TREE) &&
++          fs_info->generation == btrfs_super_uuid_tree_generation(disk_super))
++              set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
++
+       ret = btrfs_verify_dev_extents(fs_info);
+       if (ret) {
+               btrfs_err(fs_info,
+@@ -3237,8 +3249,6 @@ retry_root_backup:
+                       close_ctree(fs_info);
+                       return ret;
+               }
+-      } else {
+-              set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
+       }
+       set_bit(BTRFS_FS_OPEN, &fs_info->flags);
diff --git a/queue-4.19/cifs-fix-bug-which-the-return-value-by-asynchronous-read-is-error.patch b/queue-4.19/cifs-fix-bug-which-the-return-value-by-asynchronous-read-is-error.patch
new file mode 100644 (file)
index 0000000..0eb9ec4
--- /dev/null
@@ -0,0 +1,63 @@
+From 97adda8b3ab703de8e4c8d27646ddd54fe22879c Mon Sep 17 00:00:00 2001
+From: Yilu Lin <linyilu@huawei.com>
+Date: Wed, 18 Mar 2020 11:59:19 +0800
+Subject: CIFS: Fix bug which the return value by asynchronous read is error
+
+From: Yilu Lin <linyilu@huawei.com>
+
+commit 97adda8b3ab703de8e4c8d27646ddd54fe22879c upstream.
+
+This patch is used to fix the bug in collect_uncached_read_data()
+that rc is automatically converted from a signed number to an
+unsigned number when the CIFS asynchronous read fails.
+This causes ctx->rc to hold a wrong value.
+
+Example:
+Share a directory and create a file on the Windows OS.
+Mount the directory to the Linux OS using CIFS.
+On the CIFS client of the Linux OS, invoke the pread interface to
+deliver the read request.
+
+The size of the read length plus offset of the read request is greater
+than the maximum file size.
+
+In this case, the CIFS server on the Windows OS returns a failure
+message (for example, the return value of
+smb2.nt_status is STATUS_INVALID_PARAMETER).
+
+After receiving the response message, the CIFS client parses
+smb2.nt_status to STATUS_INVALID_PARAMETER
+and converts it to the Linux error code (rdata->result=-22).
+
+Then the CIFS client invokes the collect_uncached_read_data function to
+assign the value of rdata->result to rc, that is, rc=rdata->result=-22.
+
+The type of the ctx->total_len variable is unsigned integer,
+the type of the rc variable is integer, and the type of
+the ctx->rc variable is ssize_t.
+
+Therefore, during the ternary operation, the value of rc is
+automatically converted to an unsigned number. The final result is
+ctx->rc=4294967274. However, the expected result is ctx->rc=-22.
+
+Signed-off-by: Yilu Lin <linyilu@huawei.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+CC: Stable <stable@vger.kernel.org>
+Acked-by: Ronnie Sahlberg <lsahlber@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/file.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/cifs/file.c
++++ b/fs/cifs/file.c
+@@ -3339,7 +3339,7 @@ again:
+       if (rc == -ENODATA)
+               rc = 0;
+-      ctx->rc = (rc == 0) ? ctx->total_len : rc;
++      ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
+       mutex_unlock(&ctx->aio_mutex);
diff --git a/queue-4.19/crypto-mxs-dcp-fix-scatterlist-linearization-for-hash.patch b/queue-4.19/crypto-mxs-dcp-fix-scatterlist-linearization-for-hash.patch
new file mode 100644 (file)
index 0000000..d2d871b
--- /dev/null
@@ -0,0 +1,113 @@
+From fa03481b6e2e82355c46644147b614f18c7a8161 Mon Sep 17 00:00:00 2001
+From: Rosioru Dragos <dragos.rosioru@nxp.com>
+Date: Tue, 25 Feb 2020 17:05:52 +0200
+Subject: crypto: mxs-dcp - fix scatterlist linearization for hash
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Rosioru Dragos <dragos.rosioru@nxp.com>
+
+commit fa03481b6e2e82355c46644147b614f18c7a8161 upstream.
+
+The incorrect traversal of the scatterlist during the linearization phase
+led to computing the hash value of the wrong input buffer.
+New implementation uses scatterwalk_map_and_copy()
+to address this issue.
+
+Cc: <stable@vger.kernel.org>
+Fixes: 15b59e7c3733 ("crypto: mxs - Add Freescale MXS DCP driver")
+Signed-off-by: Rosioru Dragos <dragos.rosioru@nxp.com>
+Reviewed-by: Horia Geantă <horia.geanta@nxp.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/crypto/mxs-dcp.c |   54 ++++++++++++++++++++++-------------------------
+ 1 file changed, 26 insertions(+), 28 deletions(-)
+
+--- a/drivers/crypto/mxs-dcp.c
++++ b/drivers/crypto/mxs-dcp.c
+@@ -25,6 +25,7 @@
+ #include <crypto/sha.h>
+ #include <crypto/internal/hash.h>
+ #include <crypto/internal/skcipher.h>
++#include <crypto/scatterwalk.h>
+ #define DCP_MAX_CHANS 4
+ #define DCP_BUF_SZ    PAGE_SIZE
+@@ -621,49 +622,46 @@ static int dcp_sha_req_to_buf(struct cry
+       struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm);
+       struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req);
+       struct hash_alg_common *halg = crypto_hash_alg_common(tfm);
+-      const int nents = sg_nents(req->src);
+       uint8_t *in_buf = sdcp->coh->sha_in_buf;
+       uint8_t *out_buf = sdcp->coh->sha_out_buf;
+-      uint8_t *src_buf;
+-
+       struct scatterlist *src;
+-      unsigned int i, len, clen;
++      unsigned int i, len, clen, oft = 0;
+       int ret;
+       int fin = rctx->fini;
+       if (fin)
+               rctx->fini = 0;
+-      for_each_sg(req->src, src, nents, i) {
+-              src_buf = sg_virt(src);
+-              len = sg_dma_len(src);
++      src = req->src;
++      len = req->nbytes;
+-              do {
+-                      if (actx->fill + len > DCP_BUF_SZ)
+-                              clen = DCP_BUF_SZ - actx->fill;
+-                      else
+-                              clen = len;
++      while (len) {
++              if (actx->fill + len > DCP_BUF_SZ)
++                      clen = DCP_BUF_SZ - actx->fill;
++              else
++                      clen = len;
+-                      memcpy(in_buf + actx->fill, src_buf, clen);
+-                      len -= clen;
+-                      src_buf += clen;
+-                      actx->fill += clen;
++              scatterwalk_map_and_copy(in_buf + actx->fill, src, oft, clen,
++                                       0);
+-                      /*
+-                       * If we filled the buffer and still have some
+-                       * more data, submit the buffer.
+-                       */
+-                      if (len && actx->fill == DCP_BUF_SZ) {
+-                              ret = mxs_dcp_run_sha(req);
+-                              if (ret)
+-                                      return ret;
+-                              actx->fill = 0;
+-                              rctx->init = 0;
+-                      }
+-              } while (len);
++              len -= clen;
++              oft += clen;
++              actx->fill += clen;
++
++              /*
++               * If we filled the buffer and still have some
++               * more data, submit the buffer.
++               */
++              if (len && actx->fill == DCP_BUF_SZ) {
++                      ret = mxs_dcp_run_sha(req);
++                      if (ret)
++                              return ret;
++                      actx->fill = 0;
++                      rctx->init = 0;
++              }
+       }
+       if (fin) {
diff --git a/queue-4.19/kvm-nvmx-properly-handle-userspace-interrupt-window-request.patch b/queue-4.19/kvm-nvmx-properly-handle-userspace-interrupt-window-request.patch
new file mode 100644 (file)
index 0000000..00f4f39
--- /dev/null
@@ -0,0 +1,160 @@
+From a1c77abb8d93381e25a8d2df3a917388244ba776 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+Date: Mon, 2 Mar 2020 22:27:35 -0800
+Subject: KVM: nVMX: Properly handle userspace interrupt window request
+
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+
+commit a1c77abb8d93381e25a8d2df3a917388244ba776 upstream.
+
+Return true for vmx_interrupt_allowed() if the vCPU is in L2 and L1 has
+external interrupt exiting enabled.  IRQs are never blocked in hardware
+if the CPU is in the guest (L2 from L1's perspective) when IRQs trigger
+VM-Exit.
+
+The new check percolates up to kvm_vcpu_ready_for_interrupt_injection()
+and thus vcpu_run(), and so KVM will exit to userspace if userspace has
+requested an interrupt window (to inject an IRQ into L1).
+
+Remove the @external_intr param from vmx_check_nested_events(), which is
+actually an indicator that userspace wants an interrupt window, e.g.
+it's named @req_int_win further up the stack.  Injecting a VM-Exit into
+L1 to try and bounce out to L0 userspace is all kinds of broken and is
+no longer necessary.
+
+Remove the hack in nested_vmx_vmexit() that attempted to workaround the
+breakage in vmx_check_nested_events() by only filling interrupt info if
+there's an actual interrupt pending.  The hack actually made things
+worse because it caused KVM to _never_ fill interrupt info when the
+LAPIC resides in userspace (kvm_cpu_has_interrupt() queries
+interrupt.injected, which is always cleared by prepare_vmcs12() before
+reaching the hack in nested_vmx_vmexit()).
+
+Fixes: 6550c4df7e50 ("KVM: nVMX: Fix interrupt window request with "Acknowledge interrupt on exit"")
+Cc: stable@vger.kernel.org
+Cc: Liran Alon <liran.alon@oracle.com>
+Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/kvm_host.h |    2 +-
+ arch/x86/kvm/vmx.c              |   27 +++++++++++----------------
+ arch/x86/kvm/x86.c              |   10 +++++-----
+ 3 files changed, 17 insertions(+), 22 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1070,7 +1070,7 @@ struct kvm_x86_ops {
+       bool (*xsaves_supported)(void);
+       bool (*umip_emulated)(void);
+-      int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
++      int (*check_nested_events)(struct kvm_vcpu *vcpu);
+       void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
+       void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -6954,8 +6954,13 @@ static int vmx_nmi_allowed(struct kvm_vc
+ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
+ {
+-      return (!to_vmx(vcpu)->nested.nested_run_pending &&
+-              vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
++      if (to_vmx(vcpu)->nested.nested_run_pending)
++              return false;
++
++      if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
++              return true;
++
++      return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+               !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+                       (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
+ }
+@@ -12990,7 +12995,7 @@ static void vmcs12_save_pending_event(st
+       }
+ }
+-static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
++static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
+ {
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       unsigned long exit_qual;
+@@ -13028,8 +13033,7 @@ static int vmx_check_nested_events(struc
+               return 0;
+       }
+-      if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
+-          nested_exit_on_intr(vcpu)) {
++      if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(vcpu)) {
+               if (block_nested_events)
+                       return -EBUSY;
+               nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
+@@ -13607,17 +13611,8 @@ static void nested_vmx_vmexit(struct kvm
+       vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+       if (likely(!vmx->fail)) {
+-              /*
+-               * TODO: SDM says that with acknowledge interrupt on
+-               * exit, bit 31 of the VM-exit interrupt information
+-               * (valid interrupt) is always set to 1 on
+-               * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't
+-               * need kvm_cpu_has_interrupt().  See the commit
+-               * message for details.
+-               */
+-              if (nested_exit_intr_ack_set(vcpu) &&
+-                  exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
+-                  kvm_cpu_has_interrupt(vcpu)) {
++              if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
++                  nested_exit_intr_ack_set(vcpu)) {
+                       int irq = kvm_cpu_get_interrupt(vcpu);
+                       WARN_ON(irq < 0);
+                       vmcs12->vm_exit_intr_info = irq |
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -7124,7 +7124,7 @@ static void update_cr8_intercept(struct
+       kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
+ }
+-static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
++static int inject_pending_event(struct kvm_vcpu *vcpu)
+ {
+       int r;
+@@ -7160,7 +7160,7 @@ static int inject_pending_event(struct k
+        * from L2 to L1.
+        */
+       if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
+-              r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
++              r = kvm_x86_ops->check_nested_events(vcpu);
+               if (r != 0)
+                       return r;
+       }
+@@ -7210,7 +7210,7 @@ static int inject_pending_event(struct k
+                * KVM_REQ_EVENT only on certain events and not unconditionally?
+                */
+               if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
+-                      r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
++                      r = kvm_x86_ops->check_nested_events(vcpu);
+                       if (r != 0)
+                               return r;
+               }
+@@ -7683,7 +7683,7 @@ static int vcpu_enter_guest(struct kvm_v
+                       goto out;
+               }
+-              if (inject_pending_event(vcpu, req_int_win) != 0)
++              if (inject_pending_event(vcpu) != 0)
+                       req_immediate_exit = true;
+               else {
+                       /* Enable SMI/NMI/IRQ window open exits if needed.
+@@ -7894,7 +7894,7 @@ static inline int vcpu_block(struct kvm
+ static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
+ {
+       if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
+-              kvm_x86_ops->check_nested_events(vcpu, false);
++              kvm_x86_ops->check_nested_events(vcpu);
+       return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+               !vcpu->arch.apf.halted);
diff --git a/queue-4.19/kvm-s390-vsie-fix-delivery-of-addressing-exceptions.patch b/queue-4.19/kvm-s390-vsie-fix-delivery-of-addressing-exceptions.patch
new file mode 100644 (file)
index 0000000..4ac68c1
--- /dev/null
@@ -0,0 +1,50 @@
+From 4d4cee96fb7a3cc53702a9be8299bf525be4ee98 Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Fri, 3 Apr 2020 17:30:47 +0200
+Subject: KVM: s390: vsie: Fix delivery of addressing exceptions
+
+From: David Hildenbrand <david@redhat.com>
+
+commit 4d4cee96fb7a3cc53702a9be8299bf525be4ee98 upstream.
+
+Whenever we get an -EFAULT, we failed to read in guest 2 physical
+address space. Such addressing exceptions are reported via a program
+intercept to the nested hypervisor.
+
+We faked the intercept, we have to return to guest 2. Instead, right
+now we would be returning -EFAULT from the intercept handler, eventually
+crashing the VM.
+The correct thing to do is to return 1 as rc == 1 is the internal
+representation of "we have to go back into g2".
+
+Addressing exceptions can only happen if the g2->g3 page tables
+reference invalid g2 addresses (say, either a table or the final page is
+not accessible - so something that basically never happens in sane
+environments).
+
+Identified by manual code inspection.
+
+Fixes: a3508fbe9dc6 ("KVM: s390: vsie: initial support for nested virtualization")
+Cc: <stable@vger.kernel.org> # v4.8+
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Link: https://lore.kernel.org/r/20200403153050.20569-3-david@redhat.com
+Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
+[borntraeger@de.ibm.com: fix patch description]
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kvm/vsie.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/s390/kvm/vsie.c
++++ b/arch/s390/kvm/vsie.c
+@@ -1024,6 +1024,7 @@ static int vsie_run(struct kvm_vcpu *vcp
+               scb_s->iprcc = PGM_ADDRESSING;
+               scb_s->pgmilc = 4;
+               scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
++              rc = 1;
+       }
+       return rc;
+ }
diff --git a/queue-4.19/kvm-s390-vsie-fix-region-1-asce-sanity-shadow-address-checks.patch b/queue-4.19/kvm-s390-vsie-fix-region-1-asce-sanity-shadow-address-checks.patch
new file mode 100644 (file)
index 0000000..56f6af7
--- /dev/null
@@ -0,0 +1,56 @@
+From a1d032a49522cb5368e5dfb945a85899b4c74f65 Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Fri, 3 Apr 2020 17:30:46 +0200
+Subject: KVM: s390: vsie: Fix region 1 ASCE sanity shadow address checks
+
+From: David Hildenbrand <david@redhat.com>
+
+commit a1d032a49522cb5368e5dfb945a85899b4c74f65 upstream.
+
+In case we have a region 1 the following calculation
+(31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11)
+results in 64. As shifts beyond the size are undefined the compiler is
+free to use instructions like sllg. sllg will only use 6 bits of the
+shift value (here 64) resulting in no shift at all. That means that ALL
+addresses will be rejected.
+
+This can result in endless loops, e.g. when prefix cannot get mapped.
+
+Fixes: 4be130a08420 ("s390/mm: add shadow gmap support")
+Tested-by: Janosch Frank <frankja@linux.ibm.com>
+Reported-by: Janosch Frank <frankja@linux.ibm.com>
+Cc: <stable@vger.kernel.org> # v4.8+
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Link: https://lore.kernel.org/r/20200403153050.20569-2-david@redhat.com
+Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
+[borntraeger@de.ibm.com: fix patch description, remove WARN_ON_ONCE]
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/mm/gmap.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/mm/gmap.c
++++ b/arch/s390/mm/gmap.c
+@@ -787,14 +787,18 @@ static void gmap_call_notifier(struct gm
+ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
+                                            unsigned long gaddr, int level)
+ {
++      const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
+       unsigned long *table;
+       if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4))
+               return NULL;
+       if (gmap_is_shadow(gmap) && gmap->removed)
+               return NULL;
+-      if (gaddr & (-1UL << (31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11)))
++
++      if (asce_type != _ASCE_TYPE_REGION1 &&
++          gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
+               return NULL;
++
+       table = gmap->table;
+       switch (gmap->asce & _ASCE_TYPE_MASK) {
+       case _ASCE_TYPE_REGION1:
diff --git a/queue-4.19/kvm-vmx-always-vmclear-in-use-vmcses-during-crash-with-kexec-support.patch b/queue-4.19/kvm-vmx-always-vmclear-in-use-vmcses-during-crash-with-kexec-support.patch
new file mode 100644 (file)
index 0000000..45fdd41
--- /dev/null
@@ -0,0 +1,180 @@
+From 31603d4fc2bb4f0815245d496cb970b27b4f636a Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+Date: Sat, 21 Mar 2020 12:37:49 -0700
+Subject: KVM: VMX: Always VMCLEAR in-use VMCSes during crash with kexec support
+
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+
+commit 31603d4fc2bb4f0815245d496cb970b27b4f636a upstream.
+
+VMCLEAR all in-use VMCSes during a crash, even if kdump's NMI shootdown
+interrupted a KVM update of the percpu in-use VMCS list.
+
+Because NMIs are not blocked by disabling IRQs, it's possible that
+crash_vmclear_local_loaded_vmcss() could be called while the percpu list
+of VMCSes is being modified, e.g. in the middle of list_add() in
+vmx_vcpu_load_vmcs().  This potential corner case was called out in the
+original commit[*], but the analysis of its impact was wrong.
+
+Skipping the VMCLEARs is wrong because it all but guarantees that a
+loaded, and therefore cached, VMCS will live across kexec and corrupt
+memory in the new kernel.  Corruption will occur because the CPU's VMCS
+cache is non-coherent, i.e. not snooped, and so the writeback of VMCS
+memory on its eviction will overwrite random memory in the new kernel.
+The VMCS will live because the NMI shootdown also disables VMX, i.e. the
+in-progress VMCLEAR will #UD, and existing Intel CPUs do not flush the
+VMCS cache on VMXOFF.
+
+Furthermore, interrupting list_add() and list_del() is safe due to
+crash_vmclear_local_loaded_vmcss() using forward iteration.  list_add()
+ensures the new entry is not visible to forward iteration unless the
+entire add completes, via WRITE_ONCE(prev->next, new).  A bad "prev"
+pointer could be observed if the NMI shootdown interrupted list_del() or
+list_add(), but list_for_each_entry() does not consume ->prev.
+
+In addition to removing the temporary disabling of VMCLEAR, open code
+loaded_vmcs_init() in __loaded_vmcs_clear() and reorder VMCLEAR so that
+the VMCS is deleted from the list only after it's been VMCLEAR'd.
+Deleting the VMCS before VMCLEAR would allow a race where the NMI
+shootdown could arrive between list_del() and vmcs_clear() and thus
+neither flow would execute a successful VMCLEAR.  Alternatively, more
+code could be moved into loaded_vmcs_init(), but that gets rather silly
+as the only other user, alloc_loaded_vmcs(), doesn't need the smp_wmb()
+and would need to work around the list_del().
+
+Update the smp_*() comments related to the list manipulation, and
+opportunistically reword them to improve clarity.
+
+[*] https://patchwork.kernel.org/patch/1675731/#3720461
+
+Fixes: 8f536b7697a0 ("KVM: VMX: provide the vmclear function and a bitmap to support VMCLEAR in kdump")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Message-Id: <20200321193751.24985-2-sean.j.christopherson@intel.com>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |   67 ++++++++++++-----------------------------------------
+ 1 file changed, 16 insertions(+), 51 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2156,43 +2156,15 @@ static void vmcs_load(struct vmcs *vmcs)
+ }
+ #ifdef CONFIG_KEXEC_CORE
+-/*
+- * This bitmap is used to indicate whether the vmclear
+- * operation is enabled on all cpus. All disabled by
+- * default.
+- */
+-static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
+-
+-static inline void crash_enable_local_vmclear(int cpu)
+-{
+-      cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
+-}
+-
+-static inline void crash_disable_local_vmclear(int cpu)
+-{
+-      cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
+-}
+-
+-static inline int crash_local_vmclear_enabled(int cpu)
+-{
+-      return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
+-}
+-
+ static void crash_vmclear_local_loaded_vmcss(void)
+ {
+       int cpu = raw_smp_processor_id();
+       struct loaded_vmcs *v;
+-      if (!crash_local_vmclear_enabled(cpu))
+-              return;
+-
+       list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
+                           loaded_vmcss_on_cpu_link)
+               vmcs_clear(v->vmcs);
+ }
+-#else
+-static inline void crash_enable_local_vmclear(int cpu) { }
+-static inline void crash_disable_local_vmclear(int cpu) { }
+ #endif /* CONFIG_KEXEC_CORE */
+ static void __loaded_vmcs_clear(void *arg)
+@@ -2204,19 +2176,24 @@ static void __loaded_vmcs_clear(void *ar
+               return; /* vcpu migration can race with cpu offline */
+       if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
+               per_cpu(current_vmcs, cpu) = NULL;
+-      crash_disable_local_vmclear(cpu);
++
++      vmcs_clear(loaded_vmcs->vmcs);
++      if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
++              vmcs_clear(loaded_vmcs->shadow_vmcs);
++
+       list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
+       /*
+-       * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link
+-       * is before setting loaded_vmcs->vcpu to -1 which is done in
+-       * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist
+-       * then adds the vmcs into percpu list before it is deleted.
++       * Ensure all writes to loaded_vmcs, including deleting it from its
++       * current percpu list, complete before setting loaded_vmcs->vcpu to
++       * -1, otherwise a different cpu can see vcpu == -1 first and add
++       * loaded_vmcs to its percpu list before it's deleted from this cpu's
++       * list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs().
+        */
+       smp_wmb();
+-      loaded_vmcs_init(loaded_vmcs);
+-      crash_enable_local_vmclear(cpu);
++      loaded_vmcs->cpu = -1;
++      loaded_vmcs->launched = 0;
+ }
+ static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
+@@ -3067,18 +3044,17 @@ static void vmx_vcpu_load(struct kvm_vcp
+       if (!already_loaded) {
+               loaded_vmcs_clear(vmx->loaded_vmcs);
+               local_irq_disable();
+-              crash_disable_local_vmclear(cpu);
+               /*
+-               * Read loaded_vmcs->cpu should be before fetching
+-               * loaded_vmcs->loaded_vmcss_on_cpu_link.
+-               * See the comments in __loaded_vmcs_clear().
++               * Ensure loaded_vmcs->cpu is read before adding loaded_vmcs to
++               * this cpu's percpu list, otherwise it may not yet be deleted
++               * from its previous cpu's percpu list.  Pairs with the
++               * smb_wmb() in __loaded_vmcs_clear().
+                */
+               smp_rmb();
+               list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
+                        &per_cpu(loaded_vmcss_on_cpu, cpu));
+-              crash_enable_local_vmclear(cpu);
+               local_irq_enable();
+       }
+@@ -4426,17 +4402,6 @@ static int hardware_enable(void)
+       INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+       spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+-      /*
+-       * Now we can enable the vmclear operation in kdump
+-       * since the loaded_vmcss_on_cpu list on this cpu
+-       * has been initialized.
+-       *
+-       * Though the cpu is not in VMX operation now, there
+-       * is no problem to enable the vmclear operation
+-       * for the loaded_vmcss_on_cpu list is empty!
+-       */
+-      crash_enable_local_vmclear(cpu);
+-
+       rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
+       test_bits = FEATURE_CONTROL_LOCKED;
diff --git a/queue-4.19/kvm-vmx-fix-crash-cleanup-when-kvm-wasn-t-used.patch b/queue-4.19/kvm-vmx-fix-crash-cleanup-when-kvm-wasn-t-used.patch
new file mode 100644 (file)
index 0000000..c2d7a3f
--- /dev/null
@@ -0,0 +1,75 @@
+From dbef2808af6c594922fe32833b30f55f35e9da6d Mon Sep 17 00:00:00 2001
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Wed, 1 Apr 2020 10:13:48 +0200
+Subject: KVM: VMX: fix crash cleanup when KVM wasn't used
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+commit dbef2808af6c594922fe32833b30f55f35e9da6d upstream.
+
+If KVM wasn't used at all before we crash the cleanup procedure fails with
+ BUG: unable to handle page fault for address: ffffffffffffffc8
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 23215067 P4D 23215067 PUD 23217067 PMD 0
+ Oops: 0000 [#8] SMP PTI
+ CPU: 0 PID: 3542 Comm: bash Kdump: loaded Tainted: G      D           5.6.0-rc2+ #823
+ RIP: 0010:crash_vmclear_local_loaded_vmcss.cold+0x19/0x51 [kvm_intel]
+
+The root cause is that loaded_vmcss_on_cpu list is not yet initialized,
+we initialize it in hardware_enable() but this only happens when we start
+a VM.
+
+Previously, we used to have a bitmap with enabled CPUs and that was
+preventing [masking] the issue.
+
+Initialize loaded_vmcss_on_cpu list earlier, right before we assign
+crash_vmclear_loaded_vmcss pointer. blocked_vcpu_on_cpu list and
+blocked_vcpu_on_cpu_lock are moved altogether for consistency.
+
+Fixes: 31603d4fc2bb ("KVM: VMX: Always VMCLEAR in-use VMCSes during crash with kexec support")
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Message-Id: <20200401081348.1345307-1-vkuznets@redhat.com>
+Reviewed-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |   12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -4398,10 +4398,6 @@ static int hardware_enable(void)
+           !hv_get_vp_assist_page(cpu))
+               return -EFAULT;
+-      INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+-      INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+-      spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+-
+       rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
+       test_bits = FEATURE_CONTROL_LOCKED;
+@@ -14554,7 +14550,7 @@ module_exit(vmx_exit);
+ static int __init vmx_init(void)
+ {
+-      int r;
++      int r, cpu;
+ #if IS_ENABLED(CONFIG_HYPERV)
+       /*
+@@ -14605,6 +14601,12 @@ static int __init vmx_init(void)
+               }
+       }
++      for_each_possible_cpu(cpu) {
++              INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
++              INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
++              spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
++      }
++
+ #ifdef CONFIG_KEXEC_CORE
+       rcu_assign_pointer(crash_vmclear_loaded_vmcss,
+                          crash_vmclear_local_loaded_vmcss);
diff --git a/queue-4.19/kvm-x86-allocate-new-rmap-and-large-page-tracking-when-moving-memslot.patch b/queue-4.19/kvm-x86-allocate-new-rmap-and-large-page-tracking-when-moving-memslot.patch
new file mode 100644 (file)
index 0000000..a950d84
--- /dev/null
@@ -0,0 +1,102 @@
+From edd4fa37baa6ee8e44dc65523b27bd6fe44c94de Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+Date: Tue, 18 Feb 2020 13:07:15 -0800
+Subject: KVM: x86: Allocate new rmap and large page tracking when moving memslot
+
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+
+commit edd4fa37baa6ee8e44dc65523b27bd6fe44c94de upstream.
+
+Reallocate a rmap array and recalculate large page compatibility when
+moving an existing memslot to correctly handle the alignment properties
+of the new memslot.  The number of rmap entries required at each level
+is dependent on the alignment of the memslot's base gfn with respect to
+that level, e.g. moving a large-page aligned memslot so that it becomes
+unaligned will increase the number of rmap entries needed at the now
+unaligned level.
+
+Not updating the rmap array is the most obvious bug, as KVM accesses
+garbage data beyond the end of the rmap.  KVM interprets the bad data as
+pointers, leading to non-canonical #GPs, unexpected #PFs, etc...
+
+  general protection fault: 0000 [#1] SMP
+  CPU: 0 PID: 1909 Comm: move_memory_reg Not tainted 5.4.0-rc7+ #139
+  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
+  RIP: 0010:rmap_get_first+0x37/0x50 [kvm]
+  Code: <48> 8b 3b 48 85 ff 74 ec e8 6c f4 ff ff 85 c0 74 e3 48 89 d8 5b c3
+  RSP: 0018:ffffc9000021bbc8 EFLAGS: 00010246
+  RAX: ffff00617461642e RBX: ffff00617461642e RCX: 0000000000000012
+  RDX: ffff88827400f568 RSI: ffffc9000021bbe0 RDI: ffff88827400f570
+  RBP: 0010000000000000 R08: ffffc9000021bd00 R09: ffffc9000021bda8
+  R10: ffffc9000021bc48 R11: 0000000000000000 R12: 0030000000000000
+  R13: 0000000000000000 R14: ffff88827427d700 R15: ffffc9000021bce8
+  FS:  00007f7eda014700(0000) GS:ffff888277a00000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 00007f7ed9216ff8 CR3: 0000000274391003 CR4: 0000000000162eb0
+  Call Trace:
+   kvm_mmu_slot_set_dirty+0xa1/0x150 [kvm]
+   __kvm_set_memory_region.part.64+0x559/0x960 [kvm]
+   kvm_set_memory_region+0x45/0x60 [kvm]
+   kvm_vm_ioctl+0x30f/0x920 [kvm]
+   do_vfs_ioctl+0xa1/0x620
+   ksys_ioctl+0x66/0x70
+   __x64_sys_ioctl+0x16/0x20
+   do_syscall_64+0x4c/0x170
+   entry_SYSCALL_64_after_hwframe+0x44/0xa9
+  RIP: 0033:0x7f7ed9911f47
+  Code: <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 21 6f 2c 00 f7 d8 64 89 01 48
+  RSP: 002b:00007ffc00937498 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+  RAX: ffffffffffffffda RBX: 0000000001ab0010 RCX: 00007f7ed9911f47
+  RDX: 0000000001ab1350 RSI: 000000004020ae46 RDI: 0000000000000004
+  RBP: 000000000000000a R08: 0000000000000000 R09: 00007f7ed9214700
+  R10: 00007f7ed92149d0 R11: 0000000000000246 R12: 00000000bffff000
+  R13: 0000000000000003 R14: 00007f7ed9215000 R15: 0000000000000000
+  Modules linked in: kvm_intel kvm irqbypass
+  ---[ end trace 0c5f570b3358ca89 ]---
+
+The disallow_lpage tracking is more subtle.  Failure to update results
+in KVM creating large pages when it shouldn't, either due to stale data
+or again due to indexing beyond the end of the metadata arrays, which
+can lead to memory corruption and/or leaking data to guest/userspace.
+
+Note, the arrays for the old memslot are freed by the unconditional call
+to kvm_free_memslot() in __kvm_set_memory_region().
+
+Fixes: 05da45583de9b ("KVM: MMU: large page support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/x86.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -9229,6 +9229,13 @@ int kvm_arch_create_memslot(struct kvm *
+ {
+       int i;
++      /*
++       * Clear out the previous array pointers for the KVM_MR_MOVE case.  The
++       * old arrays will be freed by __kvm_set_memory_region() if installing
++       * the new memslot is successful.
++       */
++      memset(&slot->arch, 0, sizeof(slot->arch));
++
+       for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+               struct kvm_lpage_info *linfo;
+               unsigned long ugfn;
+@@ -9303,6 +9310,10 @@ int kvm_arch_prepare_memory_region(struc
+                               const struct kvm_userspace_memory_region *mem,
+                               enum kvm_mr_change change)
+ {
++      if (change == KVM_MR_MOVE)
++              return kvm_arch_create_memslot(kvm, memslot,
++                                             mem->memory_size >> PAGE_SHIFT);
++
+       return 0;
+ }
diff --git a/queue-4.19/kvm-x86-gracefully-handle-__vmalloc-failure-during-vm-allocation.patch b/queue-4.19/kvm-x86-gracefully-handle-__vmalloc-failure-during-vm-allocation.patch
new file mode 100644 (file)
index 0000000..52f0ef8
--- /dev/null
@@ -0,0 +1,50 @@
+From d18b2f43b9147c8005ae0844fb445d8cc6a87e31 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+Date: Sun, 26 Jan 2020 16:41:11 -0800
+Subject: KVM: x86: Gracefully handle __vmalloc() failure during VM allocation
+
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+
+commit d18b2f43b9147c8005ae0844fb445d8cc6a87e31 upstream.
+
+Check the result of __vmalloc() to avoid dereferencing a NULL pointer in
+the event that allocation fails.
+
+Fixes: d1e5b0e98ea27 ("kvm: Make VM ioctl do valloc for some archs")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/svm.c |    4 ++++
+ arch/x86/kvm/vmx.c |    4 ++++
+ 2 files changed, 8 insertions(+)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1917,6 +1917,10 @@ static void __unregister_enc_region_lock
+ static struct kvm *svm_vm_alloc(void)
+ {
+       struct kvm_svm *kvm_svm = vzalloc(sizeof(struct kvm_svm));
++
++      if (!kvm_svm)
++              return NULL;
++
+       return &kvm_svm->kvm;
+ }
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -10986,6 +10986,10 @@ STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
+ static struct kvm *vmx_vm_alloc(void)
+ {
+       struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx));
++
++      if (!kvm_vmx)
++              return NULL;
++
+       return &kvm_vmx->kvm;
+ }
diff --git a/queue-4.19/mtd-spinand-do-not-erase-the-block-before-writing-a-bad-block-marker.patch b/queue-4.19/mtd-spinand-do-not-erase-the-block-before-writing-a-bad-block-marker.patch
new file mode 100644 (file)
index 0000000..b397238
--- /dev/null
@@ -0,0 +1,50 @@
+From b645ad39d56846618704e463b24bb994c9585c7f Mon Sep 17 00:00:00 2001
+From: Frieder Schrempf <frieder.schrempf@kontron.de>
+Date: Tue, 18 Feb 2020 10:05:35 +0000
+Subject: mtd: spinand: Do not erase the block before writing a bad block marker
+
+From: Frieder Schrempf <frieder.schrempf@kontron.de>
+
+commit b645ad39d56846618704e463b24bb994c9585c7f upstream.
+
+Currently when marking a block, we use spinand_erase_op() to erase
+the block before writing the marker to the OOB area. Doing so without
+waiting for the operation to finish can lead to the marking failing
+silently and no bad block marker being written to the flash.
+
+In fact we don't need to do an erase at all before writing the BBM.
+The ECC is disabled for raw accesses to the OOB data and we don't
+need to work around any issues with chips reporting ECC errors as it
+is known to be the case for raw NAND.
+
+Fixes: 7529df465248 ("mtd: nand: Add core infrastructure to support SPI NANDs")
+Cc: stable@vger.kernel.org
+Signed-off-by: Frieder Schrempf <frieder.schrempf@kontron.de>
+Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
+Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Link: https://lore.kernel.org/linux-mtd/20200218100432.32433-4-frieder.schrempf@kontron.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mtd/nand/spi/core.c |    3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/drivers/mtd/nand/spi/core.c
++++ b/drivers/mtd/nand/spi/core.c
+@@ -673,7 +673,6 @@ static int spinand_markbad(struct nand_d
+       };
+       int ret;
+-      /* Erase block before marking it bad. */
+       ret = spinand_select_target(spinand, pos->target);
+       if (ret)
+               return ret;
+@@ -682,8 +681,6 @@ static int spinand_markbad(struct nand_d
+       if (ret)
+               return ret;
+-      spinand_erase_op(spinand, pos);
+-
+       return spinand_write_page(spinand, &req);
+ }
diff --git a/queue-4.19/mtd-spinand-stop-using-spinand-oobbuf-for-buffering-bad-block-markers.patch b/queue-4.19/mtd-spinand-stop-using-spinand-oobbuf-for-buffering-bad-block-markers.patch
new file mode 100644 (file)
index 0000000..51b5679
--- /dev/null
@@ -0,0 +1,85 @@
+From 2148937501ee3d663e0010e519a553fea67ad103 Mon Sep 17 00:00:00 2001
+From: Frieder Schrempf <frieder.schrempf@kontron.de>
+Date: Tue, 18 Feb 2020 10:05:14 +0000
+Subject: mtd: spinand: Stop using spinand->oobbuf for buffering bad block markers
+
+From: Frieder Schrempf <frieder.schrempf@kontron.de>
+
+commit 2148937501ee3d663e0010e519a553fea67ad103 upstream.
+
+For reading and writing the bad block markers, spinand->oobbuf is
+currently used as a buffer for the marker bytes. During the
+underlying read and write operations to actually get/set the content
+of the OOB area, the content of spinand->oobbuf is reused and changed
+by accessing it through spinand->oobbuf and/or spinand->databuf.
+
+This is a flaw in the original design of the SPI NAND core and, at the
+latest from commit 13c15e07eedf ("mtd: spinand: Handle the case where
+PROGRAM LOAD does not reset the cache") on, it results in the bad
+block marker not being written at all, as spinand->oobbuf is
+cleared to 0xff after setting the marker bytes to zero.
+
+To fix it, we now just store the two bytes for the marker on the
+stack and let the read/write operations copy it from/to the page
+buffer later.
+
+Fixes: 7529df465248 ("mtd: nand: Add core infrastructure to support SPI NANDs")
+Cc: stable@vger.kernel.org
+Signed-off-by: Frieder Schrempf <frieder.schrempf@kontron.de>
+Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
+Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Link: https://lore.kernel.org/linux-mtd/20200218100432.32433-2-frieder.schrempf@kontron.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mtd/nand/spi/core.c |   14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/drivers/mtd/nand/spi/core.c
++++ b/drivers/mtd/nand/spi/core.c
+@@ -629,18 +629,18 @@ static int spinand_mtd_write(struct mtd_
+ static bool spinand_isbad(struct nand_device *nand, const struct nand_pos *pos)
+ {
+       struct spinand_device *spinand = nand_to_spinand(nand);
++      u8 marker[2] = { };
+       struct nand_page_io_req req = {
+               .pos = *pos,
+-              .ooblen = 2,
++              .ooblen = sizeof(marker),
+               .ooboffs = 0,
+-              .oobbuf.in = spinand->oobbuf,
++              .oobbuf.in = marker,
+               .mode = MTD_OPS_RAW,
+       };
+-      memset(spinand->oobbuf, 0, 2);
+       spinand_select_target(spinand, pos->target);
+       spinand_read_page(spinand, &req, false);
+-      if (spinand->oobbuf[0] != 0xff || spinand->oobbuf[1] != 0xff)
++      if (marker[0] != 0xff || marker[1] != 0xff)
+               return true;
+       return false;
+@@ -664,11 +664,12 @@ static int spinand_mtd_block_isbad(struc
+ static int spinand_markbad(struct nand_device *nand, const struct nand_pos *pos)
+ {
+       struct spinand_device *spinand = nand_to_spinand(nand);
++      u8 marker[2] = { };
+       struct nand_page_io_req req = {
+               .pos = *pos,
+               .ooboffs = 0,
+-              .ooblen = 2,
+-              .oobbuf.out = spinand->oobbuf,
++              .ooblen = sizeof(marker),
++              .oobbuf.out = marker,
+       };
+       int ret;
+@@ -683,7 +684,6 @@ static int spinand_markbad(struct nand_d
+       spinand_erase_op(spinand, pos);
+-      memset(spinand->oobbuf, 0, 2);
+       return spinand_write_page(spinand, &req);
+ }
index cb6e20945a5bcc4931849e0e7ac90a9e10ab64a0..6b75aa546a8c20885809ebd312e558258fec2a59 100644 (file)
@@ -76,3 +76,19 @@ mips-octeon-irq-fix-potential-null-pointer-dereference.patch
 ath9k-handle-txpower-changes-even-when-tpc-is-disabled.patch
 signal-extend-exec_id-to-64bits.patch
 x86-entry-32-add-missing-asm_clac-to-general_protection-entry.patch
+kvm-nvmx-properly-handle-userspace-interrupt-window-request.patch
+kvm-s390-vsie-fix-region-1-asce-sanity-shadow-address-checks.patch
+kvm-s390-vsie-fix-delivery-of-addressing-exceptions.patch
+kvm-x86-allocate-new-rmap-and-large-page-tracking-when-moving-memslot.patch
+kvm-vmx-always-vmclear-in-use-vmcses-during-crash-with-kexec-support.patch
+kvm-x86-gracefully-handle-__vmalloc-failure-during-vm-allocation.patch
+kvm-vmx-fix-crash-cleanup-when-kvm-wasn-t-used.patch
+cifs-fix-bug-which-the-return-value-by-asynchronous-read-is-error.patch
+mtd-spinand-stop-using-spinand-oobbuf-for-buffering-bad-block-markers.patch
+mtd-spinand-do-not-erase-the-block-before-writing-a-bad-block-marker.patch
+btrfs-fix-crash-during-unmount-due-to-race-with-delayed-inode-workers.patch
+btrfs-set-update-the-uuid-generation-as-soon-as-possible.patch
+btrfs-drop-block-from-cache-on-error-in-relocation.patch
+btrfs-fix-missing-file-extent-item-for-hole-after-ranged-fsync.patch
+btrfs-fix-missing-semaphore-unlock-in-btrfs_sync_file.patch
+crypto-mxs-dcp-fix-scatterlist-linearization-for-hash.patch