5.16-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Sat, 29 Jan 2022 12:16:53 +0000 (13:16 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Sat, 29 Jan 2022 12:16:53 +0000 (13:16 +0100)
added patches:

28 files changed:
queue-5.16/arm-9179-1-uaccess-avoid-alignment-faults-in-copy__kernel_nofault.patch [new file with mode: 0644]
queue-5.16/arm-9180-1-thumb2-align-alt_up-sections-in-modules-sufficiently.patch [new file with mode: 0644]
queue-5.16/bluetooth-refactor-malicious-adv-data-check.patch [new file with mode: 0644]
queue-5.16/bpf-guard-against-accessing-null-pt_regs-in-bpf_get_task_stack.patch [new file with mode: 0644]
queue-5.16/btrfs-add-back-missing-dirty-page-rate-limiting-to-defrag.patch [new file with mode: 0644]
queue-5.16/btrfs-allow-defrag-to-be-interruptible.patch [new file with mode: 0644]
queue-5.16/btrfs-defrag-fix-wrong-number-of-defragged-sectors.patch [new file with mode: 0644]
queue-5.16/btrfs-defrag-properly-update-range-start-for-autodefrag.patch [new file with mode: 0644]
queue-5.16/btrfs-fix-deadlock-when-reserving-space-during-defrag.patch [new file with mode: 0644]
queue-5.16/btrfs-fix-too-long-loop-when-defragging-a-1-byte-file.patch [new file with mode: 0644]
queue-5.16/btrfs-update-writeback-index-when-starting-defrag.patch [new file with mode: 0644]
queue-5.16/can-m_can-m_can_fifo_-read-write-don-t-read-or-write-from-to-fifo-if-length-is-0.patch [new file with mode: 0644]
queue-5.16/kvm-arm64-use-shadow-spsr_el1-when-injecting-exceptions-on-vhe.patch [new file with mode: 0644]
queue-5.16/kvm-arm64-vgic-v3-restrict-seis-workaround-to-known-broken-systems.patch [new file with mode: 0644]
queue-5.16/net-sfp-ignore-disabled-sfp-node.patch [new file with mode: 0644]
queue-5.16/net-stmmac-configure-ptp-clock-source-prior-to-ptp-initialization.patch [new file with mode: 0644]
queue-5.16/net-stmmac-skip-only-stmmac_ptp_register-when-resume-from-suspend.patch [new file with mode: 0644]
queue-5.16/powerpc-bpf-update-ldimm64-instructions-during-extra-pass.patch [new file with mode: 0644]
queue-5.16/powerpc32-bpf-fix-codegen-for-bpf-to-bpf-calls.patch [new file with mode: 0644]
queue-5.16/s390-hypfs-include-z-vm-guests-with-access-control-group-set.patch [new file with mode: 0644]
queue-5.16/s390-module-fix-loading-modules-with-a-lot-of-relocations.patch [new file with mode: 0644]
queue-5.16/s390-nmi-handle-guarded-storage-validity-failures-for-kvm-guests.patch [new file with mode: 0644]
queue-5.16/s390-nmi-handle-vector-validity-failures-for-kvm-guests.patch [new file with mode: 0644]
queue-5.16/scsi-zfcp-fix-failed-recovery-on-gone-remote-port-with-non-npiv-fcp-devices.patch [new file with mode: 0644]
queue-5.16/series [new file with mode: 0644]
queue-5.16/ucount-make-get_ucount-a-safe-get_user-replacement.patch [new file with mode: 0644]
queue-5.16/udf-fix-null-ptr-deref-when-converting-from-inline-format.patch [new file with mode: 0644]
queue-5.16/udf-restore-i_lenalloc-when-inode-expansion-fails.patch [new file with mode: 0644]

diff --git a/queue-5.16/arm-9179-1-uaccess-avoid-alignment-faults-in-copy__kernel_nofault.patch b/queue-5.16/arm-9179-1-uaccess-avoid-alignment-faults-in-copy__kernel_nofault.patch
new file mode 100644 (file)
index 0000000..65a1344
--- /dev/null
@@ -0,0 +1,65 @@
+From 15420269b02a63ed8c1841905d8b8b2403246004 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Tue, 18 Jan 2022 13:45:09 +0100
+Subject: ARM: 9179/1: uaccess: avoid alignment faults in copy_[from|to]_kernel_nofault
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+commit 15420269b02a63ed8c1841905d8b8b2403246004 upstream.
+
+The helpers that are used to implement copy_from_kernel_nofault() and
+copy_to_kernel_nofault() cast a void* to a pointer to a wider type,
+which may result in alignment faults on ARM if the compiler decides to
+use double-word or multiple-word load/store instructions.
+
+Only configurations that define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
+are affected, given that commit 2423de2e6f4d ("ARM: 9115/1: mm/maccess:
+fix unaligned copy_{from,to}_kernel_nofault") ensures that dst and src
+are sufficiently aligned otherwise.
+
+So use the unaligned accessors for accessing dst and src in cases where
+they may be misaligned.
+
+Cc: <stable@vger.kernel.org> # depends on 2423de2e6f4d
+Fixes: 2df4c9a741a0 ("ARM: 9112/1: uaccess: add __{get,put}_kernel_nofault")
+Reviewed-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/include/asm/uaccess.h |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/arch/arm/include/asm/uaccess.h
++++ b/arch/arm/include/asm/uaccess.h
+@@ -11,6 +11,7 @@
+ #include <linux/string.h>
+ #include <asm/memory.h>
+ #include <asm/domain.h>
++#include <asm/unaligned.h>
+ #include <asm/unified.h>
+ #include <asm/compiler.h>
+@@ -497,7 +498,10 @@ do {                                                                      \
+       }                                                               \
+       default: __err = __get_user_bad(); break;                       \
+       }                                                               \
+-      *(type *)(dst) = __val;                                         \
++      if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))         \
++              put_unaligned(__val, (type *)(dst));                    \
++      else                                                            \
++              *(type *)(dst) = __val; /* aligned by caller */         \
+       if (__err)                                                      \
+               goto err_label;                                         \
+ } while (0)
+@@ -507,7 +511,9 @@ do {                                                                       \
+       const type *__pk_ptr = (dst);                                   \
+       unsigned long __dst = (unsigned long)__pk_ptr;                  \
+       int __err = 0;                                                  \
+-      type __val = *(type *)src;                                      \
++      type __val = IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \
++                   ? get_unaligned((type *)(src))                     \
++                   : *(type *)(src);  /* aligned by caller */         \
+       switch (sizeof(type)) {                                         \
+       case 1: __put_user_asm_byte(__val, __dst, __err, ""); break;    \
+       case 2: __put_user_asm_half(__val, __dst, __err, ""); break;    \
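
To illustrate the idea behind the fix above in hedged, standalone C (get_unaligned_u32() is an illustrative stand-in, not the kernel helper): dereferencing a misaligned pointer through a wider type is undefined behaviour and may fault on alignment-strict ARM configurations, while a memcpy()-based accessor lets the compiler emit whatever access the CPU actually allows.

  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  /* Rough stand-in for the kernel's get_unaligned(): a byte-wise copy
   * the compiler can lower to a safe load on any alignment. */
  static uint32_t get_unaligned_u32(const void *p)
  {
      uint32_t v;

      memcpy(&v, p, sizeof(v));
      return v;
  }

  int main(void)
  {
      unsigned char buf[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

      /* buf + 1 is misaligned for uint32_t; *(uint32_t *)(buf + 1)
       * could trap, while the helper above cannot. */
      printf("0x%08x\n", get_unaligned_u32(buf + 1));
      return 0;
  }
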
diff --git a/queue-5.16/arm-9180-1-thumb2-align-alt_up-sections-in-modules-sufficiently.patch b/queue-5.16/arm-9180-1-thumb2-align-alt_up-sections-in-modules-sufficiently.patch
new file mode 100644 (file)
index 0000000..8aa2957
--- /dev/null
@@ -0,0 +1,53 @@
+From 9f80ccda53b9417236945bc7ece4b519037df74d Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Tue, 18 Jan 2022 19:32:17 +0100
+Subject: ARM: 9180/1: Thumb2: align ALT_UP() sections in modules sufficiently
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+commit 9f80ccda53b9417236945bc7ece4b519037df74d upstream.
+
+When building for Thumb2, the .alt.smp.init sections that are emitted by
+the ALT_UP() patching code may not be 32-bit aligned, even though the
+fixup_smp_on_up() routine expects that. This results in alignment faults
+at module load time, which need to be fixed up by the fault handler.
+
+So let's align those sections explicitly, and prevent this from occurring.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/include/asm/assembler.h |    2 ++
+ arch/arm/include/asm/processor.h |    1 +
+ 2 files changed, 3 insertions(+)
+
+--- a/arch/arm/include/asm/assembler.h
++++ b/arch/arm/include/asm/assembler.h
+@@ -288,6 +288,7 @@
+  */
+ #define ALT_UP(instr...)                                      \
+       .pushsection ".alt.smp.init", "a"                       ;\
++      .align  2                                               ;\
+       .long   9998b - .                                       ;\
+ 9997: instr                                                   ;\
+       .if . - 9997b == 2                                      ;\
+@@ -299,6 +300,7 @@
+       .popsection
+ #define ALT_UP_B(label)                                       \
+       .pushsection ".alt.smp.init", "a"                       ;\
++      .align  2                                               ;\
+       .long   9998b - .                                       ;\
+       W(b)    . + (label - 9998b)                                     ;\
+       .popsection
+--- a/arch/arm/include/asm/processor.h
++++ b/arch/arm/include/asm/processor.h
+@@ -96,6 +96,7 @@ unsigned long __get_wchan(struct task_st
+ #define __ALT_SMP_ASM(smp, up)                                                \
+       "9998:  " smp "\n"                                              \
+       "       .pushsection \".alt.smp.init\", \"a\"\n"                \
++      "       .align  2\n"                                            \
+       "       .long   9998b - .\n"                                    \
+       "       " up "\n"                                               \
+       "       .popsection\n"
diff --git a/queue-5.16/bluetooth-refactor-malicious-adv-data-check.patch b/queue-5.16/bluetooth-refactor-malicious-adv-data-check.patch
new file mode 100644 (file)
index 0000000..5143239
--- /dev/null
@@ -0,0 +1,48 @@
+From 899663be5e75dc0174dc8bda0b5e6826edf0b29a Mon Sep 17 00:00:00 2001
+From: Brian Gix <brian.gix@intel.com>
+Date: Wed, 24 Nov 2021 12:16:28 -0800
+Subject: Bluetooth: refactor malicious adv data check
+
+From: Brian Gix <brian.gix@intel.com>
+
+commit 899663be5e75dc0174dc8bda0b5e6826edf0b29a upstream.
+
+The check for an out-of-bound read was being performed at the end of the
+while num_reports loop, and would fill the journal with false positives.
+Move the check to the beginning of the loop so that it is performed
+before ptr has been advanced.
+
+Signed-off-by: Brian Gix <brian.gix@intel.com>
+Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
+Cc: syphyr <syphyr@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bluetooth/hci_event.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/net/bluetooth/hci_event.c
++++ b/net/bluetooth/hci_event.c
+@@ -5822,6 +5822,11 @@ static void hci_le_adv_report_evt(struct
+               struct hci_ev_le_advertising_info *ev = ptr;
+               s8 rssi;
++              if (ptr > (void *)skb_tail_pointer(skb) - sizeof(*ev)) {
++                      bt_dev_err(hdev, "Malicious advertising data.");
++                      break;
++              }
++
+               if (ev->length <= HCI_MAX_AD_LENGTH &&
+                   ev->data + ev->length <= skb_tail_pointer(skb)) {
+                       rssi = ev->data[ev->length];
+@@ -5833,11 +5838,6 @@ static void hci_le_adv_report_evt(struct
+               }
+               ptr += sizeof(*ev) + ev->length + 1;
+-
+-              if (ptr > (void *) skb_tail_pointer(skb) - sizeof(*ev)) {
+-                      bt_dev_err(hdev, "Malicious advertising data. Stopping processing");
+-                      break;
+-              }
+       }
+       hci_dev_unlock(hdev);
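
The general shape of the fix above, as a hedged userspace sketch (the record layout and names are illustrative, not the HCI event structures): validate that the whole record fits in the remaining buffer before reading any of its fields, not after the cursor has already advanced.

  #include <stddef.h>
  #include <stdint.h>
  #include <stdio.h>

  struct record {
      uint8_t length;
      uint8_t data[];
  };

  static void parse(const uint8_t *ptr, const uint8_t *end)
  {
      while (ptr < end) {
          const struct record *rec = (const void *)ptr;
          size_t remaining = (size_t)(end - ptr);

          /* Check first, before any field access or cursor advance. */
          if (remaining < sizeof(*rec) ||
              remaining < sizeof(*rec) + rec->length) {
              fprintf(stderr, "malformed record, stopping\n");
              break;
          }
          printf("record of %u bytes\n", rec->length);
          ptr += sizeof(*rec) + rec->length;
      }
  }

  int main(void)
  {
      uint8_t buf[] = { 2, 0xaa, 0xbb, 3, 0x01 };   /* second record truncated */

      parse(buf, buf + sizeof(buf));
      return 0;
  }
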
diff --git a/queue-5.16/bpf-guard-against-accessing-null-pt_regs-in-bpf_get_task_stack.patch b/queue-5.16/bpf-guard-against-accessing-null-pt_regs-in-bpf_get_task_stack.patch
new file mode 100644 (file)
index 0000000..eb06835
--- /dev/null
@@ -0,0 +1,44 @@
+From b992f01e66150fc5e90be4a96f5eb8e634c8249e Mon Sep 17 00:00:00 2001
+From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
+Date: Thu, 6 Jan 2022 17:15:05 +0530
+Subject: bpf: Guard against accessing NULL pt_regs in bpf_get_task_stack()
+
+From: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+
+commit b992f01e66150fc5e90be4a96f5eb8e634c8249e upstream.
+
+task_pt_regs() can return NULL on powerpc for kernel threads. This is
+then used in __bpf_get_stack() to check for user mode, resulting in a
+kernel oops. Guard against this by checking return value of
+task_pt_regs() before trying to obtain the call chain.
+
+Fixes: fa28dcb82a38f8 ("bpf: Introduce helper bpf_get_task_stack()")
+Cc: stable@vger.kernel.org # v5.9+
+Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/d5ef83c361cc255494afd15ff1b4fb02a36e1dcf.1641468127.git.naveen.n.rao@linux.vnet.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/stackmap.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/kernel/bpf/stackmap.c
++++ b/kernel/bpf/stackmap.c
+@@ -525,13 +525,14 @@ BPF_CALL_4(bpf_get_task_stack, struct ta
+          u32, size, u64, flags)
+ {
+       struct pt_regs *regs;
+-      long res;
++      long res = -EINVAL;
+       if (!try_get_task_stack(task))
+               return -EFAULT;
+       regs = task_pt_regs(task);
+-      res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
++      if (regs)
++              res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
+       put_task_stack(task);
+       return res;
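
A hedged, standalone sketch of the guard pattern (get_regs() and walk_stack() are illustrative stand-ins for task_pt_regs() and __bpf_get_stack()): treat a NULL lookup result as "no data" with a sane default error instead of dereferencing it.

  #include <stddef.h>
  #include <stdio.h>

  struct regs { int dummy; };

  /* Stand-in for task_pt_regs(), which may legitimately return NULL
   * (e.g. for kernel threads on powerpc). */
  static struct regs *get_regs(int kernel_thread)
  {
      static struct regs r;

      return kernel_thread ? NULL : &r;
  }

  static long walk_stack(struct regs *regs)
  {
      (void)regs;
      return 42;   /* pretend we captured a call chain */
  }

  static long get_stack_safe(int kernel_thread)
  {
      struct regs *regs = get_regs(kernel_thread);
      long res = -22;   /* -EINVAL default, mirroring the patch */

      if (regs)
          res = walk_stack(regs);
      return res;
  }

  int main(void)
  {
      printf("%ld %ld\n", get_stack_safe(0), get_stack_safe(1));
      return 0;
  }
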
diff --git a/queue-5.16/btrfs-add-back-missing-dirty-page-rate-limiting-to-defrag.patch b/queue-5.16/btrfs-add-back-missing-dirty-page-rate-limiting-to-defrag.patch
new file mode 100644 (file)
index 0000000..49b1806
--- /dev/null
@@ -0,0 +1,48 @@
+From 3c9d31c715948aaff0ee6d322a91a2dec07770bf Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Thu, 20 Jan 2022 17:11:52 +0000
+Subject: btrfs: add back missing dirty page rate limiting to defrag
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 3c9d31c715948aaff0ee6d322a91a2dec07770bf upstream.
+
+A defrag operation can dirty a lot of pages, especially if operating on
+the entire file or a large file range. Any task dirtying pages should
+periodically call balance_dirty_pages_ratelimited(), as stated in that
+function's comments, otherwise it can leave too many dirty pages in
+the system. This is what we did before the refactoring in 5.16, and
+it should have remained, just like in the buffered write path and
+relocation. So restore that behaviour.
+
+Fixes: 7b508037d4cac3 ("btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file()")
+CC: stable@vger.kernel.org # 5.16
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1553,6 +1553,7 @@ int btrfs_defrag_file(struct inode *inod
+       }
+       while (cur < last_byte) {
++              const unsigned long prev_sectors_defragged = sectors_defragged;
+               u64 cluster_end;
+               /* The cluster size 256K should always be page aligned */
+@@ -1584,6 +1585,10 @@ int btrfs_defrag_file(struct inode *inod
+                               cluster_end + 1 - cur, extent_thresh,
+                               newer_than, do_compress,
+                               &sectors_defragged, max_to_defrag);
++
++              if (sectors_defragged > prev_sectors_defragged)
++                      balance_dirty_pages_ratelimited(inode->i_mapping);
++
+               btrfs_inode_unlock(inode, 0);
+               if (ret < 0)
+                       break;
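
A hedged sketch of the restored throttling shape, with a no-op stand-in for balance_dirty_pages_ratelimited(): pay the throttling cost only in iterations that actually made progress, which is what the prev_sectors_defragged comparison above does.

  #include <stdio.h>

  /* Stand-in for balance_dirty_pages_ratelimited(). */
  static void throttle(void)
  {
  }

  int main(void)
  {
      unsigned long done = 0;

      for (int i = 0; i < 8; i++) {
          unsigned long prev = done;

          if (i % 2 == 0)
              done += 64;   /* this pass dirtied pages */

          /* Throttle only when this iteration made progress. */
          if (done > prev)
              throttle();
      }
      printf("dirtied %lu units\n", done);
      return 0;
  }
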
diff --git a/queue-5.16/btrfs-allow-defrag-to-be-interruptible.patch b/queue-5.16/btrfs-allow-defrag-to-be-interruptible.patch
new file mode 100644 (file)
index 0000000..39844cb
--- /dev/null
@@ -0,0 +1,52 @@
+From b767c2fc787e992daeadfff40d61c05f66c82da0 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Tue, 18 Jan 2022 13:43:31 +0000
+Subject: btrfs: allow defrag to be interruptible
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit b767c2fc787e992daeadfff40d61c05f66c82da0 upstream.
+
+During defrag, at btrfs_defrag_file(), we have this loop that iterates
+over a file range in steps no larger than 256K subranges. If the range
+is too long, there's no way to interrupt it. So make the loop check in
+each iteration if there's a signal pending, and if there is, break and
+return -EAGAIN to userspace.
+
+Before kernel 5.16, we used to allow defrag to be cancelled through a
+signal, but that was lost with commit 7b508037d4cac3 ("btrfs: defrag:
+use defrag_one_cluster() to implement btrfs_defrag_file()").
+
+This change adds back the possibility to cancel a defrag with a signal
+and keeps the same semantics, returning -EAGAIN to user space (and not
+the usually more expected -EINTR).
+
+This is also motivated by a recent bug on 5.16 where defragging a 1 byte
+file resulted in iterating from file range 0 to (u64)-1, as hitting the
+bug triggered a too long loop, basically requiring one to reboot the
+machine, as it was not possible to cancel defrag.
+
+Fixes: 7b508037d4cac3 ("btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file()")
+CC: stable@vger.kernel.org # 5.16
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1520,6 +1520,11 @@ int btrfs_defrag_file(struct inode *inod
+               /* The cluster size 256K should always be page aligned */
+               BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
++              if (btrfs_defrag_cancelled(fs_info)) {
++                      ret = -EAGAIN;
++                      break;
++              }
++
+               /* We want the cluster end at page boundary when possible */
+               cluster_end = (((cur >> PAGE_SHIFT) +
+                              (SZ_256K >> PAGE_SHIFT)) << PAGE_SHIFT) - 1;
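
A hedged userspace analogue of making a long loop cancellable (SIGINT stands in for the signal check behind btrfs_defrag_cancelled(); the -EAGAIN return mirrors the patch's semantics):

  #include <errno.h>
  #include <signal.h>
  #include <stdio.h>

  static volatile sig_atomic_t cancelled;

  static void on_sigint(int sig)
  {
      (void)sig;
      cancelled = 1;
  }

  static int long_job(unsigned long iterations)
  {
      for (unsigned long i = 0; i < iterations; i++) {
          /* Check for cancellation once per bounded unit of work. */
          if (cancelled)
              return -EAGAIN;
          /* ... defrag one 256K cluster here ... */
      }
      return 0;
  }

  int main(void)
  {
      signal(SIGINT, on_sigint);
      printf("%d\n", long_job(4000000000UL));
      return 0;
  }
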
diff --git a/queue-5.16/btrfs-defrag-fix-wrong-number-of-defragged-sectors.patch b/queue-5.16/btrfs-defrag-fix-wrong-number-of-defragged-sectors.patch
new file mode 100644 (file)
index 0000000..808eaf5
--- /dev/null
@@ -0,0 +1,99 @@
+From 484167da77739a8d0e225008c48e697fd3f781ae Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 18 Jan 2022 15:19:04 +0800
+Subject: btrfs: defrag: fix wrong number of defragged sectors
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 484167da77739a8d0e225008c48e697fd3f781ae upstream.
+
+[BUG]
+There are users of the autodefrag mount option reporting an obvious
+increase in IO:
+
+> If I compare the write average (in total, I don't have it per process)
+> when taking idle periods on the same machine:
+>     Linux 5.16:
+>         without autodefrag: ~ 10KiB/s
+>         with autodefrag: between 1 and 2MiB/s.
+>
+>     Linux 5.15:
+>         with autodefrag:~ 10KiB/s (around the same as without
+> autodefrag on 5.16)
+
+[CAUSE]
+When the autodefrag mount option is enabled, btrfs_defrag_file() will be
+called with @max_sectors = BTRFS_DEFRAG_BATCH (1024) to limit how many
+sectors we can defrag in one try.
+
+And then use the number of sectors defragged to determine if we need to
+re-defrag.
+
+But commit b18c3ab2343d ("btrfs: defrag: introduce helper to defrag one
+cluster") uses wrong unit to increase @sectors_defragged, which should
+be in unit of sector, not byte.
+
+This means, if we have defragged any sector, then @sectors_defragged
+will be >= sectorsize (normally 4096), which is larger than
+BTRFS_DEFRAG_BATCH.
+
+This makes the @max_sectors check in defrag_one_cluster() underflow,
+rendering the whole @max_sectors check useless.
+
+Thus causing way more IO for the autodefrag mount option, as now there is
+no limit on how many sectors can really be defragged.
+
+[FIX]
+Fix the problems by:
+
+- Use sector as unit when increasing @sectors_defragged
+
+- Include @sectors_defragged > @max_sectors case to break the loop
+
+- Add extra comment on the return value of btrfs_defrag_file()
+
+Reported-by: Anthony Ruhier <aruhier@mailbox.org>
+Fixes: b18c3ab2343d ("btrfs: defrag: introduce helper to defrag one cluster")
+Link: https://lore.kernel.org/linux-btrfs/0a269612-e43f-da22-c5bc-b34b1b56ebe8@mailbox.org/
+CC: stable@vger.kernel.org # 5.16
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c |   10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1416,8 +1416,8 @@ static int defrag_one_cluster(struct btr
+       list_for_each_entry(entry, &target_list, list) {
+               u32 range_len = entry->len;
+-              /* Reached the limit */
+-              if (max_sectors && max_sectors == *sectors_defragged)
++              /* Reached or beyond the limit */
++              if (max_sectors && *sectors_defragged >= max_sectors)
+                       break;
+               if (max_sectors)
+@@ -1439,7 +1439,8 @@ static int defrag_one_cluster(struct btr
+                                      extent_thresh, newer_than, do_compress);
+               if (ret < 0)
+                       break;
+-              *sectors_defragged += range_len;
++              *sectors_defragged += range_len >>
++                                    inode->root->fs_info->sectorsize_bits;
+       }
+ out:
+       list_for_each_entry_safe(entry, tmp, &target_list, list) {
+@@ -1458,6 +1459,9 @@ out:
+  * @newer_than:          minimum transid to defrag
+  * @max_to_defrag: max number of sectors to be defragged, if 0, the whole inode
+  *               will be defragged.
++ *
++ * Return <0 for error.
++ * Return >=0 for the number of sectors defragged.
+  */
+ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
+                     struct btrfs_ioctl_defrag_range_args *range,
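
The unit mismatch is easy to demonstrate in hedged, standalone C (the constants mirror the ones named above; a sectorsize_bits of 12 assumes 4K sectors):

  #include <stdio.h>

  int main(void)
  {
      const unsigned long max_sectors = 1024;        /* BTRFS_DEFRAG_BATCH */
      const unsigned long range_len = 256 * 1024;    /* one 256K cluster, bytes */
      const unsigned int sectorsize_bits = 12;       /* 4K sectors */
      unsigned long sectors_defragged;

      sectors_defragged = range_len;    /* buggy: bytes into a sector counter */
      printf("buggy: %lu vs limit %lu\n", sectors_defragged, max_sectors);

      sectors_defragged = range_len >> sectorsize_bits;    /* fixed: sectors */
      printf("fixed: %lu vs limit %lu\n", sectors_defragged, max_sectors);
      return 0;
  }
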
diff --git a/queue-5.16/btrfs-defrag-properly-update-range-start-for-autodefrag.patch b/queue-5.16/btrfs-defrag-properly-update-range-start-for-autodefrag.patch
new file mode 100644 (file)
index 0000000..1846d32
--- /dev/null
@@ -0,0 +1,119 @@
+From c080b4144b9dd3b7af838a194ffad3204ca15166 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 18 Jan 2022 19:53:52 +0800
+Subject: btrfs: defrag: properly update range->start for autodefrag
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit c080b4144b9dd3b7af838a194ffad3204ca15166 upstream.
+
+[BUG]
+After commit 7b508037d4ca ("btrfs: defrag: use defrag_one_cluster() to
+implement btrfs_defrag_file()") autodefrag no longer properly re-defrags
+the file from the previously finished location.
+
+[CAUSE]
+The recent refactoring of defrag only focuses on defrag ioctl subpage
+support and doesn't take autodefrag into consideration.
+
+There are two problems involved which prevent autodefrag from restarting
+its scan:
+
+- No range.start update
+  Previously when one defrag target is found, range->start will be
+  updated to indicate where next search should start from.
+
+  But now btrfs_defrag_file() doesn't update it anymore, making
+  autodefrag always rescan from file offset 0.
+
+  This would also make autodefrag mark the same range dirty again and
+  again, causing extra IO.
+
+- No proper quick exit for defrag_one_cluster()
+  Currently if we reach or exceed the @max_sectors limit, we just exit
+  defrag_one_cluster(), and let the next defrag_one_cluster() call do a
+  quick exit.
+  This makes @cur increase, leaving no way to properly know which range is
+  defragged and which range is skipped.
+
+[FIX]
+The fix involves two modifications:
+
+- Update range->start to next cluster start
+  This is a little different from the old behavior.
+  Previously range->start is updated to the next defrag target.
+
+  But in the end, the behavior should still be pretty much the same,
+  as now we skip to next defrag target inside btrfs_defrag_file().
+
+  Thus if auto-defrag determines to re-scan, then we still do the skip,
+  just at a different timing.
+
+- Make defrag_one_cluster() return >0 to indicate a quick exit
+  So that btrfs_defrag_file() can also do a quick exit, without
+  increasing @cur to the range end, and re-use @cur to update
+  @range->start.
+
+- Add comment for btrfs_defrag_file() to mention the range->start update
+  Currently only autodefrag utilizes this behavior, as the defrag ioctl won't
+  set @max_to_defrag parameter, thus unless interrupted it will always
+  try to defrag the whole range.
+
+Reported-by: Filipe Manana <fdmanana@suse.com>
+Fixes: 7b508037d4ca ("btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file()")
+Link: https://lore.kernel.org/linux-btrfs/0a269612-e43f-da22-c5bc-b34b1b56ebe8@mailbox.org/
+CC: stable@vger.kernel.org # 5.16
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c |   18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1417,8 +1417,10 @@ static int defrag_one_cluster(struct btr
+               u32 range_len = entry->len;
+               /* Reached or beyond the limit */
+-              if (max_sectors && *sectors_defragged >= max_sectors)
++              if (max_sectors && *sectors_defragged >= max_sectors) {
++                      ret = 1;
+                       break;
++              }
+               if (max_sectors)
+                       range_len = min_t(u32, range_len,
+@@ -1461,7 +1463,10 @@ out:
+  *               will be defragged.
+  *
+  * Return <0 for error.
+- * Return >=0 for the number of sectors defragged.
++ * Return >=0 for the number of sectors defragged, and range->start will be updated
++ * to indicate the file offset where next defrag should be started at.
++ * (Mostly for autodefrag, which sets @max_to_defrag thus we may exit early without
++ *  defragging all the range).
+  */
+ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
+                     struct btrfs_ioctl_defrag_range_args *range,
+@@ -1554,10 +1559,19 @@ int btrfs_defrag_file(struct inode *inod
+               if (ret < 0)
+                       break;
+               cur = cluster_end + 1;
++              if (ret > 0) {
++                      ret = 0;
++                      break;
++              }
+       }
+       if (ra_allocated)
+               kfree(ra);
++      /*
++       * Update range.start for autodefrag, this will indicate where to start
++       * in next run.
++       */
++      range->start = cur;
+       if (sectors_defragged) {
+               /*
+                * We have defragged some sectors, for compression case they
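
A hedged sketch of the resume-point contract described above (names are illustrative): the worker always publishes the offset where the next call should pick up, even when it stops early because the per-call budget ran out.

  #include <stdio.h>

  /* Process at most `budget` units starting at *start, and advance
   * *start so the caller can simply call again to resume. */
  static unsigned long process(unsigned long *start, unsigned long end,
                               unsigned long budget)
  {
      unsigned long done = 0;

      while (*start < end && done < budget) {
          /* ... defrag one cluster at offset *start ... */
          *start += 1;
          done += 1;
      }
      return done;
  }

  int main(void)
  {
      unsigned long cursor = 0;

      while (process(&cursor, 10, 3) > 0)
          printf("resume point now %lu\n", cursor);
      return 0;
  }
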
diff --git a/queue-5.16/btrfs-fix-deadlock-when-reserving-space-during-defrag.patch b/queue-5.16/btrfs-fix-deadlock-when-reserving-space-during-defrag.patch
new file mode 100644 (file)
index 0000000..c408727
--- /dev/null
@@ -0,0 +1,96 @@
+From 0cb5950f3f3b51a4e8657d106f897f2b913e0586 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Thu, 20 Jan 2022 14:27:56 +0000
+Subject: btrfs: fix deadlock when reserving space during defrag
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 0cb5950f3f3b51a4e8657d106f897f2b913e0586 upstream.
+
+When defragging we can end up collecting a range for defrag that already
+has pages under delalloc (dirty), as long as the respective extent
+map for their range is not mapped to a hole, a prealloc extent or
+the extent map is from an old generation.
+
+Most of the time that is harmless from a functional perspective at
+least, however it can result in a deadlock:
+
+1) At defrag_collect_targets() we find an extent map that meets all
+   requirements but there's delalloc for the range it covers, and we add
+   its range to list of ranges to defrag;
+
+2) The defrag_collect_targets() function is called at defrag_one_range(),
+   after it locked a range that overlaps the range of the extent map;
+
+3) At defrag_one_range(), while the range is still locked, we call
+   defrag_one_locked_target() for the range associated to the extent
+   map we collected at step 1);
+
+4) Then finally at defrag_one_locked_target() we do a call to
+   btrfs_delalloc_reserve_space(), which will reserve data and metadata
+   space. If the space reservations can not be satisfied right away, the
+   flusher might be kicked in and start flushing delalloc and wait for
+   the respective ordered extents to complete. If this happens we will
+   deadlock, because both flushing delalloc and finishing an ordered
+   extent, requires locking the range in the inode's io tree, which was
+   already locked at defrag_collect_targets().
+
+So fix this by skipping extent maps for which there's already delalloc.
+
+Fixes: eb793cf857828d ("btrfs: defrag: introduce helper to collect target file extents")
+CC: stable@vger.kernel.org # 5.16
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c |   31 ++++++++++++++++++++++++++++++-
+ 1 file changed, 30 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1188,6 +1188,35 @@ static int defrag_collect_targets(struct
+                       goto next;
+               /*
++               * Our start offset might be in the middle of an existing extent
++               * map, so take that into account.
++               */
++              range_len = em->len - (cur - em->start);
++              /*
++               * If this range of the extent map is already flagged for delalloc,
++               * skip it, because:
++               *
++               * 1) We could deadlock later, when trying to reserve space for
++               *    delalloc, because in case we can't immediately reserve space
++               *    the flusher can start delalloc and wait for the respective
++               *    ordered extents to complete. The deadlock would happen
++               *    because we do the space reservation while holding the range
++               *    locked, and starting writeback, or finishing an ordered
++               *    extent, requires locking the range;
++               *
++               * 2) If there's delalloc there, it means there's dirty pages for
++               *    which writeback has not started yet (we clean the delalloc
++               *    flag when starting writeback and after creating an ordered
++               *    extent). If we mark pages in an adjacent range for defrag,
++               *    then we will have a larger contiguous range for delalloc,
++               *    very likely resulting in a larger extent after writeback is
++               *    triggered (except in a case of free space fragmentation).
++               */
++              if (test_range_bit(&inode->io_tree, cur, cur + range_len - 1,
++                                 EXTENT_DELALLOC, 0, NULL))
++                      goto next;
++
++              /*
+                * For do_compress case, we want to compress all valid file
+                * extents, thus no @extent_thresh or mergeable check.
+                */
+@@ -1195,7 +1224,7 @@ static int defrag_collect_targets(struct
+                       goto add;
+               /* Skip too large extent */
+-              if (em->len >= extent_thresh)
++              if (range_len >= extent_thresh)
+                       goto next;
+               next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em,
diff --git a/queue-5.16/btrfs-fix-too-long-loop-when-defragging-a-1-byte-file.patch b/queue-5.16/btrfs-fix-too-long-loop-when-defragging-a-1-byte-file.patch
new file mode 100644 (file)
index 0000000..27ae0ef
--- /dev/null
@@ -0,0 +1,85 @@
+From 6b34cd8e175bfbf4f3f01b6d19eae18245e1a8cc Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 17 Jan 2022 16:28:29 +0000
+Subject: btrfs: fix too long loop when defragging a 1 byte file
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 6b34cd8e175bfbf4f3f01b6d19eae18245e1a8cc upstream.
+
+When attempting to defrag a file with a single byte, we can end up in a
+too long loop, which is nearly infinite because at btrfs_defrag_file()
+we end up with the variable last_byte assigned with a value of
+18446744073709551615 (which is (u64)-1). The problem comes from the fact
+we end up doing:
+
+    last_byte = round_up(last_byte, fs_info->sectorsize) - 1;
+
+So if last_byte was assigned 0, which is i_size - 1, we underflow and
+end up with the value 18446744073709551615.
+
+This is trivial to reproduce and the following script triggers it:
+
+  $ cat test.sh
+  #!/bin/bash
+
+  DEV=/dev/sdj
+  MNT=/mnt/sdj
+
+  mkfs.btrfs -f $DEV
+  mount $DEV $MNT
+
+  echo -n "X" > $MNT/foobar
+
+  btrfs filesystem defragment $MNT/foobar
+
+  umount $MNT
+
+So fix this by not decrementing last_byte by 1 before doing the sector
+size round up. Also, to make it easier to follow, make the round up right
+after computing last_byte.
+
+Reported-by: Anthony Ruhier <aruhier@mailbox.org>
+Fixes: 7b508037d4cac3 ("btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file()")
+Link: https://lore.kernel.org/linux-btrfs/0a269612-e43f-da22-c5bc-b34b1b56ebe8@mailbox.org/
+CC: stable@vger.kernel.org # 5.16
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c |   12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1492,12 +1492,16 @@ int btrfs_defrag_file(struct inode *inod
+       if (range->start + range->len > range->start) {
+               /* Got a specific range */
+-              last_byte = min(isize, range->start + range->len) - 1;
++              last_byte = min(isize, range->start + range->len);
+       } else {
+               /* Defrag until file end */
+-              last_byte = isize - 1;
++              last_byte = isize;
+       }
++      /* Align the range */
++      cur = round_down(range->start, fs_info->sectorsize);
++      last_byte = round_up(last_byte, fs_info->sectorsize) - 1;
++
+       /*
+        * If we were not given a ra, allocate a readahead context. As
+        * readahead is just an optimization, defrag will work without it so
+@@ -1510,10 +1514,6 @@ int btrfs_defrag_file(struct inode *inod
+                       file_ra_state_init(ra, inode->i_mapping);
+       }
+-      /* Align the range */
+-      cur = round_down(range->start, fs_info->sectorsize);
+-      last_byte = round_up(last_byte, fs_info->sectorsize) - 1;
+-
+       while (cur < last_byte) {
+               u64 cluster_end;
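
The underflow reproduces in a few lines of hedged, standalone C (this uses a generic round_up(); the kernel macro assumes a power-of-two alignment but computes the same values here):

  #include <stdint.h>
  #include <stdio.h>

  #define round_up(x, a)   ((((x) + (a) - 1) / (a)) * (a))

  int main(void)
  {
      uint64_t isize = 1, sectorsize = 4096;

      /* round_up(0, 4096) is 0, and 0 - 1 wraps to (u64)-1. */
      uint64_t buggy = round_up(isize - 1, sectorsize) - 1;
      uint64_t fixed = round_up(isize, sectorsize) - 1;   /* 4095 */

      printf("buggy last_byte: %llu\n", (unsigned long long)buggy);
      printf("fixed last_byte: %llu\n", (unsigned long long)fixed);
      return 0;
  }
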
diff --git a/queue-5.16/btrfs-update-writeback-index-when-starting-defrag.patch b/queue-5.16/btrfs-update-writeback-index-when-starting-defrag.patch
new file mode 100644 (file)
index 0000000..b22739d
--- /dev/null
@@ -0,0 +1,61 @@
+From 27cdfde181bcacd226c230b2fd831f6f5b8c215f Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Thu, 20 Jan 2022 17:41:17 +0000
+Subject: btrfs: update writeback index when starting defrag
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 27cdfde181bcacd226c230b2fd831f6f5b8c215f upstream.
+
+When starting a defrag, we should update the writeback index of the
+inode's mapping in case it currently has a value beyond the start of the
+range we are defragging. This can help performance and often results in
+fewer extents after writeback - for example, if the current value
+of the writeback index sits somewhere in the middle of a range that
+gets dirtied by the defrag, then after writeback we can get two smaller
+extents instead of a single, larger extent.
+
+We used to have this before the refactoring in 5.16, but it was removed
+without any reason to do so. Originally it was added in kernel 3.1, by
+commit 2a0f7f5769992b ("Btrfs: fix recursive auto-defrag"), in order to
+fix a loop with autodefrag resulting in dirtying and writing pages over
+and over, but some testing on current code did not show that happening,
+at least with the test described in that commit.
+
+So add back the behaviour, as at the very least it is a nice-to-have
+optimization.
+
+Fixes: 7b508037d4cac3 ("btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file()")
+CC: stable@vger.kernel.org # 5.16
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1511,6 +1511,7 @@ int btrfs_defrag_file(struct inode *inod
+       int compress_type = BTRFS_COMPRESS_ZLIB;
+       int ret = 0;
+       u32 extent_thresh = range->extent_thresh;
++      pgoff_t start_index;
+       if (isize == 0)
+               return 0;
+@@ -1552,6 +1553,14 @@ int btrfs_defrag_file(struct inode *inod
+                       file_ra_state_init(ra, inode->i_mapping);
+       }
++      /*
++       * Make writeback start from the beginning of the range, so that the
++       * defrag range can be written sequentially.
++       */
++      start_index = cur >> PAGE_SHIFT;
++      if (start_index < inode->i_mapping->writeback_index)
++              inode->i_mapping->writeback_index = start_index;
++
+       while (cur < last_byte) {
+               const unsigned long prev_sectors_defragged = sectors_defragged;
+               u64 cluster_end;
diff --git a/queue-5.16/can-m_can-m_can_fifo_-read-write-don-t-read-or-write-from-to-fifo-if-length-is-0.patch b/queue-5.16/can-m_can-m_can_fifo_-read-write-don-t-read-or-write-from-to-fifo-if-length-is-0.patch
new file mode 100644 (file)
index 0000000..092441c
--- /dev/null
@@ -0,0 +1,60 @@
+From db72589c49fd260bfc99c7160c079675bc7417af Mon Sep 17 00:00:00 2001
+From: Marc Kleine-Budde <mkl@pengutronix.de>
+Date: Fri, 14 Jan 2022 15:35:01 +0100
+Subject: can: m_can: m_can_fifo_{read,write}: don't read or write from/to FIFO if length is 0
+
+From: Marc Kleine-Budde <mkl@pengutronix.de>
+
+commit db72589c49fd260bfc99c7160c079675bc7417af upstream.
+
+In order to optimize FIFO access, especially on m_can cores attached
+to slow busses like SPI, in patch
+
+| e39381770ec9 ("can: m_can: Disable IRQs on FIFO bus errors")
+
+bulk read/write support has been added to the m_can_fifo_{read,write}
+functions.
+
+That change leads the tcan driver to call
+regmap_bulk_{read,write}() with a length of 0 (for CAN frames with 0
+data length). regmap treats this as an error:
+
+| tcan4x5x spi1.0 tcan4x5x0: FIFO write returned -22
+
+This patch fixes the problem by not calling the
+cdev->ops->{read,write}_fifo() in case of a 0 length read/write.
+
+Fixes: e39381770ec9 ("can: m_can: Disable IRQs on FIFO bus errors")
+Link: https://lore.kernel.org/all/20220114155751.2651888-1-mkl@pengutronix.de
+Cc: stable@vger.kernel.org
+Cc: Matt Kline <matt@bitbashing.io>
+Cc: Chandrasekar Ramakrishnan <rcsekar@samsung.com>
+Reported-by: Michael Anochin <anochin@photo-meter.com>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/can/m_can/m_can.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/net/can/m_can/m_can.c
++++ b/drivers/net/can/m_can/m_can.c
+@@ -336,6 +336,9 @@ m_can_fifo_read(struct m_can_classdev *c
+       u32 addr_offset = cdev->mcfg[MRAM_RXF0].off + fgi * RXF0_ELEMENT_SIZE +
+               offset;
++      if (val_count == 0)
++              return 0;
++
+       return cdev->ops->read_fifo(cdev, addr_offset, val, val_count);
+ }
+@@ -346,6 +349,9 @@ m_can_fifo_write(struct m_can_classdev *
+       u32 addr_offset = cdev->mcfg[MRAM_TXB].off + fpi * TXB_ELEMENT_SIZE +
+               offset;
++      if (val_count == 0)
++              return 0;
++
+       return cdev->ops->write_fifo(cdev, addr_offset, val, val_count);
+ }
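
A hedged userspace sketch of the guard (backend_bulk_read() stands in for regmap_bulk_read(), which rejects a count of 0 with -EINVAL):

  #include <stddef.h>
  #include <stdio.h>

  /* Stand-in for the regmap backend that treats count == 0 as an error. */
  static int backend_bulk_read(void *val, size_t count)
  {
      if (count == 0)
          return -22;   /* -EINVAL */
      (void)val;
      return 0;
  }

  /* The fix: a zero-length transfer is a successful no-op, so never
   * forward it to the picky backend. */
  static int fifo_read(void *val, size_t count)
  {
      if (count == 0)
          return 0;
      return backend_bulk_read(val, count);
  }

  int main(void)
  {
      char buf[4];

      printf("without guard: %d\n", backend_bulk_read(buf, 0));
      printf("with guard:    %d\n", fifo_read(buf, 0));
      return 0;
  }
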
diff --git a/queue-5.16/kvm-arm64-use-shadow-spsr_el1-when-injecting-exceptions-on-vhe.patch b/queue-5.16/kvm-arm64-use-shadow-spsr_el1-when-injecting-exceptions-on-vhe.patch
new file mode 100644 (file)
index 0000000..abe0750
--- /dev/null
@@ -0,0 +1,46 @@
+From 278583055a237270fac70518275ba877bf9e4013 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Fri, 21 Jan 2022 18:42:07 +0000
+Subject: KVM: arm64: Use shadow SPSR_EL1 when injecting exceptions on !VHE
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit 278583055a237270fac70518275ba877bf9e4013 upstream.
+
+Injecting an exception into a guest with non-VHE is risky business.
+Instead of writing to the shadow register, which the switch code would
+then restore, we override the CPU register directly. Which gets
+overwritten a few instructions later by said restore code.
+
+The result is that although the guest correctly gets the exception,
+it will return to the original context in some random state,
+depending on what was there in the first place... Boo.
+
+Fix the issue by writing to the shadow register. The original code
+is absolutely fine on VHE, as the state is already loaded, and writing
+to the shadow register in that case would actually be a bug.
+
+Fixes: bb666c472ca2 ("KVM: arm64: Inject AArch64 exceptions from HYP")
+Cc: stable@vger.kernel.org
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Reviewed-by: Fuad Tabba <tabba@google.com>
+Link: https://lore.kernel.org/r/20220121184207.423426-1-maz@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/hyp/exception.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/arch/arm64/kvm/hyp/exception.c
++++ b/arch/arm64/kvm/hyp/exception.c
+@@ -38,7 +38,10 @@ static inline void __vcpu_write_sys_reg(
+ static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val)
+ {
+-      write_sysreg_el1(val, SYS_SPSR);
++      if (has_vhe())
++              write_sysreg_el1(val, SYS_SPSR);
++      else
++              __vcpu_sys_reg(vcpu, SPSR_EL1) = val;
+ }
+ static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val)
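
A hedged toy model of the staging rule behind this fix (illustrative names, not the KVM code): when the live register will be reloaded from a saved copy on the way back to the guest, the injected value must go into the saved copy, or the restore wipes it out.

  #include <stdio.h>

  struct vcpu { unsigned long shadow_spsr; };

  static unsigned long hw_spsr;   /* the "live" CPU register */

  /* The switch code: reloads the live register from the shadow copy. */
  static void restore_guest(struct vcpu *v)
  {
      hw_spsr = v->shadow_spsr;
  }

  /* A buggy variant would write hw_spsr directly and lose the value. */
  static void inject_exception(struct vcpu *v, unsigned long val)
  {
      v->shadow_spsr = val;   /* survives the later restore */
  }

  int main(void)
  {
      struct vcpu v = { .shadow_spsr = 0 };

      inject_exception(&v, 0x3c5);
      restore_guest(&v);
      printf("SPSR seen by guest: %#lx\n", hw_spsr);
      return 0;
  }
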
diff --git a/queue-5.16/kvm-arm64-vgic-v3-restrict-seis-workaround-to-known-broken-systems.patch b/queue-5.16/kvm-arm64-vgic-v3-restrict-seis-workaround-to-known-broken-systems.patch
new file mode 100644 (file)
index 0000000..fcd6e3e
--- /dev/null
@@ -0,0 +1,76 @@
+From d11a327ed95dbec756b99cbfef2a7fd85c9eeb09 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Fri, 21 Jan 2022 21:07:47 +0000
+Subject: KVM: arm64: vgic-v3: Restrict SEIS workaround to known broken systems
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit d11a327ed95dbec756b99cbfef2a7fd85c9eeb09 upstream.
+
+Contrary to what df652bcf1136 ("KVM: arm64: vgic-v3: Work around GICv3
+locally generated SErrors") was asserting, there is at least one other
+system out there (Cavium ThunderX2) implementing SEIS, and not in
+an obviously broken way.
+
+So instead of imposing the M1 workaround on an innocent bystander,
+let's limit it to the two known broken Apple implementations.
+
+Fixes: df652bcf1136 ("KVM: arm64: vgic-v3: Work around GICv3 locally generated SErrors")
+Reported-by: Ard Biesheuvel <ardb@kernel.org>
+Tested-by: Ard Biesheuvel <ardb@kernel.org>
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220122103912.795026-1-maz@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/hyp/vgic-v3-sr.c |    3 +++
+ arch/arm64/kvm/vgic/vgic-v3.c   |   17 +++++++++++++++--
+ 2 files changed, 18 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
++++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
+@@ -983,6 +983,9 @@ static void __vgic_v3_read_ctlr(struct k
+       val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT;
+       /* IDbits */
+       val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT;
++      /* SEIS */
++      if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK)
++              val |= BIT(ICC_CTLR_EL1_SEIS_SHIFT);
+       /* A3V */
+       val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT;
+       /* EOImode */
+--- a/arch/arm64/kvm/vgic/vgic-v3.c
++++ b/arch/arm64/kvm/vgic/vgic-v3.c
+@@ -609,6 +609,18 @@ static int __init early_gicv4_enable(cha
+ }
+ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
++static const struct midr_range broken_seis[] = {
++      MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
++      MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
++      {},
++};
++
++static bool vgic_v3_broken_seis(void)
++{
++      return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) &&
++              is_midr_in_range_list(read_cpuid_id(), broken_seis));
++}
++
+ /**
+  * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller
+  * @info:     pointer to the GIC description
+@@ -676,9 +688,10 @@ int vgic_v3_probe(const struct gic_kvm_i
+               group1_trap = true;
+       }
+-      if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) {
+-              kvm_info("GICv3 with locally generated SEI\n");
++      if (vgic_v3_broken_seis()) {
++              kvm_info("GICv3 with broken locally generated SEI\n");
++              kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK;
+               group0_trap = true;
+               group1_trap = true;
+               if (ich_vtr_el2 & ICH_VTR_TDS_MASK)
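
A hedged sketch of the gating pattern (the ID values are made up, not real MIDR encodings): enable the workaround only when the running CPU's ID falls inside a known-broken range list.

  #include <stdbool.h>
  #include <stddef.h>
  #include <stdio.h>

  struct id_range { unsigned int min, max; };

  /* Hypothetical known-broken implementations. */
  static const struct id_range broken[] = {
      { 0x61000220, 0x6100022f },
      { 0x61000230, 0x6100023f },
  };

  static bool id_is_broken(unsigned int id)
  {
      for (size_t i = 0; i < sizeof(broken) / sizeof(broken[0]); i++)
          if (id >= broken[i].min && id <= broken[i].max)
              return true;
      return false;
  }

  int main(void)
  {
      printf("%d\n", id_is_broken(0x61000225));   /* 1: apply workaround */
      printf("%d\n", id_is_broken(0x43000000));   /* 0: leave alone */
      return 0;
  }
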
diff --git a/queue-5.16/net-sfp-ignore-disabled-sfp-node.patch b/queue-5.16/net-sfp-ignore-disabled-sfp-node.patch
new file mode 100644 (file)
index 0000000..86c10db
--- /dev/null
@@ -0,0 +1,43 @@
+From 2148927e6ed43a1667baf7c2ae3e0e05a44b51a0 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Beh=C3=BAn?= <kabel@kernel.org>
+Date: Wed, 19 Jan 2022 17:44:55 +0100
+Subject: net: sfp: ignore disabled SFP node
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Marek Behún <kabel@kernel.org>
+
+commit 2148927e6ed43a1667baf7c2ae3e0e05a44b51a0 upstream.
+
+Commit ce0aa27ff3f6 ("sfp: add sfp-bus to bridge between network devices
+and sfp cages") added code which finds the SFP bus DT node even if the
+node is disabled with status = "disabled". Because of this, when phylink
+is created, it ends up with a non-null .sfp_bus member, even though the
+module is not probed (because the node is disabled).
+
+We need to ignore the disabled SFP bus node.
+
+Fixes: ce0aa27ff3f6 ("sfp: add sfp-bus to bridge between network devices and sfp cages")
+Signed-off-by: Marek Behún <kabel@kernel.org>
+Cc: stable@vger.kernel.org # 2203cbf2c8b5 ("net: sfp: move fwnode parsing into sfp-bus layer")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/sfp-bus.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/net/phy/sfp-bus.c
++++ b/drivers/net/phy/sfp-bus.c
+@@ -651,6 +651,11 @@ struct sfp_bus *sfp_bus_find_fwnode(stru
+       else if (ret < 0)
+               return ERR_PTR(ret);
++      if (!fwnode_device_is_available(ref.fwnode)) {
++              fwnode_handle_put(ref.fwnode);
++              return NULL;
++      }
++
+       bus = sfp_bus_get(ref.fwnode);
+       fwnode_handle_put(ref.fwnode);
+       if (!bus)
diff --git a/queue-5.16/net-stmmac-configure-ptp-clock-source-prior-to-ptp-initialization.patch b/queue-5.16/net-stmmac-configure-ptp-clock-source-prior-to-ptp-initialization.patch
new file mode 100644 (file)
index 0000000..9d8a148
--- /dev/null
@@ -0,0 +1,47 @@
+From 94c82de43e01ef5747a95e4a590880de863fe423 Mon Sep 17 00:00:00 2001
+From: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
+Date: Wed, 26 Jan 2022 17:47:22 +0800
+Subject: net: stmmac: configure PTP clock source prior to PTP initialization
+
+From: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
+
+commit 94c82de43e01ef5747a95e4a590880de863fe423 upstream.
+
+For the Intel platform, the PTP clock source must be configured prior to
+PTP initialization in the MAC. So move the ptp_clk_freq_config execution from
+stmmac_ptp_register() to stmmac_init_ptp().
+
+Fixes: 76da35dc99af ("stmmac: intel: Add PSE and PCH PTP clock source selection")
+Cc: <stable@vger.kernel.org> # 5.15.x
+Signed-off-by: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |    3 +++
+ drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c  |    3 ---
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -899,6 +899,9 @@ static int stmmac_init_ptp(struct stmmac
+       bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
+       int ret;
++      if (priv->plat->ptp_clk_freq_config)
++              priv->plat->ptp_clk_freq_config(priv);
++
+       ret = stmmac_init_tstamp_counter(priv, STMMAC_HWTS_ACTIVE);
+       if (ret)
+               return ret;
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+@@ -297,9 +297,6 @@ void stmmac_ptp_register(struct stmmac_p
+ {
+       int i;
+-      if (priv->plat->ptp_clk_freq_config)
+-              priv->plat->ptp_clk_freq_config(priv);
+-
+       for (i = 0; i < priv->dma_cap.pps_out_num; i++) {
+               if (i >= STMMAC_PPS_MAX)
+                       break;
diff --git a/queue-5.16/net-stmmac-skip-only-stmmac_ptp_register-when-resume-from-suspend.patch b/queue-5.16/net-stmmac-skip-only-stmmac_ptp_register-when-resume-from-suspend.patch
new file mode 100644 (file)
index 0000000..0fbbacd
--- /dev/null
@@ -0,0 +1,75 @@
+From 0735e639f129dff455aeb91da291f5c578cc33db Mon Sep 17 00:00:00 2001
+From: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
+Date: Wed, 26 Jan 2022 17:47:23 +0800
+Subject: net: stmmac: skip only stmmac_ptp_register when resume from suspend
+
+From: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
+
+commit 0735e639f129dff455aeb91da291f5c578cc33db upstream.
+
+When resuming from suspend, besides skipping PTP registration, the
+driver also skipped PTP HW initialization. This could leave the PTP
+clock unable to operate properly after resume.
+
+To fix this, skip only stmmac_ptp_register() when resuming from
+suspend.
+
+Fixes: fe1319291150 ("stmmac: Don't init ptp again when resume from suspend/hibernation")
+Cc: <stable@vger.kernel.org> # 5.15.x
+Signed-off-by: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |   20 +++++++++-----------
+ 1 file changed, 9 insertions(+), 11 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -924,8 +924,6 @@ static int stmmac_init_ptp(struct stmmac
+       priv->hwts_tx_en = 0;
+       priv->hwts_rx_en = 0;
+-      stmmac_ptp_register(priv);
+-
+       return 0;
+ }
+@@ -3248,7 +3246,7 @@ static int stmmac_fpe_start_wq(struct st
+ /**
+  * stmmac_hw_setup - setup mac in a usable state.
+  *  @dev : pointer to the device structure.
+- *  @init_ptp: initialize PTP if set
++ *  @ptp_register: register PTP if set
+  *  Description:
+  *  this is the main function to setup the HW in a usable state because the
+  *  dma engine is reset, the core registers are configured (e.g. AXI,
+@@ -3258,7 +3256,7 @@ static int stmmac_fpe_start_wq(struct st
+  *  0 on success and an appropriate (-)ve integer as defined in errno.h
+  *  file on failure.
+  */
+-static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
++static int stmmac_hw_setup(struct net_device *dev, bool ptp_register)
+ {
+       struct stmmac_priv *priv = netdev_priv(dev);
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
+@@ -3315,13 +3313,13 @@ static int stmmac_hw_setup(struct net_de
+       stmmac_mmc_setup(priv);
+-      if (init_ptp) {
+-              ret = stmmac_init_ptp(priv);
+-              if (ret == -EOPNOTSUPP)
+-                      netdev_warn(priv->dev, "PTP not supported by HW\n");
+-              else if (ret)
+-                      netdev_warn(priv->dev, "PTP init failed\n");
+-      }
++      ret = stmmac_init_ptp(priv);
++      if (ret == -EOPNOTSUPP)
++              netdev_warn(priv->dev, "PTP not supported by HW\n");
++      else if (ret)
++              netdev_warn(priv->dev, "PTP init failed\n");
++      else if (ptp_register)
++              stmmac_ptp_register(priv);
+       priv->eee_tw_timer = STMMAC_DEFAULT_TWT_LS;
diff --git a/queue-5.16/powerpc-bpf-update-ldimm64-instructions-during-extra-pass.patch b/queue-5.16/powerpc-bpf-update-ldimm64-instructions-during-extra-pass.patch
new file mode 100644 (file)
index 0000000..68b7387
--- /dev/null
@@ -0,0 +1,137 @@
+From f9320c49993ca3c0ec0f9a7026b313735306bb8b Mon Sep 17 00:00:00 2001
+From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
+Date: Thu, 6 Jan 2022 17:15:07 +0530
+Subject: powerpc/bpf: Update ldimm64 instructions during extra pass
+
+From: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+
+commit f9320c49993ca3c0ec0f9a7026b313735306bb8b upstream.
+
+These instructions are updated after the initial JIT, so redo codegen
+during the extra pass. Rename bpf_jit_fixup_subprog_calls() to clarify
+that this is more than just subprog calls.
+
+Fixes: 69c087ba6225b5 ("bpf: Add bpf_for_each_map_elem() helper")
+Cc: stable@vger.kernel.org # v5.15
+Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Tested-by: Jiri Olsa <jolsa@redhat.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/7cc162af77ba918eb3ecd26ec9e7824bc44b1fae.1641468127.git.naveen.n.rao@linux.vnet.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/net/bpf_jit_comp.c   |   29 +++++++++++++++++++++++------
+ arch/powerpc/net/bpf_jit_comp32.c |    6 ++++++
+ arch/powerpc/net/bpf_jit_comp64.c |    7 ++++++-
+ 3 files changed, 35 insertions(+), 7 deletions(-)
+
+--- a/arch/powerpc/net/bpf_jit_comp.c
++++ b/arch/powerpc/net/bpf_jit_comp.c
+@@ -23,15 +23,15 @@ static void bpf_jit_fill_ill_insns(void
+       memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
+ }
+-/* Fix the branch target addresses for subprog calls */
+-static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
+-                                     struct codegen_context *ctx, u32 *addrs)
++/* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */
++static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
++                                 struct codegen_context *ctx, u32 *addrs)
+ {
+       const struct bpf_insn *insn = fp->insnsi;
+       bool func_addr_fixed;
+       u64 func_addr;
+       u32 tmp_idx;
+-      int i, ret;
++      int i, j, ret;
+       for (i = 0; i < fp->len; i++) {
+               /*
+@@ -66,6 +66,23 @@ static int bpf_jit_fixup_subprog_calls(s
+                        * of the JITed sequence remains unchanged.
+                        */
+                       ctx->idx = tmp_idx;
++              } else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) {
++                      tmp_idx = ctx->idx;
++                      ctx->idx = addrs[i] / 4;
++#ifdef CONFIG_PPC32
++                      PPC_LI32(ctx->b2p[insn[i].dst_reg] - 1, (u32)insn[i + 1].imm);
++                      PPC_LI32(ctx->b2p[insn[i].dst_reg], (u32)insn[i].imm);
++                      for (j = ctx->idx - addrs[i] / 4; j < 4; j++)
++                              EMIT(PPC_RAW_NOP());
++#else
++                      func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32);
++                      PPC_LI64(b2p[insn[i].dst_reg], func_addr);
++                      /* overwrite rest with nops */
++                      for (j = ctx->idx - addrs[i] / 4; j < 5; j++)
++                              EMIT(PPC_RAW_NOP());
++#endif
++                      ctx->idx = tmp_idx;
++                      i++;
+               }
+       }
+@@ -193,13 +210,13 @@ skip_init_ctx:
+               /*
+                * Do not touch the prologue and epilogue as they will remain
+                * unchanged. Only fix the branch target address for subprog
+-               * calls in the body.
++               * calls in the body, and ldimm64 instructions.
+                *
+                * This does not change the offsets and lengths of the subprog
+                * call instruction sequences and hence, the size of the JITed
+                * image as well.
+                */
+-              bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);
++              bpf_jit_fixup_addresses(fp, code_base, &cgctx, addrs);
+               /* There is no need to perform the usual passes. */
+               goto skip_codegen_passes;
+--- a/arch/powerpc/net/bpf_jit_comp32.c
++++ b/arch/powerpc/net/bpf_jit_comp32.c
+@@ -292,6 +292,8 @@ int bpf_jit_build_body(struct bpf_prog *
+               bool func_addr_fixed;
+               u64 func_addr;
+               u32 true_cond;
++              u32 tmp_idx;
++              int j;
+               /*
+                * addrs[] maps a BPF bytecode address into a real offset from
+@@ -839,8 +841,12 @@ int bpf_jit_build_body(struct bpf_prog *
+                * 16 byte instruction that uses two 'struct bpf_insn'
+                */
+               case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
++                      tmp_idx = ctx->idx;
+                       PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
+                       PPC_LI32(dst_reg, (u32)insn[i].imm);
++                      /* padding to allow full 4 instructions for later patching */
++                      for (j = ctx->idx - tmp_idx; j < 4; j++)
++                              EMIT(PPC_RAW_NOP());
+                       /* Adjust for two bpf instructions */
+                       addrs[++i] = ctx->idx * 4;
+                       break;
+--- a/arch/powerpc/net/bpf_jit_comp64.c
++++ b/arch/powerpc/net/bpf_jit_comp64.c
+@@ -318,6 +318,7 @@ int bpf_jit_build_body(struct bpf_prog *
+               u64 imm64;
+               u32 true_cond;
+               u32 tmp_idx;
++              int j;
+               /*
+                * addrs[] maps a BPF bytecode address into a real offset from
+@@ -806,9 +807,13 @@ emit_clear:
+               case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+                       imm64 = ((u64)(u32) insn[i].imm) |
+                                   (((u64)(u32) insn[i+1].imm) << 32);
++                      tmp_idx = ctx->idx;
++                      PPC_LI64(dst_reg, imm64);
++                      /* padding to allow full 5 instructions for later patching */
++                      for (j = ctx->idx - tmp_idx; j < 5; j++)
++                              EMIT(PPC_RAW_NOP());
+                       /* Adjust for two bpf instructions */
+                       addrs[++i] = ctx->idx * 4;
+-                      PPC_LI64(dst_reg, imm64);
+                       break;
+               /*
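
The padding trick above is the whole fix: ldimm64 is emitted into a
fixed-size slot during the first pass, so the extra pass can re-emit the
final immediate at the same offset without shifting any later code. A
minimal sketch of that pattern in plain C (emit_li64() stands in for the
kernel's PPC_LI32/PPC_LI64 macros and is an assumption, not kernel code):

    #include <stdint.h>

    #define LDIMM64_SLOT 5        /* worst-case insn count for a 64-bit immediate */
    #define PPC_NOP 0x60000000u   /* "ori 0,0,0" */

    /* hypothetical helper: emits 1..LDIMM64_SLOT words, returns new index */
    unsigned int emit_li64(uint32_t *buf, unsigned int idx, uint64_t imm);

    /* Small immediates need fewer instructions, so the remainder of the
     * slot is overwritten with nops; a later pass can then re-emit a
     * different value at the same offset. */
    unsigned int emit_imm64_padded(uint32_t *buf, unsigned int idx, uint64_t imm)
    {
            unsigned int start = idx;

            idx = emit_li64(buf, idx, imm);
            while (idx - start < LDIMM64_SLOT)
                    buf[idx++] = PPC_NOP;
            return idx;
    }
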
diff --git a/queue-5.16/powerpc32-bpf-fix-codegen-for-bpf-to-bpf-calls.patch b/queue-5.16/powerpc32-bpf-fix-codegen-for-bpf-to-bpf-calls.patch
new file mode 100644 (file)
index 0000000..333166c
--- /dev/null
@@ -0,0 +1,36 @@
+From fab07611fb2e6a15fac05c4583045ca5582fd826 Mon Sep 17 00:00:00 2001
+From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
+Date: Thu, 6 Jan 2022 17:15:06 +0530
+Subject: powerpc32/bpf: Fix codegen for bpf-to-bpf calls
+
+From: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+
+commit fab07611fb2e6a15fac05c4583045ca5582fd826 upstream.
+
+Pad instructions emitted for BPF_CALL so that the number of instructions
+generated does not change for different function addresses. This is
+especially important for calls to other bpf functions, whose addresses
+will only be known during the extra pass.
+
+Fixes: 51c66ad849a703 ("powerpc/bpf: Implement extended BPF on PPC32")
+Cc: stable@vger.kernel.org # v5.13+
+Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/52d8fe51f7620a6f27f377791564d79d75463576.1641468127.git.naveen.n.rao@linux.vnet.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/net/bpf_jit_comp32.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/powerpc/net/bpf_jit_comp32.c
++++ b/arch/powerpc/net/bpf_jit_comp32.c
+@@ -191,6 +191,9 @@ void bpf_jit_emit_func_call_rel(u32 *ima
+       if (image && rel < 0x2000000 && rel >= -0x2000000) {
+               PPC_BL_ABS(func);
++              EMIT(PPC_RAW_NOP());
++              EMIT(PPC_RAW_NOP());
++              EMIT(PPC_RAW_NOP());
+       } else {
+               /* Load function address into r0 */
+               EMIT(PPC_RAW_LIS(_R0, IMM_H(func)));
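
Both branches of this if/else must now emit the same number of
instructions (four), so the extra pass can switch a call between the
near and far form in place. Sketched with illustrative emit_*() helpers
(the mtctr/bctrl tail of the far path is assumed from the truncated
hunk, not shown in it):

    if (in_range(rel)) {            /* target reachable with a relative bl */
            emit_bl(func);
            emit_nop();             /* pad to the size of the far form */
            emit_nop();
            emit_nop();
    } else {                        /* absolute call via r0/ctr */
            emit_lis(R0, hi16(func));
            emit_ori(R0, R0, lo16(func));
            emit_mtctr(R0);
            emit_bctrl();
    }
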
diff --git a/queue-5.16/s390-hypfs-include-z-vm-guests-with-access-control-group-set.patch b/queue-5.16/s390-hypfs-include-z-vm-guests-with-access-control-group-set.patch
new file mode 100644 (file)
index 0000000..8a2a60e
--- /dev/null
@@ -0,0 +1,51 @@
+From 663d34c8df98740f1e90241e78e456d00b3c6cad Mon Sep 17 00:00:00 2001
+From: Vasily Gorbik <gor@linux.ibm.com>
+Date: Thu, 20 Jan 2022 16:23:19 +0100
+Subject: s390/hypfs: include z/VM guests with access control group set
+
+From: Vasily Gorbik <gor@linux.ibm.com>
+
+commit 663d34c8df98740f1e90241e78e456d00b3c6cad upstream.
+
+Currently, if a z/VM guest is allowed to retrieve hypervisor performance
+data globally for all guests (privilege class B), the query is formed in
+a way that includes all guests, but the group name is left empty. As a
+result, z/VM guests which have an access control group set are not
+included in the results (not even the local VM).
+
+Change the query group identifier from empty to "any" to retrieve
+information about all guests from any groups (or without a group set).
+
+Cc: stable@vger.kernel.org
+Fixes: 31cb4bd31a48 ("[S390] Hypervisor filesystem (s390_hypfs) for z/VM")
+Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/hypfs/hypfs_vm.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/s390/hypfs/hypfs_vm.c
++++ b/arch/s390/hypfs/hypfs_vm.c
+@@ -20,6 +20,7 @@
+ static char local_guest[] = "        ";
+ static char all_guests[] = "*       ";
++static char *all_groups = all_guests;
+ static char *guest_query;
+ struct diag2fc_data {
+@@ -62,10 +63,11 @@ static int diag2fc(int size, char* query
+       memcpy(parm_list.userid, query, NAME_LEN);
+       ASCEBC(parm_list.userid, NAME_LEN);
+-      parm_list.addr = (unsigned long) addr ;
++      memcpy(parm_list.aci_grp, all_groups, NAME_LEN);
++      ASCEBC(parm_list.aci_grp, NAME_LEN);
++      parm_list.addr = (unsigned long)addr;
+       parm_list.size = size;
+       parm_list.fmt = 0x02;
+-      memset(parm_list.aci_grp, 0x40, NAME_LEN);
+       rc = -1;
+       diag_stat_inc(DIAG_STAT_X2FC);
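
The query identifiers are fixed-width 8-character fields, blank-padded
and converted to EBCDIC before the diagnose call, with "*" acting as a
wildcard. The old code filled the group field with 0x40 (an EBCDIC
space), i.e. "no group", which is why grouped guests were skipped. A
condensed view of the fixed setup (field names as in the patch above):

    #define NAME_LEN 8

    static char all_guests[] = "*       ";   /* match any guest...      */
    static char *all_groups = all_guests;    /* ...in any ACI group too */

    memcpy(parm_list.aci_grp, all_groups, NAME_LEN);
    ASCEBC(parm_list.aci_grp, NAME_LEN);     /* in-place ASCII -> EBCDIC */
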
diff --git a/queue-5.16/s390-module-fix-loading-modules-with-a-lot-of-relocations.patch b/queue-5.16/s390-module-fix-loading-modules-with-a-lot-of-relocations.patch
new file mode 100644 (file)
index 0000000..f0c9568
--- /dev/null
@@ -0,0 +1,84 @@
+From f3b7e73b2c6619884351a3a0a7468642f852b8a2 Mon Sep 17 00:00:00 2001
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+Date: Wed, 19 Jan 2022 19:26:37 +0100
+Subject: s390/module: fix loading modules with a lot of relocations
+
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+
+commit f3b7e73b2c6619884351a3a0a7468642f852b8a2 upstream.
+
+If the size of the PLT entries generated by apply_rela() exceeds
+64KiB, the first ones can no longer reach __jump_r1 with brc. Fix by
+using brcl. An alternative solution is to add a __jump_r1 copy after
+every 64KiB; however, the space savings are quite small and do not
+justify the additional complexity.
+
+Fixes: f19fbd5ed642 ("s390: introduce execute-trampolines for branches")
+Cc: stable@vger.kernel.org
+Reported-by: Andrea Righi <andrea.righi@canonical.com>
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/module.c |   37 ++++++++++++++++++-------------------
+ 1 file changed, 18 insertions(+), 19 deletions(-)
+
+--- a/arch/s390/kernel/module.c
++++ b/arch/s390/kernel/module.c
+@@ -33,7 +33,7 @@
+ #define DEBUGP(fmt , ...)
+ #endif
+-#define PLT_ENTRY_SIZE 20
++#define PLT_ENTRY_SIZE 22
+ void *module_alloc(unsigned long size)
+ {
+@@ -341,27 +341,26 @@ static int apply_rela(Elf_Rela *rela, El
+       case R_390_PLTOFF32:    /* 32 bit offset from GOT to PLT. */
+       case R_390_PLTOFF64:    /* 16 bit offset from GOT to PLT. */
+               if (info->plt_initialized == 0) {
+-                      unsigned int insn[5];
+-                      unsigned int *ip = me->core_layout.base +
+-                                         me->arch.plt_offset +
+-                                         info->plt_offset;
+-
+-                      insn[0] = 0x0d10e310;   /* basr 1,0  */
+-                      insn[1] = 0x100a0004;   /* lg   1,10(1) */
++                      unsigned char insn[PLT_ENTRY_SIZE];
++                      char *plt_base;
++                      char *ip;
++
++                      plt_base = me->core_layout.base + me->arch.plt_offset;
++                      ip = plt_base + info->plt_offset;
++                      *(int *)insn = 0x0d10e310;      /* basr 1,0  */
++                      *(int *)&insn[4] = 0x100c0004;  /* lg   1,12(1) */
+                       if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) {
+-                              unsigned int *ij;
+-                              ij = me->core_layout.base +
+-                                      me->arch.plt_offset +
+-                                      me->arch.plt_size - PLT_ENTRY_SIZE;
+-                              insn[2] = 0xa7f40000 +  /* j __jump_r1 */
+-                                      (unsigned int)(u16)
+-                                      (((unsigned long) ij - 8 -
+-                                        (unsigned long) ip) / 2);
++                              char *jump_r1;
++
++                              jump_r1 = plt_base + me->arch.plt_size -
++                                      PLT_ENTRY_SIZE;
++                              /* brcl 0xf,__jump_r1 */
++                              *(short *)&insn[8] = 0xc0f4;
++                              *(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2;
+                       } else {
+-                              insn[2] = 0x07f10000;   /* br %r1 */
++                              *(int *)&insn[8] = 0x07f10000;  /* br %r1 */
+                       }
+-                      insn[3] = (unsigned int) (val >> 32);
+-                      insn[4] = (unsigned int) val;
++                      *(long *)&insn[14] = val;
+                       write(ip, insn, sizeof(insn));
+                       info->plt_initialized = 1;
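
The root cause is a range limit: s390 relative branches encode their
target as a signed offset in 2-byte halfwords, so brc's 16-bit immediate
reaches only +-64KiB while brcl's 32-bit immediate reaches +-4GiB (hence
the PLT entry growing from 20 to 22 bytes). A sketch of the
reachability math, illustrative rather than taken from the kernel:

    #include <stdbool.h>
    #include <stdint.h>

    /* brc: 16-bit signed halfword offset, +-64KiB span */
    static bool brc_reaches(const void *from, const void *to)
    {
            intptr_t off = ((const char *)to - (const char *)from) / 2;

            return off >= INT16_MIN && off <= INT16_MAX;
    }
    /* brcl takes a 32-bit halfword offset instead (+-4GiB), which is
     * always enough within a module's PLT. */
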
diff --git a/queue-5.16/s390-nmi-handle-guarded-storage-validity-failures-for-kvm-guests.patch b/queue-5.16/s390-nmi-handle-guarded-storage-validity-failures-for-kvm-guests.patch
new file mode 100644 (file)
index 0000000..1453d0c
--- /dev/null
@@ -0,0 +1,57 @@
+From 1ea1d6a847d2b1d17fefd9196664b95f052a0775 Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger <borntraeger@linux.ibm.com>
+Date: Thu, 13 Jan 2022 11:44:19 +0100
+Subject: s390/nmi: handle guarded storage validity failures for KVM guests
+
+From: Christian Borntraeger <borntraeger@linux.ibm.com>
+
+commit 1ea1d6a847d2b1d17fefd9196664b95f052a0775 upstream.
+
+Machine check validity bits reflect the state of the machine check. If a
+guest does not make use of guarded storage, the validity bit might be
+off. We cannot use the host CR bit to decide whether the validity bit
+must be on. So ignore "invalid" guarded storage controls for KVM guests
+in the host and rely on the machine check being forwarded to the guest.
+If no other errors happen from a host perspective, everything is fine:
+no process needs to be killed and the host can continue to run.
+
+Cc: stable@vger.kernel.org
+Fixes: c929500d7a5a ("s390/nmi: s390: New low level handling for machine check happening in guest")
+Reported-by: Carsten Otte <cotte@de.ibm.com>
+Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Tested-by: Carsten Otte <cotte@de.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/nmi.c |   18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+--- a/arch/s390/kernel/nmi.c
++++ b/arch/s390/kernel/nmi.c
+@@ -316,11 +316,21 @@ static int notrace s390_validate_registe
+       if (cr2.gse) {
+               if (!mci.gs) {
+                       /*
+-                       * Guarded storage register can't be restored and
+-                       * the current processes uses guarded storage.
+-                       * It has to be terminated.
++                       * 2 cases:
++                       * - machine check in kernel or userspace
++                       * - machine check while running SIE (KVM guest)
++                       * For kernel or userspace the userspace values of
++                       * guarded storage control can not be recreated, the
++                       * process must be terminated.
++                       * For SIE the guest values of guarded storage can not
++                       * be recreated. This is either due to a bug or due to
++                       * GS being disabled in the guest. The guest will be
++                       * notified by KVM code and the guests machine check
++                       * handling must take care of this.  The host values
++                       * are saved by KVM and are not affected.
+                        */
+-                      kill_task = 1;
++                      if (!test_cpu_flag(CIF_MCCK_GUEST))
++                              kill_task = 1;
+               } else {
+                       load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area);
+               }
diff --git a/queue-5.16/s390-nmi-handle-vector-validity-failures-for-kvm-guests.patch b/queue-5.16/s390-nmi-handle-vector-validity-failures-for-kvm-guests.patch
new file mode 100644 (file)
index 0000000..1a8ab5c
--- /dev/null
@@ -0,0 +1,42 @@
+From f094a39c6ba168f2df1edfd1731cca377af5f442 Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger <borntraeger@linux.ibm.com>
+Date: Mon, 17 Jan 2022 18:40:32 +0100
+Subject: s390/nmi: handle vector validity failures for KVM guests
+
+From: Christian Borntraeger <borntraeger@linux.ibm.com>
+
+commit f094a39c6ba168f2df1edfd1731cca377af5f442 upstream.
+
+The machine check validity bit tells about the context. If a KVM guest
+was running, the bit tells about the guest validity and the host state
+is not affected. As a guest can disable the guest validity, this might
+result in unwanted host errors on machine checks.
+
+Cc: stable@vger.kernel.org
+Fixes: c929500d7a5a ("s390/nmi: s390: New low level handling for machine check happening in guest")
+Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/nmi.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/kernel/nmi.c
++++ b/arch/s390/kernel/nmi.c
+@@ -273,7 +273,14 @@ static int notrace s390_validate_registe
+               /* Validate vector registers */
+               union ctlreg0 cr0;
+-              if (!mci.vr) {
++              /*
++               * The vector validity must only be checked if not running a
++               * KVM guest. For KVM guests the machine check is forwarded by
++               * KVM and it is the responsibility of the guest to take
++               * appropriate actions. The host vector or FPU values have been
++               * saved by KVM and will be restored by KVM.
++               */
++              if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) {
+                       /*
+                        * Vector registers can't be restored. If the kernel
+                        * currently uses vector registers the system is
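
This and the previous nmi patch apply the same rule: a validity failure
is only fatal for the host if the machine check did not interrupt a KVM
guest, because for guests KVM forwards the machine check and has saved
the host state itself. The shared pattern, condensed into an
illustrative helper (the flag name is real and appears in both hunks):

    /* kill the affected task only for host-context validity failures */
    static bool validity_failure_is_fatal(bool valid)
    {
            return !valid && !test_cpu_flag(CIF_MCCK_GUEST);
    }
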
diff --git a/queue-5.16/scsi-zfcp-fix-failed-recovery-on-gone-remote-port-with-non-npiv-fcp-devices.patch b/queue-5.16/scsi-zfcp-fix-failed-recovery-on-gone-remote-port-with-non-npiv-fcp-devices.patch
new file mode 100644 (file)
index 0000000..470f6a4
--- /dev/null
@@ -0,0 +1,111 @@
+From 8c9db6679be4348b8aae108e11d4be2f83976e30 Mon Sep 17 00:00:00 2001
+From: Steffen Maier <maier@linux.ibm.com>
+Date: Tue, 18 Jan 2022 17:58:03 +0100
+Subject: scsi: zfcp: Fix failed recovery on gone remote port with non-NPIV FCP devices
+
+From: Steffen Maier <maier@linux.ibm.com>
+
+commit 8c9db6679be4348b8aae108e11d4be2f83976e30 upstream.
+
+Suppose we have an environment with a number of non-NPIV FCP devices
+(virtual HBAs / FCP devices / zfcp "adapter"s) sharing the same physical
+FCP channel (HBA port) and its I_T nexus. Plus a number of storage target
+ports zoned to such shared channel. Now one target port logs out of the
+fabric causing an RSCN. Zfcp reacts with an ADISC ELS and subsequent port
+recovery depending on the ADISC result. This happens on all such FCP
+devices (in different Linux images) concurrently as they all receive a copy
+of this RSCN. In the following we look at one of those FCP devices.
+
+Requests other than FSF_QTCB_FCP_CMND can be slow until they get a
+response.
+
+Depending on which requests are affected by slow responses, there are
+different recovery outcomes. Here we want to fix failed recoveries on port
+or adapter level by avoiding recovery requests that can be slow.
+
+We need the cached N_Port_ID for the remote port "link" test with ADISC.
+Just before sending the ADISC, we now intentionally forget the old cached
+N_Port_ID. The idea is that on receiving an RSCN for a port, we have to
+assume that any cached information about this port is stale.  This forces a
+fresh new GID_PN [FC-GS] nameserver lookup on any subsequent recovery for
+the same port. Since we typically can still communicate with the nameserver
+efficiently, we now reach steady state quicker: Either the nameserver still
+does not know about the port so we stop recovery, or the nameserver already
+knows the port potentially with a new N_Port_ID and we can successfully and
+quickly perform open port recovery.  For the one case where ADISC returns
+successfully, we re-initialize port->d_id because that case does not
+involve any port recovery.
+
+This also solves a problem if the storage WWPN quickly logs into the fabric
+again but with a different N_Port_ID. Such as on virtual WWPN takeover
+during target NPIV failover.
+[https://www.redbooks.ibm.com/abstracts/redp5477.html] In that case the
+RSCN from the storage FDISC was ignored by zfcp and we could not
+successfully recover the failover. On some later failback on the storage,
+we could have been lucky if the virtual WWPN got the same old N_Port_ID
+from the SAN switch as we still had cached.  Then the related RSCN
+triggered a successful port reopen recovery.  However, there is no
+guarantee to get the same N_Port_ID on NPIV FDISC.
+
+Even though NPIV-enabled FCP devices are not affected by this problem, this
+code change optimizes recovery time for gone remote ports as a side effect.
+The timely drop of cached N_Port_IDs prevents unnecessary slow open port
+attempts.
+
+While the problem might have been in code before v2.6.32 commit
+799b76d09aee ("[SCSI] zfcp: Decouple gid_pn requests from erp") this fix
+depends on the gid_pn_work introduced with that commit, so we mark it as
+culprit to satisfy fix dependencies.
+
+Note: Point-to-point remote port is already handled separately and gets its
+N_Port_ID from the cached peer_d_id. So resetting port->d_id in general
+does not affect PtP.
+
+Link: https://lore.kernel.org/r/20220118165803.3667947-1-maier@linux.ibm.com
+Fixes: 799b76d09aee ("[SCSI] zfcp: Decouple gid_pn requests from erp")
+Cc: <stable@vger.kernel.org> #2.6.32+
+Suggested-by: Benjamin Block <bblock@linux.ibm.com>
+Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
+Signed-off-by: Steffen Maier <maier@linux.ibm.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/scsi/zfcp_fc.c |   13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+--- a/drivers/s390/scsi/zfcp_fc.c
++++ b/drivers/s390/scsi/zfcp_fc.c
+@@ -521,6 +521,8 @@ static void zfcp_fc_adisc_handler(void *
+               goto out;
+       }
++      /* re-init to undo drop from zfcp_fc_adisc() */
++      port->d_id = ntoh24(adisc_resp->adisc_port_id);
+       /* port is good, unblock rport without going through erp */
+       zfcp_scsi_schedule_rport_register(port);
+  out:
+@@ -534,6 +536,7 @@ static int zfcp_fc_adisc(struct zfcp_por
+       struct zfcp_fc_req *fc_req;
+       struct zfcp_adapter *adapter = port->adapter;
+       struct Scsi_Host *shost = adapter->scsi_host;
++      u32 d_id;
+       int ret;
+       fc_req = kmem_cache_zalloc(zfcp_fc_req_cache, GFP_ATOMIC);
+@@ -558,7 +561,15 @@ static int zfcp_fc_adisc(struct zfcp_por
+       fc_req->u.adisc.req.adisc_cmd = ELS_ADISC;
+       hton24(fc_req->u.adisc.req.adisc_port_id, fc_host_port_id(shost));
+-      ret = zfcp_fsf_send_els(adapter, port->d_id, &fc_req->ct_els,
++      d_id = port->d_id; /* remember as destination for send els below */
++      /*
++       * Force fresh GID_PN lookup on next port recovery.
++       * Must happen after request setup and before sending request,
++       * to prevent race with port->d_id re-init in zfcp_fc_adisc_handler().
++       */
++      port->d_id = 0;
++
++      ret = zfcp_fsf_send_els(adapter, d_id, &fc_req->ct_els,
+                               ZFCP_FC_CTELS_TMO);
+       if (ret)
+               kmem_cache_free(zfcp_fc_req_cache, fc_req);
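
The ordering in this hunk is deliberate: snapshot port->d_id into a
local before invalidating it, and invalidate it before the request goes
out, so a concurrently completing handler that re-initializes
port->d_id can never be overwritten by the 0. Reduced to its skeleton
(send_adisc() is a stand-in for zfcp_fsf_send_els(), not a real helper):

    u32 d_id = port->d_id;  /* destination for this ADISC */

    port->d_id = 0;         /* force fresh GID_PN lookup on next recovery */
    ret = send_adisc(port, d_id);
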
diff --git a/queue-5.16/series b/queue-5.16/series
new file mode 100644 (file)
index 0000000..cb4f51e
--- /dev/null
@@ -0,0 +1,27 @@
+bluetooth-refactor-malicious-adv-data-check.patch
+btrfs-fix-too-long-loop-when-defragging-a-1-byte-file.patch
+btrfs-allow-defrag-to-be-interruptible.patch
+btrfs-defrag-fix-wrong-number-of-defragged-sectors.patch
+btrfs-defrag-properly-update-range-start-for-autodefrag.patch
+btrfs-fix-deadlock-when-reserving-space-during-defrag.patch
+btrfs-add-back-missing-dirty-page-rate-limiting-to-defrag.patch
+btrfs-update-writeback-index-when-starting-defrag.patch
+can-m_can-m_can_fifo_-read-write-don-t-read-or-write-from-to-fifo-if-length-is-0.patch
+net-sfp-ignore-disabled-sfp-node.patch
+net-stmmac-configure-ptp-clock-source-prior-to-ptp-initialization.patch
+net-stmmac-skip-only-stmmac_ptp_register-when-resume-from-suspend.patch
+arm-9179-1-uaccess-avoid-alignment-faults-in-copy__kernel_nofault.patch
+arm-9180-1-thumb2-align-alt_up-sections-in-modules-sufficiently.patch
+kvm-arm64-use-shadow-spsr_el1-when-injecting-exceptions-on-vhe.patch
+kvm-arm64-vgic-v3-restrict-seis-workaround-to-known-broken-systems.patch
+s390-module-fix-loading-modules-with-a-lot-of-relocations.patch
+s390-hypfs-include-z-vm-guests-with-access-control-group-set.patch
+s390-nmi-handle-guarded-storage-validity-failures-for-kvm-guests.patch
+s390-nmi-handle-vector-validity-failures-for-kvm-guests.patch
+bpf-guard-against-accessing-null-pt_regs-in-bpf_get_task_stack.patch
+powerpc32-bpf-fix-codegen-for-bpf-to-bpf-calls.patch
+powerpc-bpf-update-ldimm64-instructions-during-extra-pass.patch
+ucount-make-get_ucount-a-safe-get_user-replacement.patch
+scsi-zfcp-fix-failed-recovery-on-gone-remote-port-with-non-npiv-fcp-devices.patch
+udf-restore-i_lenalloc-when-inode-expansion-fails.patch
+udf-fix-null-ptr-deref-when-converting-from-inline-format.patch
diff --git a/queue-5.16/ucount-make-get_ucount-a-safe-get_user-replacement.patch b/queue-5.16/ucount-make-get_ucount-a-safe-get_user-replacement.patch
new file mode 100644 (file)
index 0000000..09f6acc
--- /dev/null
@@ -0,0 +1,57 @@
+From f9d87929d451d3e649699d0f1d74f71f77ad38f5 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Mon, 24 Jan 2022 12:46:50 -0600
+Subject: ucount:  Make get_ucount a safe get_user replacement
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit f9d87929d451d3e649699d0f1d74f71f77ad38f5 upstream.
+
+When the ucount code was refactored to create get_ucount, it was missed
+that some of the contexts in which an rlimit is kept elevated can be
+the only reference to the user/ucount in the system.
+
+Ordinary ucount references exist in places that also have a reference
+to the user namespace, but in POSIX message queues, the SysV shm code,
+and the SIGPENDING code there is no independent user namespace
+reference.
+
+Inspection of the user_namespace shows no instance of circular
+references between struct ucounts and the user_namespace.  So
+hold a reference from struct ucounts to its user_namespace to
+resolve this problem.
+
+Link: https://lore.kernel.org/lkml/YZV7Z+yXbsx9p3JN@fixkernel.com/
+Reported-by: Qian Cai <quic_qiancai@quicinc.com>
+Reported-by: Mathias Krause <minipli@grsecurity.net>
+Tested-by: Mathias Krause <minipli@grsecurity.net>
+Reviewed-by: Mathias Krause <minipli@grsecurity.net>
+Reviewed-by: Alexey Gladkov <legion@kernel.org>
+Fixes: d64696905554 ("Reimplement RLIMIT_SIGPENDING on top of ucounts")
+Fixes: 6e52a9f0532f ("Reimplement RLIMIT_MSGQUEUE on top of ucounts")
+Fixes: d7c9e99aee48 ("Reimplement RLIMIT_MEMLOCK on top of ucounts")
+Cc: stable@vger.kernel.org
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/ucount.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/ucount.c
++++ b/kernel/ucount.c
+@@ -190,6 +190,7 @@ struct ucounts *alloc_ucounts(struct use
+                       kfree(new);
+               } else {
+                       hlist_add_head(&new->node, hashent);
++                      get_user_ns(new->ns);
+                       spin_unlock_irq(&ucounts_lock);
+                       return new;
+               }
+@@ -210,6 +211,7 @@ void put_ucounts(struct ucounts *ucounts
+       if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) {
+               hlist_del_init(&ucounts->node);
+               spin_unlock_irqrestore(&ucounts_lock, flags);
++              put_user_ns(ucounts->ns);
+               kfree(ucounts);
+       }
+ }
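
The bug class here is an object outliving the only reference to
something it dereferences at free time; the fix is the classic get/put
pairing shown above. A self-contained miniature of the pattern, using
plain counters instead of the kernel's get_user_ns()/put_user_ns():

    #include <stdlib.h>

    struct ns  { int refcount; };
    struct obj { struct ns *ns; };

    struct obj *obj_alloc(struct ns *ns)
    {
            struct obj *o = malloc(sizeof(*o));

            if (!o)
                    return NULL;
            ns->refcount++;         /* get_user_ns() in the patch */
            o->ns = ns;
            return o;
    }

    void obj_free(struct obj *o)
    {
            o->ns->refcount--;      /* put_user_ns() in the patch */
            free(o);                /* ns is guaranteed alive until here */
    }
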
diff --git a/queue-5.16/udf-fix-null-ptr-deref-when-converting-from-inline-format.patch b/queue-5.16/udf-fix-null-ptr-deref-when-converting-from-inline-format.patch
new file mode 100644 (file)
index 0000000..279c2f6
--- /dev/null
@@ -0,0 +1,64 @@
+From 7fc3b7c2981bbd1047916ade327beccb90994eee Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Mon, 17 Jan 2022 18:22:13 +0100
+Subject: udf: Fix NULL ptr deref when converting from inline format
+
+From: Jan Kara <jack@suse.cz>
+
+commit 7fc3b7c2981bbd1047916ade327beccb90994eee upstream.
+
+udf_expand_file_adinicb() calls ->writepage directly to write data
+expanded into a page. This however fails to set up the inode for
+writeback properly, so we can crash on an inode->i_wb dereference when
+submitting the page for IO like:
+
+  BUG: kernel NULL pointer dereference, address: 0000000000000158
+  #PF: supervisor read access in kernel mode
+...
+  <TASK>
+  __folio_start_writeback+0x2ac/0x350
+  __block_write_full_page+0x37d/0x490
+  udf_expand_file_adinicb+0x255/0x400 [udf]
+  udf_file_write_iter+0xbe/0x1b0 [udf]
+  new_sync_write+0x125/0x1c0
+  vfs_write+0x28e/0x400
+
+Fix the problem by marking the page dirty and going through the standard
+writeback path to write the page. Strictly speaking, we would not even
+have to write the page, but we want to catch e.g. ENOSPC errors early.
+
+Reported-by: butt3rflyh4ck <butterflyhuangxx@gmail.com>
+CC: stable@vger.kernel.org
+Fixes: 52ebea749aae ("writeback: make backing_dev_info host cgroup-specific bdi_writebacks")
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/udf/inode.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/fs/udf/inode.c
++++ b/fs/udf/inode.c
+@@ -258,10 +258,6 @@ int udf_expand_file_adinicb(struct inode
+       char *kaddr;
+       struct udf_inode_info *iinfo = UDF_I(inode);
+       int err;
+-      struct writeback_control udf_wbc = {
+-              .sync_mode = WB_SYNC_NONE,
+-              .nr_to_write = 1,
+-      };
+       WARN_ON_ONCE(!inode_is_locked(inode));
+       if (!iinfo->i_lenAlloc) {
+@@ -305,8 +301,10 @@ int udf_expand_file_adinicb(struct inode
+               iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
+       /* from now on we have normal address_space methods */
+       inode->i_data.a_ops = &udf_aops;
++      set_page_dirty(page);
++      unlock_page(page);
+       up_write(&iinfo->i_data_sem);
+-      err = inode->i_data.a_ops->writepage(page, &udf_wbc);
++      err = filemap_fdatawrite(inode->i_mapping);
+       if (err) {
+               /* Restore everything back so that we don't lose data... */
+               lock_page(page);
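
The replacement relies only on standard page-cache APIs: dirty the page,
unlock it, and let filemap_fdatawrite() drive the regular writeback
machinery, which sets up inode->i_wb correctly and still surfaces errors
such as ENOSPC. The pattern, extracted from the hunk above:

    set_page_dirty(page);
    unlock_page(page);
    err = filemap_fdatawrite(inode->i_mapping);  /* full standard writeback path */
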
diff --git a/queue-5.16/udf-restore-i_lenalloc-when-inode-expansion-fails.patch b/queue-5.16/udf-restore-i_lenalloc-when-inode-expansion-fails.patch
new file mode 100644 (file)
index 0000000..793f477
--- /dev/null
@@ -0,0 +1,34 @@
+From ea8569194b43f0f01f0a84c689388542c7254a1f Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Tue, 18 Jan 2022 09:57:25 +0100
+Subject: udf: Restore i_lenAlloc when inode expansion fails
+
+From: Jan Kara <jack@suse.cz>
+
+commit ea8569194b43f0f01f0a84c689388542c7254a1f upstream.
+
+When we fail to expand an inode from inline format to a normal format,
+we restore the inode to contain the original inline formatting, but we
+forget to set i_lenAlloc back. The mismatch between i_lenAlloc and
+i_size was then causing further problems such as warnings and lost data
+down the line.
+
+Reported-by: butt3rflyh4ck <butterflyhuangxx@gmail.com>
+CC: stable@vger.kernel.org
+Fixes: 7e49b6f2480c ("udf: Convert UDF to new truncate calling sequence")
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/udf/inode.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/udf/inode.c
++++ b/fs/udf/inode.c
+@@ -317,6 +317,7 @@ int udf_expand_file_adinicb(struct inode
+               unlock_page(page);
+               iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
+               inode->i_data.a_ops = &udf_adinicb_aops;
++              iinfo->i_lenAlloc = inode->i_size;
+               up_write(&iinfo->i_data_sem);
+       }
+       put_page(page);