From a67dfa9dfc8764a746e368e3a5848dc83940edcb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 29 Jan 2022 13:16:53 +0100 Subject: [PATCH] 5.16-stable patches added patches: --- ...nment-faults-in-copy__kernel_nofault.patch | 65 +++++++++ ..._up-sections-in-modules-sufficiently.patch | 53 +++++++ ...th-refactor-malicious-adv-data-check.patch | 48 ++++++ ...g-null-pt_regs-in-bpf_get_task_stack.patch | 44 ++++++ ...g-dirty-page-rate-limiting-to-defrag.patch | 48 ++++++ ...rfs-allow-defrag-to-be-interruptible.patch | 52 +++++++ ...ix-wrong-number-of-defragged-sectors.patch | 99 +++++++++++++ ...ly-update-range-start-for-autodefrag.patch | 119 +++++++++++++++ ...k-when-reserving-space-during-defrag.patch | 96 ++++++++++++ ...g-loop-when-defragging-a-1-byte-file.patch | 85 +++++++++++ ...writeback-index-when-starting-defrag.patch | 61 ++++++++ ...or-write-from-to-fifo-if-length-is-0.patch | 60 ++++++++ ...el1-when-injecting-exceptions-on-vhe.patch | 46 ++++++ ...s-workaround-to-known-broken-systems.patch | 76 ++++++++++ .../net-sfp-ignore-disabled-sfp-node.patch | 43 ++++++ ...k-source-prior-to-ptp-initialization.patch | 47 ++++++ ...tp_register-when-resume-from-suspend.patch | 75 ++++++++++ ...imm64-instructions-during-extra-pass.patch | 137 ++++++++++++++++++ ...bpf-fix-codegen-for-bpf-to-bpf-calls.patch | 36 +++++ ...guests-with-access-control-group-set.patch | 51 +++++++ ...ng-modules-with-a-lot-of-relocations.patch | 84 +++++++++++ ...age-validity-failures-for-kvm-guests.patch | 57 ++++++++ ...tor-validity-failures-for-kvm-guests.patch | 42 ++++++ ...emote-port-with-non-npiv-fcp-devices.patch | 111 ++++++++++++++ queue-5.16/series | 27 ++++ ...t_ucount-a-safe-get_user-replacement.patch | 57 ++++++++ ...f-when-converting-from-inline-format.patch | 64 ++++++++ ..._lenalloc-when-inode-expansion-fails.patch | 34 +++++ 28 files changed, 1817 insertions(+) create mode 100644 queue-5.16/arm-9179-1-uaccess-avoid-alignment-faults-in-copy__kernel_nofault.patch create mode 100644 queue-5.16/arm-9180-1-thumb2-align-alt_up-sections-in-modules-sufficiently.patch create mode 100644 queue-5.16/bluetooth-refactor-malicious-adv-data-check.patch create mode 100644 queue-5.16/bpf-guard-against-accessing-null-pt_regs-in-bpf_get_task_stack.patch create mode 100644 queue-5.16/btrfs-add-back-missing-dirty-page-rate-limiting-to-defrag.patch create mode 100644 queue-5.16/btrfs-allow-defrag-to-be-interruptible.patch create mode 100644 queue-5.16/btrfs-defrag-fix-wrong-number-of-defragged-sectors.patch create mode 100644 queue-5.16/btrfs-defrag-properly-update-range-start-for-autodefrag.patch create mode 100644 queue-5.16/btrfs-fix-deadlock-when-reserving-space-during-defrag.patch create mode 100644 queue-5.16/btrfs-fix-too-long-loop-when-defragging-a-1-byte-file.patch create mode 100644 queue-5.16/btrfs-update-writeback-index-when-starting-defrag.patch create mode 100644 queue-5.16/can-m_can-m_can_fifo_-read-write-don-t-read-or-write-from-to-fifo-if-length-is-0.patch create mode 100644 queue-5.16/kvm-arm64-use-shadow-spsr_el1-when-injecting-exceptions-on-vhe.patch create mode 100644 queue-5.16/kvm-arm64-vgic-v3-restrict-seis-workaround-to-known-broken-systems.patch create mode 100644 queue-5.16/net-sfp-ignore-disabled-sfp-node.patch create mode 100644 queue-5.16/net-stmmac-configure-ptp-clock-source-prior-to-ptp-initialization.patch create mode 100644 queue-5.16/net-stmmac-skip-only-stmmac_ptp_register-when-resume-from-suspend.patch create mode 100644 
queue-5.16/powerpc-bpf-update-ldimm64-instructions-during-extra-pass.patch create mode 100644 queue-5.16/powerpc32-bpf-fix-codegen-for-bpf-to-bpf-calls.patch create mode 100644 queue-5.16/s390-hypfs-include-z-vm-guests-with-access-control-group-set.patch create mode 100644 queue-5.16/s390-module-fix-loading-modules-with-a-lot-of-relocations.patch create mode 100644 queue-5.16/s390-nmi-handle-guarded-storage-validity-failures-for-kvm-guests.patch create mode 100644 queue-5.16/s390-nmi-handle-vector-validity-failures-for-kvm-guests.patch create mode 100644 queue-5.16/scsi-zfcp-fix-failed-recovery-on-gone-remote-port-with-non-npiv-fcp-devices.patch create mode 100644 queue-5.16/series create mode 100644 queue-5.16/ucount-make-get_ucount-a-safe-get_user-replacement.patch create mode 100644 queue-5.16/udf-fix-null-ptr-deref-when-converting-from-inline-format.patch create mode 100644 queue-5.16/udf-restore-i_lenalloc-when-inode-expansion-fails.patch diff --git a/queue-5.16/arm-9179-1-uaccess-avoid-alignment-faults-in-copy__kernel_nofault.patch b/queue-5.16/arm-9179-1-uaccess-avoid-alignment-faults-in-copy__kernel_nofault.patch new file mode 100644 index 00000000000..65a134461ce --- /dev/null +++ b/queue-5.16/arm-9179-1-uaccess-avoid-alignment-faults-in-copy__kernel_nofault.patch @@ -0,0 +1,65 @@ +From 15420269b02a63ed8c1841905d8b8b2403246004 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Tue, 18 Jan 2022 13:45:09 +0100 +Subject: ARM: 9179/1: uaccess: avoid alignment faults in copy_[from|to]_kernel_nofault + +From: Ard Biesheuvel + +commit 15420269b02a63ed8c1841905d8b8b2403246004 upstream. + +The helpers that are used to implement copy_from_kernel_nofault() and +copy_to_kernel_nofault() cast a void* to a pointer to a wider type, +which may result in alignment faults on ARM if the compiler decides to +use double-word or multiple-word load/store instructions. + +Only configurations that define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y +are affected, given that commit 2423de2e6f4d ("ARM: 9115/1: mm/maccess: +fix unaligned copy_{from,to}_kernel_nofault") ensures that dst and src +are sufficiently aligned otherwise. + +So use the unaligned accessors for accessing dst and src in cases where +they may be misaligned. + +Cc: # depends on 2423de2e6f4d +Fixes: 2df4c9a741a0 ("ARM: 9112/1: uaccess: add __{get,put}_kernel_nofault") +Reviewed-by: Arnd Bergmann +Signed-off-by: Ard Biesheuvel +Signed-off-by: Russell King (Oracle) +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/include/asm/uaccess.h | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/arch/arm/include/asm/uaccess.h ++++ b/arch/arm/include/asm/uaccess.h +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -497,7 +498,10 @@ do { \ + } \ + default: __err = __get_user_bad(); break; \ + } \ +- *(type *)(dst) = __val; \ ++ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) \ ++ put_unaligned(__val, (type *)(dst)); \ ++ else \ ++ *(type *)(dst) = __val; /* aligned by caller */ \ + if (__err) \ + goto err_label; \ + } while (0) +@@ -507,7 +511,9 @@ do { \ + const type *__pk_ptr = (dst); \ + unsigned long __dst = (unsigned long)__pk_ptr; \ + int __err = 0; \ +- type __val = *(type *)src; \ ++ type __val = IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \ ++ ? 
get_unaligned((type *)(src)) \ ++ : *(type *)(src); /* aligned by caller */ \ + switch (sizeof(type)) { \ + case 1: __put_user_asm_byte(__val, __dst, __err, ""); break; \ + case 2: __put_user_asm_half(__val, __dst, __err, ""); break; \ diff --git a/queue-5.16/arm-9180-1-thumb2-align-alt_up-sections-in-modules-sufficiently.patch b/queue-5.16/arm-9180-1-thumb2-align-alt_up-sections-in-modules-sufficiently.patch new file mode 100644 index 00000000000..8aa29575f39 --- /dev/null +++ b/queue-5.16/arm-9180-1-thumb2-align-alt_up-sections-in-modules-sufficiently.patch @@ -0,0 +1,53 @@ +From 9f80ccda53b9417236945bc7ece4b519037df74d Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Tue, 18 Jan 2022 19:32:17 +0100 +Subject: ARM: 9180/1: Thumb2: align ALT_UP() sections in modules sufficiently + +From: Ard Biesheuvel + +commit 9f80ccda53b9417236945bc7ece4b519037df74d upstream. + +When building for Thumb2, the .alt.smp.init sections that are emitted by +the ALT_UP() patching code may not be 32-bit aligned, even though the +fixup_smp_on_up() routine expects that. This results in alignment faults +at module load time, which need to be fixed up by the fault handler. + +So let's align those sections explicitly, and prevent this from occurring. + +Cc: +Signed-off-by: Ard Biesheuvel +Signed-off-by: Russell King (Oracle) +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/include/asm/assembler.h | 2 ++ + arch/arm/include/asm/processor.h | 1 + + 2 files changed, 3 insertions(+) + +--- a/arch/arm/include/asm/assembler.h ++++ b/arch/arm/include/asm/assembler.h +@@ -288,6 +288,7 @@ + */ + #define ALT_UP(instr...) \ + .pushsection ".alt.smp.init", "a" ;\ ++ .align 2 ;\ + .long 9998b - . ;\ + 9997: instr ;\ + .if . - 9997b == 2 ;\ +@@ -299,6 +300,7 @@ + .popsection + #define ALT_UP_B(label) \ + .pushsection ".alt.smp.init", "a" ;\ ++ .align 2 ;\ + .long 9998b - . ;\ + W(b) . + (label - 9998b) ;\ + .popsection +--- a/arch/arm/include/asm/processor.h ++++ b/arch/arm/include/asm/processor.h +@@ -96,6 +96,7 @@ unsigned long __get_wchan(struct task_st + #define __ALT_SMP_ASM(smp, up) \ + "9998: " smp "\n" \ + " .pushsection \".alt.smp.init\", \"a\"\n" \ ++ " .align 2\n" \ + " .long 9998b - .\n" \ + " " up "\n" \ + " .popsection\n" diff --git a/queue-5.16/bluetooth-refactor-malicious-adv-data-check.patch b/queue-5.16/bluetooth-refactor-malicious-adv-data-check.patch new file mode 100644 index 00000000000..514323934ba --- /dev/null +++ b/queue-5.16/bluetooth-refactor-malicious-adv-data-check.patch @@ -0,0 +1,48 @@ +From 899663be5e75dc0174dc8bda0b5e6826edf0b29a Mon Sep 17 00:00:00 2001 +From: Brian Gix +Date: Wed, 24 Nov 2021 12:16:28 -0800 +Subject: Bluetooth: refactor malicious adv data check + +From: Brian Gix + +commit 899663be5e75dc0174dc8bda0b5e6826edf0b29a upstream. + +Check for out-of-bound read was being performed at the end of while +num_reports loop, and would fill journal with false positives. Added +check to beginning of loop processing so that it doesn't get checked +after ptr has been advanced. 
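
For illustration, the loop-ordering pattern this fix applies can be sketched in isolation (hypothetical record/buffer names, not the actual hci_event.c structures; the real event loop additionally accounts for a trailing RSSI byte after the data):

	#include <stddef.h>
	#include <stdint.h>

	struct rec {
		uint8_t length;
		uint8_t data[];
	};

	/*
	 * Validate each record at the top of the loop, before it is read and
	 * before ptr is advanced, so a truncated or malicious record is
	 * rejected without re-checking memory that was already consumed.
	 */
	static int walk_records(const uint8_t *buf, size_t len, int num_reports)
	{
		const uint8_t *ptr = buf;
		const uint8_t *end = buf + len;

		while (num_reports--) {
			const struct rec *r = (const struct rec *)ptr;

			if (ptr + sizeof(*r) > end)
				return -1;	/* header would overrun */
			if (ptr + sizeof(*r) + r->length > end)
				return -1;	/* payload would overrun */

			/* ... process r->data[0 .. r->length) ... */
			ptr += sizeof(*r) + r->length;
		}
		return 0;
	}
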
+ +Signed-off-by: Brian Gix +Signed-off-by: Marcel Holtmann +Cc: syphyr +Signed-off-by: Greg Kroah-Hartman +--- + net/bluetooth/hci_event.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/net/bluetooth/hci_event.c ++++ b/net/bluetooth/hci_event.c +@@ -5822,6 +5822,11 @@ static void hci_le_adv_report_evt(struct + struct hci_ev_le_advertising_info *ev = ptr; + s8 rssi; + ++ if (ptr > (void *)skb_tail_pointer(skb) - sizeof(*ev)) { ++ bt_dev_err(hdev, "Malicious advertising data."); ++ break; ++ } ++ + if (ev->length <= HCI_MAX_AD_LENGTH && + ev->data + ev->length <= skb_tail_pointer(skb)) { + rssi = ev->data[ev->length]; +@@ -5833,11 +5838,6 @@ static void hci_le_adv_report_evt(struct + } + + ptr += sizeof(*ev) + ev->length + 1; +- +- if (ptr > (void *) skb_tail_pointer(skb) - sizeof(*ev)) { +- bt_dev_err(hdev, "Malicious advertising data. Stopping processing"); +- break; +- } + } + + hci_dev_unlock(hdev); diff --git a/queue-5.16/bpf-guard-against-accessing-null-pt_regs-in-bpf_get_task_stack.patch b/queue-5.16/bpf-guard-against-accessing-null-pt_regs-in-bpf_get_task_stack.patch new file mode 100644 index 00000000000..eb0683550d5 --- /dev/null +++ b/queue-5.16/bpf-guard-against-accessing-null-pt_regs-in-bpf_get_task_stack.patch @@ -0,0 +1,44 @@ +From b992f01e66150fc5e90be4a96f5eb8e634c8249e Mon Sep 17 00:00:00 2001 +From: "Naveen N. Rao" +Date: Thu, 6 Jan 2022 17:15:05 +0530 +Subject: bpf: Guard against accessing NULL pt_regs in bpf_get_task_stack() + +From: Naveen N. Rao + +commit b992f01e66150fc5e90be4a96f5eb8e634c8249e upstream. + +task_pt_regs() can return NULL on powerpc for kernel threads. This is +then used in __bpf_get_stack() to check for user mode, resulting in a +kernel oops. Guard against this by checking return value of +task_pt_regs() before trying to obtain the call chain. + +Fixes: fa28dcb82a38f8 ("bpf: Introduce helper bpf_get_task_stack()") +Cc: stable@vger.kernel.org # v5.9+ +Signed-off-by: Naveen N. Rao +Acked-by: Daniel Borkmann +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/d5ef83c361cc255494afd15ff1b4fb02a36e1dcf.1641468127.git.naveen.n.rao@linux.vnet.ibm.com +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/stackmap.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/kernel/bpf/stackmap.c ++++ b/kernel/bpf/stackmap.c +@@ -525,13 +525,14 @@ BPF_CALL_4(bpf_get_task_stack, struct ta + u32, size, u64, flags) + { + struct pt_regs *regs; +- long res; ++ long res = -EINVAL; + + if (!try_get_task_stack(task)) + return -EFAULT; + + regs = task_pt_regs(task); +- res = __bpf_get_stack(regs, task, NULL, buf, size, flags); ++ if (regs) ++ res = __bpf_get_stack(regs, task, NULL, buf, size, flags); + put_task_stack(task); + + return res; diff --git a/queue-5.16/btrfs-add-back-missing-dirty-page-rate-limiting-to-defrag.patch b/queue-5.16/btrfs-add-back-missing-dirty-page-rate-limiting-to-defrag.patch new file mode 100644 index 00000000000..49b1806511d --- /dev/null +++ b/queue-5.16/btrfs-add-back-missing-dirty-page-rate-limiting-to-defrag.patch @@ -0,0 +1,48 @@ +From 3c9d31c715948aaff0ee6d322a91a2dec07770bf Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Thu, 20 Jan 2022 17:11:52 +0000 +Subject: btrfs: add back missing dirty page rate limiting to defrag + +From: Filipe Manana + +commit 3c9d31c715948aaff0ee6d322a91a2dec07770bf upstream. + +A defrag operation can dirty a lot of pages, specially if operating on +the entire file or a large file range. 
Any task dirtying pages should +periodically call balance_dirty_pages_ratelimited(), as stated in that +function's comments, otherwise they can leave too many dirty pages in +the system. This is what we did before the refactoring in 5.16, and +it should have remained, just like in the buffered write path and +relocation. So restore that behaviour. + +Fixes: 7b508037d4cac3 ("btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file()") +CC: stable@vger.kernel.org # 5.16 +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -1553,6 +1553,7 @@ int btrfs_defrag_file(struct inode *inod + } + + while (cur < last_byte) { ++ const unsigned long prev_sectors_defragged = sectors_defragged; + u64 cluster_end; + + /* The cluster size 256K should always be page aligned */ +@@ -1584,6 +1585,10 @@ int btrfs_defrag_file(struct inode *inod + cluster_end + 1 - cur, extent_thresh, + newer_than, do_compress, + §ors_defragged, max_to_defrag); ++ ++ if (sectors_defragged > prev_sectors_defragged) ++ balance_dirty_pages_ratelimited(inode->i_mapping); ++ + btrfs_inode_unlock(inode, 0); + if (ret < 0) + break; diff --git a/queue-5.16/btrfs-allow-defrag-to-be-interruptible.patch b/queue-5.16/btrfs-allow-defrag-to-be-interruptible.patch new file mode 100644 index 00000000000..39844cb9234 --- /dev/null +++ b/queue-5.16/btrfs-allow-defrag-to-be-interruptible.patch @@ -0,0 +1,52 @@ +From b767c2fc787e992daeadfff40d61c05f66c82da0 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Tue, 18 Jan 2022 13:43:31 +0000 +Subject: btrfs: allow defrag to be interruptible + +From: Filipe Manana + +commit b767c2fc787e992daeadfff40d61c05f66c82da0 upstream. + +During defrag, at btrfs_defrag_file(), we have this loop that iterates +over a file range in steps no larger than 256K subranges. If the range +is too long, there's no way to interrupt it. So make the loop check in +each iteration if there's signal pending, and if there is, break and +return -AGAIN to userspace. + +Before kernel 5.16, we used to allow defrag to be cancelled through a +signal, but that was lost with commit 7b508037d4cac3 ("btrfs: defrag: +use defrag_one_cluster() to implement btrfs_defrag_file()"). + +This change adds back the possibility to cancel a defrag with a signal +and keeps the same semantics, returning -EAGAIN to user space (and not +the usually more expected -EINTR). + +This is also motivated by a recent bug on 5.16 where defragging a 1 byte +file resulted in iterating from file range 0 to (u64)-1, as hitting the +bug triggered a too long loop, basically requiring one to reboot the +machine, as it was not possible to cancel defrag. 
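
The shape of the change is the usual cooperative-cancellation check at the top of a long-running loop. A minimal sketch with stand-in helpers (btrfs wraps the signal test in btrfs_defrag_cancelled()):

	#include <errno.h>
	#include <stdbool.h>
	#include <stdint.h>

	static bool cancel_requested(void);	/* ~ btrfs_defrag_cancelled() */
	static int defrag_one_cluster_stub(uint64_t cur);	/* hypothetical */

	static int defrag_range(uint64_t cur, uint64_t last_byte)
	{
		int ret = 0;

		while (cur < last_byte) {
			/* one check per 256K cluster: a pending signal ends
			 * the loop with -EAGAIN, the pre-5.16 semantics */
			if (cancel_requested())
				return -EAGAIN;

			ret = defrag_one_cluster_stub(cur);
			if (ret < 0)
				break;
			cur += 256 * 1024;	/* next cluster */
		}
		return ret;
	}
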
+ +Fixes: 7b508037d4cac3 ("btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file()") +CC: stable@vger.kernel.org # 5.16 +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -1520,6 +1520,11 @@ int btrfs_defrag_file(struct inode *inod + /* The cluster size 256K should always be page aligned */ + BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE)); + ++ if (btrfs_defrag_cancelled(fs_info)) { ++ ret = -EAGAIN; ++ break; ++ } ++ + /* We want the cluster end at page boundary when possible */ + cluster_end = (((cur >> PAGE_SHIFT) + + (SZ_256K >> PAGE_SHIFT)) << PAGE_SHIFT) - 1; diff --git a/queue-5.16/btrfs-defrag-fix-wrong-number-of-defragged-sectors.patch b/queue-5.16/btrfs-defrag-fix-wrong-number-of-defragged-sectors.patch new file mode 100644 index 00000000000..808eaf5b89c --- /dev/null +++ b/queue-5.16/btrfs-defrag-fix-wrong-number-of-defragged-sectors.patch @@ -0,0 +1,99 @@ +From 484167da77739a8d0e225008c48e697fd3f781ae Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Tue, 18 Jan 2022 15:19:04 +0800 +Subject: btrfs: defrag: fix wrong number of defragged sectors + +From: Qu Wenruo + +commit 484167da77739a8d0e225008c48e697fd3f781ae upstream. + +[BUG] +There are users using autodefrag mount option reporting obvious increase +in IO: + +> If I compare the write average (in total, I don't have it per process) +> when taking idle periods on the same machine: +> Linux 5.16: +> without autodefrag: ~ 10KiB/s +> with autodefrag: between 1 and 2MiB/s. +> +> Linux 5.15: +> with autodefrag:~ 10KiB/s (around the same as without +> autodefrag on 5.16) + +[CAUSE] +When autodefrag mount option is enabled, btrfs_defrag_file() will be +called with @max_sectors = BTRFS_DEFRAG_BATCH (1024) to limit how many +sectors we can defrag in one try. + +And then use the number of sectors defragged to determine if we need to +re-defrag. + +But commit b18c3ab2343d ("btrfs: defrag: introduce helper to defrag one +cluster") uses wrong unit to increase @sectors_defragged, which should +be in unit of sector, not byte. + +This means, if we have defragged any sector, then @sectors_defragged +will be >= sectorsize (normally 4096), which is larger than +BTRFS_DEFRAG_BATCH. + +This makes the @max_sectors check in defrag_one_cluster() to underflow, +rendering the whole @max_sectors check useless. + +Thus causing way more IO for autodefrag mount options, as now there is +no limit on how many sectors can really be defragged. 
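
The unit mismatch is easy to demonstrate in isolation. A plain C sketch, assuming 4KiB sectors (the kernel derives the shift from fs_info->sectorsize_bits):

	#include <stdint.h>
	#include <stdio.h>

	#define SECTORSIZE_BITS	12	/* 4096-byte sectors (assumed) */
	#define DEFRAG_BATCH	1024	/* BTRFS_DEFRAG_BATCH, in sectors */

	int main(void)
	{
		uint64_t range_len = 256 * 1024;	/* one 256K cluster, bytes */
		uint64_t defragged;

		/* buggy: bytes accumulated where sectors are expected, so an
		 * equality test against the 1024-sector batch never fires */
		defragged = range_len;
		printf("buggy: %llu \"sectors\"\n", (unsigned long long)defragged);

		/* fixed: convert to sectors and compare with >= */
		defragged = range_len >> SECTORSIZE_BITS;
		printf("fixed: %llu sectors, limit %s\n",
		       (unsigned long long)defragged,
		       defragged >= DEFRAG_BATCH ? "reached" : "not reached");
		return 0;
	}
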
+ +[FIX] +Fix the problems by: + +- Use sector as unit when increasing @sectors_defragged + +- Include @sectors_defragged > @max_sectors case to break the loop + +- Add extra comment on the return value of btrfs_defrag_file() + +Reported-by: Anthony Ruhier +Fixes: b18c3ab2343d ("btrfs: defrag: introduce helper to defrag one cluster") +Link: https://lore.kernel.org/linux-btrfs/0a269612-e43f-da22-c5bc-b34b1b56ebe8@mailbox.org/ +CC: stable@vger.kernel.org # 5.16 +Reviewed-by: Filipe Manana +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -1416,8 +1416,8 @@ static int defrag_one_cluster(struct btr + list_for_each_entry(entry, &target_list, list) { + u32 range_len = entry->len; + +- /* Reached the limit */ +- if (max_sectors && max_sectors == *sectors_defragged) ++ /* Reached or beyond the limit */ ++ if (max_sectors && *sectors_defragged >= max_sectors) + break; + + if (max_sectors) +@@ -1439,7 +1439,8 @@ static int defrag_one_cluster(struct btr + extent_thresh, newer_than, do_compress); + if (ret < 0) + break; +- *sectors_defragged += range_len; ++ *sectors_defragged += range_len >> ++ inode->root->fs_info->sectorsize_bits; + } + out: + list_for_each_entry_safe(entry, tmp, &target_list, list) { +@@ -1458,6 +1459,9 @@ out: + * @newer_than: minimum transid to defrag + * @max_to_defrag: max number of sectors to be defragged, if 0, the whole inode + * will be defragged. ++ * ++ * Return <0 for error. ++ * Return >=0 for the number of sectors defragged. + */ + int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, + struct btrfs_ioctl_defrag_range_args *range, diff --git a/queue-5.16/btrfs-defrag-properly-update-range-start-for-autodefrag.patch b/queue-5.16/btrfs-defrag-properly-update-range-start-for-autodefrag.patch new file mode 100644 index 00000000000..1846d32f3a9 --- /dev/null +++ b/queue-5.16/btrfs-defrag-properly-update-range-start-for-autodefrag.patch @@ -0,0 +1,119 @@ +From c080b4144b9dd3b7af838a194ffad3204ca15166 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Tue, 18 Jan 2022 19:53:52 +0800 +Subject: btrfs: defrag: properly update range->start for autodefrag + +From: Qu Wenruo + +commit c080b4144b9dd3b7af838a194ffad3204ca15166 upstream. + +[BUG] +After commit 7b508037d4ca ("btrfs: defrag: use defrag_one_cluster() to +implement btrfs_defrag_file()") autodefrag no longer properly re-defrag +the file from previously finished location. + +[CAUSE] +The recent refactoring of defrag only focuses on defrag ioctl subpage +support, doesn't take autodefrag into consideration. + +There are two problems involved which prevents autodefrag to restart its +scan: + +- No range.start update + Previously when one defrag target is found, range->start will be + updated to indicate where next search should start from. + + But now btrfs_defrag_file() doesn't update it anymore, making all + autodefrag to rescan from file offset 0. + + This would also make autodefrag to mark the same range dirty again and + again, causing extra IO. + +- No proper quick exit for defrag_one_cluster() + Currently if we reached or exceed @max_sectors limit, we just exit + defrag_one_cluster(), and let next defrag_one_cluster() call to do a + quick exit. + This makes @cur increase, thus no way to properly know which range is + defragged and which range is skipped. 
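
Taken together, the two behaviours form a simple resumable-scan contract, sketched below with hypothetical names (in btrfs the saved cursor is range->start):

	#include <stdint.h>

	/* hypothetical worker: <0 error, 0 continue, >0 batch limit hit */
	static int one_cluster(uint64_t cur, uint64_t *done, uint64_t max)
	{
		if (max && *done >= max)
			return 1;	/* quick exit for the caller */
		/* ... defrag [cur, cur + 256K), add sectors to *done ... */
		return 0;
	}

	/* returns the new cursor; the caller stores it in range->start so
	 * the next autodefrag run resumes here instead of rescanning from 0 */
	static uint64_t scan(uint64_t cur, uint64_t end, uint64_t max)
	{
		uint64_t done = 0;

		while (cur < end) {
			int ret = one_cluster(cur, &done, max);

			if (ret < 0)
				break;
			cur += 256 * 1024;	/* next cluster start */
			if (ret > 0)
				break;		/* quick exit, cursor kept */
		}
		return cur;
	}
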
+ +[FIX] +The fix involves two modifications: + +- Update range->start to next cluster start + This is a little different from the old behavior. + Previously range->start is updated to the next defrag target. + + But in the end, the behavior should still be pretty much the same, + as now we skip to next defrag target inside btrfs_defrag_file(). + + Thus if auto-defrag determines to re-scan, then we still do the skip, + just at a different timing. + +- Make defrag_one_cluster() to return >0 to indicate a quick exit + So that btrfs_defrag_file() can also do a quick exit, without + increasing @cur to the range end, and re-use @cur to update + @range->start. + +- Add comment for btrfs_defrag_file() to mention the range->start update + Currently only autodefrag utilize this behavior, as defrag ioctl won't + set @max_to_defrag parameter, thus unless interrupted it will always + try to defrag the whole range. + +Reported-by: Filipe Manana +Fixes: 7b508037d4ca ("btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file()") +Link: https://lore.kernel.org/linux-btrfs/0a269612-e43f-da22-c5bc-b34b1b56ebe8@mailbox.org/ +CC: stable@vger.kernel.org # 5.16 +Reviewed-by: Filipe Manana +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -1417,8 +1417,10 @@ static int defrag_one_cluster(struct btr + u32 range_len = entry->len; + + /* Reached or beyond the limit */ +- if (max_sectors && *sectors_defragged >= max_sectors) ++ if (max_sectors && *sectors_defragged >= max_sectors) { ++ ret = 1; + break; ++ } + + if (max_sectors) + range_len = min_t(u32, range_len, +@@ -1461,7 +1463,10 @@ out: + * will be defragged. + * + * Return <0 for error. +- * Return >=0 for the number of sectors defragged. ++ * Return >=0 for the number of sectors defragged, and range->start will be updated ++ * to indicate the file offset where next defrag should be started at. ++ * (Mostly for autodefrag, which sets @max_to_defrag thus we may exit early without ++ * defragging all the range). + */ + int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, + struct btrfs_ioctl_defrag_range_args *range, +@@ -1554,10 +1559,19 @@ int btrfs_defrag_file(struct inode *inod + if (ret < 0) + break; + cur = cluster_end + 1; ++ if (ret > 0) { ++ ret = 0; ++ break; ++ } + } + + if (ra_allocated) + kfree(ra); ++ /* ++ * Update range.start for autodefrag, this will indicate where to start ++ * in next run. ++ */ ++ range->start = cur; + if (sectors_defragged) { + /* + * We have defragged some sectors, for compression case they diff --git a/queue-5.16/btrfs-fix-deadlock-when-reserving-space-during-defrag.patch b/queue-5.16/btrfs-fix-deadlock-when-reserving-space-during-defrag.patch new file mode 100644 index 00000000000..c4087270159 --- /dev/null +++ b/queue-5.16/btrfs-fix-deadlock-when-reserving-space-during-defrag.patch @@ -0,0 +1,96 @@ +From 0cb5950f3f3b51a4e8657d106f897f2b913e0586 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Thu, 20 Jan 2022 14:27:56 +0000 +Subject: btrfs: fix deadlock when reserving space during defrag + +From: Filipe Manana + +commit 0cb5950f3f3b51a4e8657d106f897f2b913e0586 upstream. 
+ +When defragging we can end up collecting a range for defrag that has +already pages under delalloc (dirty), as long as the respective extent +map for their range is not mapped to a hole, a prealloc extent or +the extent map is from an old generation. + +Most of the time that is harmless from a functional perspective at +least, however it can result in a deadlock: + +1) At defrag_collect_targets() we find an extent map that meets all + requirements but there's delalloc for the range it covers, and we add + its range to list of ranges to defrag; + +2) The defrag_collect_targets() function is called at defrag_one_range(), + after it locked a range that overlaps the range of the extent map; + +3) At defrag_one_range(), while the range is still locked, we call + defrag_one_locked_target() for the range associated to the extent + map we collected at step 1); + +4) Then finally at defrag_one_locked_target() we do a call to + btrfs_delalloc_reserve_space(), which will reserve data and metadata + space. If the space reservations can not be satisfied right away, the + flusher might be kicked in and start flushing delalloc and wait for + the respective ordered extents to complete. If this happens we will + deadlock, because both flushing delalloc and finishing an ordered + extent, requires locking the range in the inode's io tree, which was + already locked at defrag_collect_targets(). + +So fix this by skipping extent maps for which there's already delalloc. + +Fixes: eb793cf857828d ("btrfs: defrag: introduce helper to collect target file extents") +CC: stable@vger.kernel.org # 5.16 +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 31 ++++++++++++++++++++++++++++++- + 1 file changed, 30 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -1188,6 +1188,35 @@ static int defrag_collect_targets(struct + goto next; + + /* ++ * Our start offset might be in the middle of an existing extent ++ * map, so take that into account. ++ */ ++ range_len = em->len - (cur - em->start); ++ /* ++ * If this range of the extent map is already flagged for delalloc, ++ * skip it, because: ++ * ++ * 1) We could deadlock later, when trying to reserve space for ++ * delalloc, because in case we can't immediately reserve space ++ * the flusher can start delalloc and wait for the respective ++ * ordered extents to complete. The deadlock would happen ++ * because we do the space reservation while holding the range ++ * locked, and starting writeback, or finishing an ordered ++ * extent, requires locking the range; ++ * ++ * 2) If there's delalloc there, it means there's dirty pages for ++ * which writeback has not started yet (we clean the delalloc ++ * flag when starting writeback and after creating an ordered ++ * extent). If we mark pages in an adjacent range for defrag, ++ * then we will have a larger contiguous range for delalloc, ++ * very likely resulting in a larger extent after writeback is ++ * triggered (except in a case of free space fragmentation). ++ */ ++ if (test_range_bit(&inode->io_tree, cur, cur + range_len - 1, ++ EXTENT_DELALLOC, 0, NULL)) ++ goto next; ++ ++ /* + * For do_compress case, we want to compress all valid file + * extents, thus no @extent_thresh or mergeable check. 
+ */ +@@ -1195,7 +1224,7 @@ static int defrag_collect_targets(struct + goto add; + + /* Skip too large extent */ +- if (em->len >= extent_thresh) ++ if (range_len >= extent_thresh) + goto next; + + next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em, diff --git a/queue-5.16/btrfs-fix-too-long-loop-when-defragging-a-1-byte-file.patch b/queue-5.16/btrfs-fix-too-long-loop-when-defragging-a-1-byte-file.patch new file mode 100644 index 00000000000..27ae0efc63d --- /dev/null +++ b/queue-5.16/btrfs-fix-too-long-loop-when-defragging-a-1-byte-file.patch @@ -0,0 +1,85 @@ +From 6b34cd8e175bfbf4f3f01b6d19eae18245e1a8cc Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 17 Jan 2022 16:28:29 +0000 +Subject: btrfs: fix too long loop when defragging a 1 byte file + +From: Filipe Manana + +commit 6b34cd8e175bfbf4f3f01b6d19eae18245e1a8cc upstream. + +When attempting to defrag a file with a single byte, we can end up in a +too long loop, which is nearly infinite because at btrfs_defrag_file() +we end up with the variable last_byte assigned with a value of +18446744073709551615 (which is (u64)-1). The problem comes from the fact +we end up doing: + + last_byte = round_up(last_byte, fs_info->sectorsize) - 1; + +So if last_byte was assigned 0, which is i_size - 1, we underflow and +end up with the value 18446744073709551615. + +This is trivial to reproduce and the following script triggers it: + + $ cat test.sh + #!/bin/bash + + DEV=/dev/sdj + MNT=/mnt/sdj + + mkfs.btrfs -f $DEV + mount $DEV $MNT + + echo -n "X" > $MNT/foobar + + btrfs filesystem defragment $MNT/foobar + + umount $MNT + +So fix this by not decrementing last_byte by 1 before doing the sector +size round up. Also, to make it easier to follow, make the round up right +after computing last_byte. + +Reported-by: Anthony Ruhier +Fixes: 7b508037d4cac3 ("btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file()") +Link: https://lore.kernel.org/linux-btrfs/0a269612-e43f-da22-c5bc-b34b1b56ebe8@mailbox.org/ +CC: stable@vger.kernel.org # 5.16 +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -1492,12 +1492,16 @@ int btrfs_defrag_file(struct inode *inod + + if (range->start + range->len > range->start) { + /* Got a specific range */ +- last_byte = min(isize, range->start + range->len) - 1; ++ last_byte = min(isize, range->start + range->len); + } else { + /* Defrag until file end */ +- last_byte = isize - 1; ++ last_byte = isize; + } + ++ /* Align the range */ ++ cur = round_down(range->start, fs_info->sectorsize); ++ last_byte = round_up(last_byte, fs_info->sectorsize) - 1; ++ + /* + * If we were not given a ra, allocate a readahead context. 
As + * readahead is just an optimization, defrag will work without it so +@@ -1510,10 +1514,6 @@ int btrfs_defrag_file(struct inode *inod + file_ra_state_init(ra, inode->i_mapping); + } + +- /* Align the range */ +- cur = round_down(range->start, fs_info->sectorsize); +- last_byte = round_up(last_byte, fs_info->sectorsize) - 1; +- + while (cur < last_byte) { + u64 cluster_end; + diff --git a/queue-5.16/btrfs-update-writeback-index-when-starting-defrag.patch b/queue-5.16/btrfs-update-writeback-index-when-starting-defrag.patch new file mode 100644 index 00000000000..b22739d6108 --- /dev/null +++ b/queue-5.16/btrfs-update-writeback-index-when-starting-defrag.patch @@ -0,0 +1,61 @@ +From 27cdfde181bcacd226c230b2fd831f6f5b8c215f Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Thu, 20 Jan 2022 17:41:17 +0000 +Subject: btrfs: update writeback index when starting defrag + +From: Filipe Manana + +commit 27cdfde181bcacd226c230b2fd831f6f5b8c215f upstream. + +When starting a defrag, we should update the writeback index of the +inode's mapping in case it currently has a value beyond the start of the +range we are defragging. This can help performance and often result in +getting less extents after writeback - for e.g., if the current value +of the writeback index sits somewhere in the middle of a range that +gets dirty by the defrag, then after writeback we can get two smaller +extents instead of a single, larger extent. + +We used to have this before the refactoring in 5.16, but it was removed +without any reason to do so. Originally it was added in kernel 3.1, by +commit 2a0f7f5769992b ("Btrfs: fix recursive auto-defrag"), in order to +fix a loop with autodefrag resulting in dirtying and writing pages over +and over, but some testing on current code did not show that happening, +at least with the test described in that commit. + +So add back the behaviour, as at the very least it is a nice to have +optimization. + +Fixes: 7b508037d4cac3 ("btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file()") +CC: stable@vger.kernel.org # 5.16 +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -1511,6 +1511,7 @@ int btrfs_defrag_file(struct inode *inod + int compress_type = BTRFS_COMPRESS_ZLIB; + int ret = 0; + u32 extent_thresh = range->extent_thresh; ++ pgoff_t start_index; + + if (isize == 0) + return 0; +@@ -1552,6 +1553,14 @@ int btrfs_defrag_file(struct inode *inod + file_ra_state_init(ra, inode->i_mapping); + } + ++ /* ++ * Make writeback start from the beginning of the range, so that the ++ * defrag range can be written sequentially. 
++ */ ++ start_index = cur >> PAGE_SHIFT; ++ if (start_index < inode->i_mapping->writeback_index) ++ inode->i_mapping->writeback_index = start_index; ++ + while (cur < last_byte) { + const unsigned long prev_sectors_defragged = sectors_defragged; + u64 cluster_end; diff --git a/queue-5.16/can-m_can-m_can_fifo_-read-write-don-t-read-or-write-from-to-fifo-if-length-is-0.patch b/queue-5.16/can-m_can-m_can_fifo_-read-write-don-t-read-or-write-from-to-fifo-if-length-is-0.patch new file mode 100644 index 00000000000..092441c4032 --- /dev/null +++ b/queue-5.16/can-m_can-m_can_fifo_-read-write-don-t-read-or-write-from-to-fifo-if-length-is-0.patch @@ -0,0 +1,60 @@ +From db72589c49fd260bfc99c7160c079675bc7417af Mon Sep 17 00:00:00 2001 +From: Marc Kleine-Budde +Date: Fri, 14 Jan 2022 15:35:01 +0100 +Subject: can: m_can: m_can_fifo_{read,write}: don't read or write from/to FIFO if length is 0 + +From: Marc Kleine-Budde + +commit db72589c49fd260bfc99c7160c079675bc7417af upstream. + +In order to optimize FIFO access, especially on m_can cores attached +to slow busses like SPI, in patch + +| e39381770ec9 ("can: m_can: Disable IRQs on FIFO bus errors") + +bulk read/write support has been added to the m_can_fifo_{read,write} +functions. + +That change leads to the tcan driver to call +regmap_bulk_{read,write}() with a length of 0 (for CAN frames with 0 +data length). regmap treats this as an error: + +| tcan4x5x spi1.0 tcan4x5x0: FIFO write returned -22 + +This patch fixes the problem by not calling the +cdev->ops->{read,write)_fifo() in case of a 0 length read/write. + +Fixes: e39381770ec9 ("can: m_can: Disable IRQs on FIFO bus errors") +Link: https://lore.kernel.org/all/20220114155751.2651888-1-mkl@pengutronix.de +Cc: stable@vger.kernel.org +Cc: Matt Kline +Cc: Chandrasekar Ramakrishnan +Reported-by: Michael Anochin +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/can/m_can/m_can.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/net/can/m_can/m_can.c ++++ b/drivers/net/can/m_can/m_can.c +@@ -336,6 +336,9 @@ m_can_fifo_read(struct m_can_classdev *c + u32 addr_offset = cdev->mcfg[MRAM_RXF0].off + fgi * RXF0_ELEMENT_SIZE + + offset; + ++ if (val_count == 0) ++ return 0; ++ + return cdev->ops->read_fifo(cdev, addr_offset, val, val_count); + } + +@@ -346,6 +349,9 @@ m_can_fifo_write(struct m_can_classdev * + u32 addr_offset = cdev->mcfg[MRAM_TXB].off + fpi * TXB_ELEMENT_SIZE + + offset; + ++ if (val_count == 0) ++ return 0; ++ + return cdev->ops->write_fifo(cdev, addr_offset, val, val_count); + } + diff --git a/queue-5.16/kvm-arm64-use-shadow-spsr_el1-when-injecting-exceptions-on-vhe.patch b/queue-5.16/kvm-arm64-use-shadow-spsr_el1-when-injecting-exceptions-on-vhe.patch new file mode 100644 index 00000000000..abe07506835 --- /dev/null +++ b/queue-5.16/kvm-arm64-use-shadow-spsr_el1-when-injecting-exceptions-on-vhe.patch @@ -0,0 +1,46 @@ +From 278583055a237270fac70518275ba877bf9e4013 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Fri, 21 Jan 2022 18:42:07 +0000 +Subject: KVM: arm64: Use shadow SPSR_EL1 when injecting exceptions on !VHE + +From: Marc Zyngier + +commit 278583055a237270fac70518275ba877bf9e4013 upstream. + +Injecting an exception into a guest with non-VHE is risky business. +Instead of writing in the shadow register for the switch code to +restore it, we override the CPU register instead. Which gets +overriden a few instructions later by said restore code. 
+ +The result is that although the guest correctly gets the exception, +it will return to the original context in some random state, +depending on what was there the first place... Boo. + +Fix the issue by writing to the shadow register. The original code +is absolutely fine on VHE, as the state is already loaded, and writing +to the shadow register in that case would actually be a bug. + +Fixes: bb666c472ca2 ("KVM: arm64: Inject AArch64 exceptions from HYP") +Cc: stable@vger.kernel.org +Signed-off-by: Marc Zyngier +Reviewed-by: Fuad Tabba +Link: https://lore.kernel.org/r/20220121184207.423426-1-maz@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/hyp/exception.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/arch/arm64/kvm/hyp/exception.c ++++ b/arch/arm64/kvm/hyp/exception.c +@@ -38,7 +38,10 @@ static inline void __vcpu_write_sys_reg( + + static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val) + { +- write_sysreg_el1(val, SYS_SPSR); ++ if (has_vhe()) ++ write_sysreg_el1(val, SYS_SPSR); ++ else ++ __vcpu_sys_reg(vcpu, SPSR_EL1) = val; + } + + static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) diff --git a/queue-5.16/kvm-arm64-vgic-v3-restrict-seis-workaround-to-known-broken-systems.patch b/queue-5.16/kvm-arm64-vgic-v3-restrict-seis-workaround-to-known-broken-systems.patch new file mode 100644 index 00000000000..fcd6e3ee010 --- /dev/null +++ b/queue-5.16/kvm-arm64-vgic-v3-restrict-seis-workaround-to-known-broken-systems.patch @@ -0,0 +1,76 @@ +From d11a327ed95dbec756b99cbfef2a7fd85c9eeb09 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Fri, 21 Jan 2022 21:07:47 +0000 +Subject: KVM: arm64: vgic-v3: Restrict SEIS workaround to known broken systems + +From: Marc Zyngier + +commit d11a327ed95dbec756b99cbfef2a7fd85c9eeb09 upstream. + +Contrary to what df652bcf1136 ("KVM: arm64: vgic-v3: Work around GICv3 +locally generated SErrors") was asserting, there is at least one other +system out there (Cavium ThunderX2) implementing SEIS, and not in +an obviously broken way. + +So instead of imposing the M1 workaround on an innocent bystander, +let's limit it to the two known broken Apple implementations. 
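
Mechanically this is a standard errata-style list lookup: gate the workaround on specific implementations instead of on the SEIS feature bit alone. A reduced sketch (illustrative ID values, not real MIDR encodings; the kernel uses struct midr_range and is_midr_in_range_list()):

	#include <stdbool.h>
	#include <stddef.h>

	struct cpu_id { unsigned int implementer, part; };

	/* only implementations known to generate bogus SErrors */
	static const struct cpu_id broken_seis[] = {
		{ 0x61, 0x022 },	/* e.g. Apple M1 Icestorm */
		{ 0x61, 0x023 },	/* e.g. Apple M1 Firestorm */
	};

	static bool need_seis_workaround(struct cpu_id cpu, bool seis)
	{
		size_t i;

		if (!seis)
			return false;
		for (i = 0; i < sizeof(broken_seis) / sizeof(broken_seis[0]); i++)
			if (cpu.implementer == broken_seis[i].implementer &&
			    cpu.part == broken_seis[i].part)
				return true;	/* trap and mask SEIS here */
		return false;	/* e.g. ThunderX2: leave SEIS untouched */
	}
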
+ +Fixes: df652bcf1136 ("KVM: arm64: vgic-v3: Work around GICv3 locally generated SErrors") +Reported-by: Ard Biesheuvel +Tested-by: Ard Biesheuvel +Acked-by: Ard Biesheuvel +Signed-off-by: Marc Zyngier +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20220122103912.795026-1-maz@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/hyp/vgic-v3-sr.c | 3 +++ + arch/arm64/kvm/vgic/vgic-v3.c | 17 +++++++++++++++-- + 2 files changed, 18 insertions(+), 2 deletions(-) + +--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c ++++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c +@@ -983,6 +983,9 @@ static void __vgic_v3_read_ctlr(struct k + val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT; + /* IDbits */ + val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT; ++ /* SEIS */ ++ if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) ++ val |= BIT(ICC_CTLR_EL1_SEIS_SHIFT); + /* A3V */ + val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT; + /* EOImode */ +--- a/arch/arm64/kvm/vgic/vgic-v3.c ++++ b/arch/arm64/kvm/vgic/vgic-v3.c +@@ -609,6 +609,18 @@ static int __init early_gicv4_enable(cha + } + early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); + ++static const struct midr_range broken_seis[] = { ++ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM), ++ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM), ++ {}, ++}; ++ ++static bool vgic_v3_broken_seis(void) ++{ ++ return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) && ++ is_midr_in_range_list(read_cpuid_id(), broken_seis)); ++} ++ + /** + * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller + * @info: pointer to the GIC description +@@ -676,9 +688,10 @@ int vgic_v3_probe(const struct gic_kvm_i + group1_trap = true; + } + +- if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) { +- kvm_info("GICv3 with locally generated SEI\n"); ++ if (vgic_v3_broken_seis()) { ++ kvm_info("GICv3 with broken locally generated SEI\n"); + ++ kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK; + group0_trap = true; + group1_trap = true; + if (ich_vtr_el2 & ICH_VTR_TDS_MASK) diff --git a/queue-5.16/net-sfp-ignore-disabled-sfp-node.patch b/queue-5.16/net-sfp-ignore-disabled-sfp-node.patch new file mode 100644 index 00000000000..86c10dbaf58 --- /dev/null +++ b/queue-5.16/net-sfp-ignore-disabled-sfp-node.patch @@ -0,0 +1,43 @@ +From 2148927e6ed43a1667baf7c2ae3e0e05a44b51a0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marek=20Beh=C3=BAn?= +Date: Wed, 19 Jan 2022 17:44:55 +0100 +Subject: net: sfp: ignore disabled SFP node +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Marek Behún + +commit 2148927e6ed43a1667baf7c2ae3e0e05a44b51a0 upstream. + +Commit ce0aa27ff3f6 ("sfp: add sfp-bus to bridge between network devices +and sfp cages") added code which finds SFP bus DT node even if the node +is disabled with status = "disabled". Because of this, when phylink is +created, it ends with non-null .sfp_bus member, even though the SFP +module is not probed (because the node is disabled). + +We need to ignore disabled SFP bus node. + +Fixes: ce0aa27ff3f6 ("sfp: add sfp-bus to bridge between network devices and sfp cages") +Signed-off-by: Marek Behún +Cc: stable@vger.kernel.org # 2203cbf2c8b5 ("net: sfp: move fwnode parsing into sfp-bus layer") +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/sfp-bus.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/net/phy/sfp-bus.c ++++ b/drivers/net/phy/sfp-bus.c +@@ -651,6 +651,11 @@ struct sfp_bus *sfp_bus_find_fwnode(stru + else if (ret < 0) + return ERR_PTR(ret); + ++ if (!fwnode_device_is_available(ref.fwnode)) { ++ fwnode_handle_put(ref.fwnode); ++ return NULL; ++ } ++ + bus = sfp_bus_get(ref.fwnode); + fwnode_handle_put(ref.fwnode); + if (!bus) diff --git a/queue-5.16/net-stmmac-configure-ptp-clock-source-prior-to-ptp-initialization.patch b/queue-5.16/net-stmmac-configure-ptp-clock-source-prior-to-ptp-initialization.patch new file mode 100644 index 00000000000..9d8a148ac90 --- /dev/null +++ b/queue-5.16/net-stmmac-configure-ptp-clock-source-prior-to-ptp-initialization.patch @@ -0,0 +1,47 @@ +From 94c82de43e01ef5747a95e4a590880de863fe423 Mon Sep 17 00:00:00 2001 +From: Mohammad Athari Bin Ismail +Date: Wed, 26 Jan 2022 17:47:22 +0800 +Subject: net: stmmac: configure PTP clock source prior to PTP initialization + +From: Mohammad Athari Bin Ismail + +commit 94c82de43e01ef5747a95e4a590880de863fe423 upstream. + +For Intel platform, it is required to configure PTP clock source prior PTP +initialization in MAC. So, need to move ptp_clk_freq_config execution from +stmmac_ptp_register() to stmmac_init_ptp(). + +Fixes: 76da35dc99af ("stmmac: intel: Add PSE and PCH PTP clock source selection") +Cc: # 5.15.x +Signed-off-by: Mohammad Athari Bin Ismail +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +++ + drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 3 --- + 2 files changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -899,6 +899,9 @@ static int stmmac_init_ptp(struct stmmac + bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; + int ret; + ++ if (priv->plat->ptp_clk_freq_config) ++ priv->plat->ptp_clk_freq_config(priv); ++ + ret = stmmac_init_tstamp_counter(priv, STMMAC_HWTS_ACTIVE); + if (ret) + return ret; +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +@@ -297,9 +297,6 @@ void stmmac_ptp_register(struct stmmac_p + { + int i; + +- if (priv->plat->ptp_clk_freq_config) +- priv->plat->ptp_clk_freq_config(priv); +- + for (i = 0; i < priv->dma_cap.pps_out_num; i++) { + if (i >= STMMAC_PPS_MAX) + break; diff --git a/queue-5.16/net-stmmac-skip-only-stmmac_ptp_register-when-resume-from-suspend.patch b/queue-5.16/net-stmmac-skip-only-stmmac_ptp_register-when-resume-from-suspend.patch new file mode 100644 index 00000000000..0fbbacdf4f8 --- /dev/null +++ b/queue-5.16/net-stmmac-skip-only-stmmac_ptp_register-when-resume-from-suspend.patch @@ -0,0 +1,75 @@ +From 0735e639f129dff455aeb91da291f5c578cc33db Mon Sep 17 00:00:00 2001 +From: Mohammad Athari Bin Ismail +Date: Wed, 26 Jan 2022 17:47:23 +0800 +Subject: net: stmmac: skip only stmmac_ptp_register when resume from suspend + +From: Mohammad Athari Bin Ismail + +commit 0735e639f129dff455aeb91da291f5c578cc33db upstream. + +When resume from suspend, besides skipping PTP registration, it also +skipping PTP HW initialization. This could cause PTP clock not able to +operate properly when resume from suspend. + +To fix this, only stmmac_ptp_register() is skipped when resume from +suspend. 
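
Reduced to its control flow, the resulting split looks like this (stand-in helpers; in stmmac the two halves are stmmac_init_ptp() and stmmac_ptp_register()):

	#include <stdbool.h>

	static int init_ptp_hw(void);		/* ~ stmmac_init_ptp() */
	static void register_ptp_clock(void);	/* ~ stmmac_ptp_register() */

	static int hw_setup(bool ptp_register)
	{
		int ret;

		/* always reprogram PTP hardware state, resume included,
		 * because the core/DMA registers were just reset */
		ret = init_ptp_hw();
		if (ret)
			return ret;

		/* the PTP clock device survives suspend, so register it
		 * only on first open, not when resuming */
		if (ptp_register)
			register_ptp_clock();
		return 0;
	}
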
+ +Fixes: fe1319291150 ("stmmac: Don't init ptp again when resume from suspend/hibernation") +Cc: # 5.15.x +Signed-off-by: Mohammad Athari Bin Ismail +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 20 +++++++++----------- + 1 file changed, 9 insertions(+), 11 deletions(-) + +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -924,8 +924,6 @@ static int stmmac_init_ptp(struct stmmac + priv->hwts_tx_en = 0; + priv->hwts_rx_en = 0; + +- stmmac_ptp_register(priv); +- + return 0; + } + +@@ -3248,7 +3246,7 @@ static int stmmac_fpe_start_wq(struct st + /** + * stmmac_hw_setup - setup mac in a usable state. + * @dev : pointer to the device structure. +- * @init_ptp: initialize PTP if set ++ * @ptp_register: register PTP if set + * Description: + * this is the main function to setup the HW in a usable state because the + * dma engine is reset, the core registers are configured (e.g. AXI, +@@ -3258,7 +3256,7 @@ static int stmmac_fpe_start_wq(struct st + * 0 on success and an appropriate (-)ve integer as defined in errno.h + * file on failure. + */ +-static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) ++static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) + { + struct stmmac_priv *priv = netdev_priv(dev); + u32 rx_cnt = priv->plat->rx_queues_to_use; +@@ -3315,13 +3313,13 @@ static int stmmac_hw_setup(struct net_de + + stmmac_mmc_setup(priv); + +- if (init_ptp) { +- ret = stmmac_init_ptp(priv); +- if (ret == -EOPNOTSUPP) +- netdev_warn(priv->dev, "PTP not supported by HW\n"); +- else if (ret) +- netdev_warn(priv->dev, "PTP init failed\n"); +- } ++ ret = stmmac_init_ptp(priv); ++ if (ret == -EOPNOTSUPP) ++ netdev_warn(priv->dev, "PTP not supported by HW\n"); ++ else if (ret) ++ netdev_warn(priv->dev, "PTP init failed\n"); ++ else if (ptp_register) ++ stmmac_ptp_register(priv); + + priv->eee_tw_timer = STMMAC_DEFAULT_TWT_LS; + diff --git a/queue-5.16/powerpc-bpf-update-ldimm64-instructions-during-extra-pass.patch b/queue-5.16/powerpc-bpf-update-ldimm64-instructions-during-extra-pass.patch new file mode 100644 index 00000000000..68b73878f69 --- /dev/null +++ b/queue-5.16/powerpc-bpf-update-ldimm64-instructions-during-extra-pass.patch @@ -0,0 +1,137 @@ +From f9320c49993ca3c0ec0f9a7026b313735306bb8b Mon Sep 17 00:00:00 2001 +From: "Naveen N. Rao" +Date: Thu, 6 Jan 2022 17:15:07 +0530 +Subject: powerpc/bpf: Update ldimm64 instructions during extra pass + +From: Naveen N. Rao + +commit f9320c49993ca3c0ec0f9a7026b313735306bb8b upstream. + +These instructions are updated after the initial JIT, so redo codegen +during the extra pass. Rename bpf_jit_fixup_subprog_calls() to clarify +that this is more than just subprog calls. + +Fixes: 69c087ba6225b5 ("bpf: Add bpf_for_each_map_elem() helper") +Cc: stable@vger.kernel.org # v5.15 +Signed-off-by: Naveen N. 
Rao +Tested-by: Jiri Olsa +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/7cc162af77ba918eb3ecd26ec9e7824bc44b1fae.1641468127.git.naveen.n.rao@linux.vnet.ibm.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/net/bpf_jit_comp.c | 29 +++++++++++++++++++++++------ + arch/powerpc/net/bpf_jit_comp32.c | 6 ++++++ + arch/powerpc/net/bpf_jit_comp64.c | 7 ++++++- + 3 files changed, 35 insertions(+), 7 deletions(-) + +--- a/arch/powerpc/net/bpf_jit_comp.c ++++ b/arch/powerpc/net/bpf_jit_comp.c +@@ -23,15 +23,15 @@ static void bpf_jit_fill_ill_insns(void + memset32(area, BREAKPOINT_INSTRUCTION, size / 4); + } + +-/* Fix the branch target addresses for subprog calls */ +-static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, +- struct codegen_context *ctx, u32 *addrs) ++/* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */ ++static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, ++ struct codegen_context *ctx, u32 *addrs) + { + const struct bpf_insn *insn = fp->insnsi; + bool func_addr_fixed; + u64 func_addr; + u32 tmp_idx; +- int i, ret; ++ int i, j, ret; + + for (i = 0; i < fp->len; i++) { + /* +@@ -66,6 +66,23 @@ static int bpf_jit_fixup_subprog_calls(s + * of the JITed sequence remains unchanged. + */ + ctx->idx = tmp_idx; ++ } else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) { ++ tmp_idx = ctx->idx; ++ ctx->idx = addrs[i] / 4; ++#ifdef CONFIG_PPC32 ++ PPC_LI32(ctx->b2p[insn[i].dst_reg] - 1, (u32)insn[i + 1].imm); ++ PPC_LI32(ctx->b2p[insn[i].dst_reg], (u32)insn[i].imm); ++ for (j = ctx->idx - addrs[i] / 4; j < 4; j++) ++ EMIT(PPC_RAW_NOP()); ++#else ++ func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32); ++ PPC_LI64(b2p[insn[i].dst_reg], func_addr); ++ /* overwrite rest with nops */ ++ for (j = ctx->idx - addrs[i] / 4; j < 5; j++) ++ EMIT(PPC_RAW_NOP()); ++#endif ++ ctx->idx = tmp_idx; ++ i++; + } + } + +@@ -193,13 +210,13 @@ skip_init_ctx: + /* + * Do not touch the prologue and epilogue as they will remain + * unchanged. Only fix the branch target address for subprog +- * calls in the body. ++ * calls in the body, and ldimm64 instructions. + * + * This does not change the offsets and lengths of the subprog + * call instruction sequences and hence, the size of the JITed + * image as well. + */ +- bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); ++ bpf_jit_fixup_addresses(fp, code_base, &cgctx, addrs); + + /* There is no need to perform the usual passes. 
*/ + goto skip_codegen_passes; +--- a/arch/powerpc/net/bpf_jit_comp32.c ++++ b/arch/powerpc/net/bpf_jit_comp32.c +@@ -292,6 +292,8 @@ int bpf_jit_build_body(struct bpf_prog * + bool func_addr_fixed; + u64 func_addr; + u32 true_cond; ++ u32 tmp_idx; ++ int j; + + /* + * addrs[] maps a BPF bytecode address into a real offset from +@@ -839,8 +841,12 @@ int bpf_jit_build_body(struct bpf_prog * + * 16 byte instruction that uses two 'struct bpf_insn' + */ + case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ ++ tmp_idx = ctx->idx; + PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm); + PPC_LI32(dst_reg, (u32)insn[i].imm); ++ /* padding to allow full 4 instructions for later patching */ ++ for (j = ctx->idx - tmp_idx; j < 4; j++) ++ EMIT(PPC_RAW_NOP()); + /* Adjust for two bpf instructions */ + addrs[++i] = ctx->idx * 4; + break; +--- a/arch/powerpc/net/bpf_jit_comp64.c ++++ b/arch/powerpc/net/bpf_jit_comp64.c +@@ -318,6 +318,7 @@ int bpf_jit_build_body(struct bpf_prog * + u64 imm64; + u32 true_cond; + u32 tmp_idx; ++ int j; + + /* + * addrs[] maps a BPF bytecode address into a real offset from +@@ -806,9 +807,13 @@ emit_clear: + case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ + imm64 = ((u64)(u32) insn[i].imm) | + (((u64)(u32) insn[i+1].imm) << 32); ++ tmp_idx = ctx->idx; ++ PPC_LI64(dst_reg, imm64); ++ /* padding to allow full 5 instructions for later patching */ ++ for (j = ctx->idx - tmp_idx; j < 5; j++) ++ EMIT(PPC_RAW_NOP()); + /* Adjust for two bpf instructions */ + addrs[++i] = ctx->idx * 4; +- PPC_LI64(dst_reg, imm64); + break; + + /* diff --git a/queue-5.16/powerpc32-bpf-fix-codegen-for-bpf-to-bpf-calls.patch b/queue-5.16/powerpc32-bpf-fix-codegen-for-bpf-to-bpf-calls.patch new file mode 100644 index 00000000000..333166c944e --- /dev/null +++ b/queue-5.16/powerpc32-bpf-fix-codegen-for-bpf-to-bpf-calls.patch @@ -0,0 +1,36 @@ +From fab07611fb2e6a15fac05c4583045ca5582fd826 Mon Sep 17 00:00:00 2001 +From: "Naveen N. Rao" +Date: Thu, 6 Jan 2022 17:15:06 +0530 +Subject: powerpc32/bpf: Fix codegen for bpf-to-bpf calls + +From: Naveen N. Rao + +commit fab07611fb2e6a15fac05c4583045ca5582fd826 upstream. + +Pad instructions emitted for BPF_CALL so that the number of instructions +generated does not change for different function addresses. This is +especially important for calls to other bpf functions, whose address +will only be known during extra pass. + +Fixes: 51c66ad849a703 ("powerpc/bpf: Implement extended BPF on PPC32") +Cc: stable@vger.kernel.org # v5.13+ +Signed-off-by: Naveen N. 
Rao +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/52d8fe51f7620a6f27f377791564d79d75463576.1641468127.git.naveen.n.rao@linux.vnet.ibm.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/net/bpf_jit_comp32.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/powerpc/net/bpf_jit_comp32.c ++++ b/arch/powerpc/net/bpf_jit_comp32.c +@@ -191,6 +191,9 @@ void bpf_jit_emit_func_call_rel(u32 *ima + + if (image && rel < 0x2000000 && rel >= -0x2000000) { + PPC_BL_ABS(func); ++ EMIT(PPC_RAW_NOP()); ++ EMIT(PPC_RAW_NOP()); ++ EMIT(PPC_RAW_NOP()); + } else { + /* Load function address into r0 */ + EMIT(PPC_RAW_LIS(_R0, IMM_H(func))); diff --git a/queue-5.16/s390-hypfs-include-z-vm-guests-with-access-control-group-set.patch b/queue-5.16/s390-hypfs-include-z-vm-guests-with-access-control-group-set.patch new file mode 100644 index 00000000000..8a2a60e1939 --- /dev/null +++ b/queue-5.16/s390-hypfs-include-z-vm-guests-with-access-control-group-set.patch @@ -0,0 +1,51 @@ +From 663d34c8df98740f1e90241e78e456d00b3c6cad Mon Sep 17 00:00:00 2001 +From: Vasily Gorbik +Date: Thu, 20 Jan 2022 16:23:19 +0100 +Subject: s390/hypfs: include z/VM guests with access control group set + +From: Vasily Gorbik + +commit 663d34c8df98740f1e90241e78e456d00b3c6cad upstream. + +Currently if z/VM guest is allowed to retrieve hypervisor performance +data globally for all guests (privilege class B) the query is formed in a +way to include all guests but the group name is left empty. This leads to +that z/VM guests which have access control group set not being included +in the results (even local vm). + +Change the query group identifier from empty to "any" to retrieve +information about all guests from any groups (or without a group set). + +Cc: stable@vger.kernel.org +Fixes: 31cb4bd31a48 ("[S390] Hypervisor filesystem (s390_hypfs) for z/VM") +Reviewed-by: Gerald Schaefer +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/hypfs/hypfs_vm.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/arch/s390/hypfs/hypfs_vm.c ++++ b/arch/s390/hypfs/hypfs_vm.c +@@ -20,6 +20,7 @@ + + static char local_guest[] = " "; + static char all_guests[] = "* "; ++static char *all_groups = all_guests; + static char *guest_query; + + struct diag2fc_data { +@@ -62,10 +63,11 @@ static int diag2fc(int size, char* query + + memcpy(parm_list.userid, query, NAME_LEN); + ASCEBC(parm_list.userid, NAME_LEN); +- parm_list.addr = (unsigned long) addr ; ++ memcpy(parm_list.aci_grp, all_groups, NAME_LEN); ++ ASCEBC(parm_list.aci_grp, NAME_LEN); ++ parm_list.addr = (unsigned long)addr; + parm_list.size = size; + parm_list.fmt = 0x02; +- memset(parm_list.aci_grp, 0x40, NAME_LEN); + rc = -1; + + diag_stat_inc(DIAG_STAT_X2FC); diff --git a/queue-5.16/s390-module-fix-loading-modules-with-a-lot-of-relocations.patch b/queue-5.16/s390-module-fix-loading-modules-with-a-lot-of-relocations.patch new file mode 100644 index 00000000000..f0c9568574d --- /dev/null +++ b/queue-5.16/s390-module-fix-loading-modules-with-a-lot-of-relocations.patch @@ -0,0 +1,84 @@ +From f3b7e73b2c6619884351a3a0a7468642f852b8a2 Mon Sep 17 00:00:00 2001 +From: Ilya Leoshkevich +Date: Wed, 19 Jan 2022 19:26:37 +0100 +Subject: s390/module: fix loading modules with a lot of relocations + +From: Ilya Leoshkevich + +commit f3b7e73b2c6619884351a3a0a7468642f852b8a2 upstream. + +If the size of the PLT entries generated by apply_rela() exceeds +64KiB, the first ones can no longer reach __jump_r1 with brc. Fix by +using brcl. 
An alternative solution is to add a __jump_r1 copy after +every 64KiB; however, the space savings are quite small and do not +justify the additional complexity. + +Fixes: f19fbd5ed642 ("s390: introduce execute-trampolines for branches") +Cc: stable@vger.kernel.org +Reported-by: Andrea Righi +Signed-off-by: Ilya Leoshkevich +Reviewed-by: Heiko Carstens +Cc: Vasily Gorbik +Cc: Christian Borntraeger +Signed-off-by: Heiko Carstens +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/kernel/module.c | 37 ++++++++++++++++++------------------- + 1 file changed, 18 insertions(+), 19 deletions(-) + +--- a/arch/s390/kernel/module.c ++++ b/arch/s390/kernel/module.c +@@ -33,7 +33,7 @@ + #define DEBUGP(fmt , ...) + #endif + +-#define PLT_ENTRY_SIZE 20 ++#define PLT_ENTRY_SIZE 22 + + void *module_alloc(unsigned long size) + { +@@ -341,27 +341,26 @@ static int apply_rela(Elf_Rela *rela, El + case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */ + case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ + if (info->plt_initialized == 0) { +- unsigned int insn[5]; +- unsigned int *ip = me->core_layout.base + +- me->arch.plt_offset + +- info->plt_offset; +- +- insn[0] = 0x0d10e310; /* basr 1,0 */ +- insn[1] = 0x100a0004; /* lg 1,10(1) */ ++ unsigned char insn[PLT_ENTRY_SIZE]; ++ char *plt_base; ++ char *ip; ++ ++ plt_base = me->core_layout.base + me->arch.plt_offset; ++ ip = plt_base + info->plt_offset; ++ *(int *)insn = 0x0d10e310; /* basr 1,0 */ ++ *(int *)&insn[4] = 0x100c0004; /* lg 1,12(1) */ + if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) { +- unsigned int *ij; +- ij = me->core_layout.base + +- me->arch.plt_offset + +- me->arch.plt_size - PLT_ENTRY_SIZE; +- insn[2] = 0xa7f40000 + /* j __jump_r1 */ +- (unsigned int)(u16) +- (((unsigned long) ij - 8 - +- (unsigned long) ip) / 2); ++ char *jump_r1; ++ ++ jump_r1 = plt_base + me->arch.plt_size - ++ PLT_ENTRY_SIZE; ++ /* brcl 0xf,__jump_r1 */ ++ *(short *)&insn[8] = 0xc0f4; ++ *(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2; + } else { +- insn[2] = 0x07f10000; /* br %r1 */ ++ *(int *)&insn[8] = 0x07f10000; /* br %r1 */ + } +- insn[3] = (unsigned int) (val >> 32); +- insn[4] = (unsigned int) val; ++ *(long *)&insn[14] = val; + + write(ip, insn, sizeof(insn)); + info->plt_initialized = 1; diff --git a/queue-5.16/s390-nmi-handle-guarded-storage-validity-failures-for-kvm-guests.patch b/queue-5.16/s390-nmi-handle-guarded-storage-validity-failures-for-kvm-guests.patch new file mode 100644 index 00000000000..1453d0c3677 --- /dev/null +++ b/queue-5.16/s390-nmi-handle-guarded-storage-validity-failures-for-kvm-guests.patch @@ -0,0 +1,57 @@ +From 1ea1d6a847d2b1d17fefd9196664b95f052a0775 Mon Sep 17 00:00:00 2001 +From: Christian Borntraeger +Date: Thu, 13 Jan 2022 11:44:19 +0100 +Subject: s390/nmi: handle guarded storage validity failures for KVM guests + +From: Christian Borntraeger + +commit 1ea1d6a847d2b1d17fefd9196664b95f052a0775 upstream. + +Machine check validity bits reflect the state of the machine check. If a +guest does not make use of guarded storage, the validity bit might be +off. We cannot use the host CR bit to decide if the validity bit must +be on. So ignore "invalid" guarded storage controls for KVM guests in +the host and rely on the machine check being forwarded to the guest. If +no other errors happen from a host perspective, everything is fine and no +process needs to be killed and the host can continue to run.
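+
+The resulting decision can be modeled in a few lines of plain C (a
+minimal userspace sketch with invented helper and flag names; the
+authoritative logic is in the nmi.c hunk below):
+
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    /* Should a guarded storage validity failure kill the task? */
+    static bool gs_failure_kills_task(bool cr2_gse, bool mci_gs_valid,
+                                      bool mcck_in_guest)
+    {
+            if (!cr2_gse)
+                    return false;   /* guarded storage not in use */
+            if (mci_gs_valid)
+                    return false;   /* registers valid, just reload them */
+            /* invalid GS controls are fatal only outside SIE */
+            return !mcck_in_guest;
+    }
+
+    int main(void)
+    {
+            /* machine check while a KVM guest was running: host survives */
+            printf("%d\n", gs_failure_kills_task(true, false, true));
+            /* the same failure in host context: the task must die */
+            printf("%d\n", gs_failure_kills_task(true, false, false));
+            return 0;
+    }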
+ +Cc: stable@vger.kernel.org +Fixes: c929500d7a5a ("s390/nmi: s390: New low level handling for machine check happening in guest") +Reported-by: Carsten Otte +Signed-off-by: Christian Borntraeger +Tested-by: Carsten Otte +Reviewed-by: Heiko Carstens +Signed-off-by: Heiko Carstens +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/kernel/nmi.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +--- a/arch/s390/kernel/nmi.c ++++ b/arch/s390/kernel/nmi.c +@@ -316,11 +316,21 @@ static int notrace s390_validate_registe + if (cr2.gse) { + if (!mci.gs) { + /* +- * Guarded storage register can't be restored and +- * the current processes uses guarded storage. +- * It has to be terminated. ++ * 2 cases: ++ * - machine check in kernel or userspace ++ * - machine check while running SIE (KVM guest) ++ * For kernel or userspace the userspace values of ++ * guarded storage control can not be recreated, the ++ * process must be terminated. ++ * For SIE the guest values of guarded storage can not ++ * be recreated. This is either due to a bug or due to ++ * GS being disabled in the guest. The guest will be ++ * notified by KVM code and the guests machine check ++ * handling must take care of this. The host values ++ * are saved by KVM and are not affected. + */ +- kill_task = 1; ++ if (!test_cpu_flag(CIF_MCCK_GUEST)) ++ kill_task = 1; + } else { + load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area); + } diff --git a/queue-5.16/s390-nmi-handle-vector-validity-failures-for-kvm-guests.patch b/queue-5.16/s390-nmi-handle-vector-validity-failures-for-kvm-guests.patch new file mode 100644 index 00000000000..1a8ab5c51d2 --- /dev/null +++ b/queue-5.16/s390-nmi-handle-vector-validity-failures-for-kvm-guests.patch @@ -0,0 +1,42 @@ +From f094a39c6ba168f2df1edfd1731cca377af5f442 Mon Sep 17 00:00:00 2001 +From: Christian Borntraeger +Date: Mon, 17 Jan 2022 18:40:32 +0100 +Subject: s390/nmi: handle vector validity failures for KVM guests + +From: Christian Borntraeger + +commit f094a39c6ba168f2df1edfd1731cca377af5f442 upstream. + +The machine check validity bit tells about the context. If a KVM guest +was running, the bit tells about the guest validity and the host state is +not affected. As a guest can disable the guest validity, this might +result in unwanted host errors on machine checks. + +Cc: stable@vger.kernel.org +Fixes: c929500d7a5a ("s390/nmi: s390: New low level handling for machine check happening in guest") +Signed-off-by: Christian Borntraeger +Reviewed-by: Heiko Carstens +Signed-off-by: Heiko Carstens +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/kernel/nmi.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/arch/s390/kernel/nmi.c ++++ b/arch/s390/kernel/nmi.c +@@ -273,7 +273,14 @@ static int notrace s390_validate_registe + /* Validate vector registers */ + union ctlreg0 cr0; + +- if (!mci.vr) { ++ /* ++ * The vector validity must only be checked if not running a ++ * KVM guest. For KVM guests the machine check is forwarded by ++ * KVM and it is the responsibility of the guest to take ++ * appropriate actions. The host vector or FPU values have been ++ * saved by KVM and will be restored by KVM. ++ */ ++ if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) { + /* + * Vector registers can't be restored.
If the kernel + * currently uses vector registers the system is diff --git a/queue-5.16/scsi-zfcp-fix-failed-recovery-on-gone-remote-port-with-non-npiv-fcp-devices.patch b/queue-5.16/scsi-zfcp-fix-failed-recovery-on-gone-remote-port-with-non-npiv-fcp-devices.patch new file mode 100644 index 00000000000..470f6a4b6e1 --- /dev/null +++ b/queue-5.16/scsi-zfcp-fix-failed-recovery-on-gone-remote-port-with-non-npiv-fcp-devices.patch @@ -0,0 +1,111 @@ +From 8c9db6679be4348b8aae108e11d4be2f83976e30 Mon Sep 17 00:00:00 2001 +From: Steffen Maier +Date: Tue, 18 Jan 2022 17:58:03 +0100 +Subject: scsi: zfcp: Fix failed recovery on gone remote port with non-NPIV FCP devices + +From: Steffen Maier + +commit 8c9db6679be4348b8aae108e11d4be2f83976e30 upstream. + +Suppose we have an environment with a number of non-NPIV FCP devices +(virtual HBAs / FCP devices / zfcp "adapter"s) sharing the same physical +FCP channel (HBA port) and its I_T nexus. Plus a number of storage target +ports zoned to such shared channel. Now one target port logs out of the +fabric causing an RSCN. Zfcp reacts with an ADISC ELS and subsequent port +recovery depending on the ADISC result. This happens on all such FCP +devices (in different Linux images) concurrently as they all receive a copy +of this RSCN. In the following we look at one of those FCP devices. + +Requests other than FSF_QTCB_FCP_CMND can be slow until they get a +response. + +Depending on which requests are affected by slow responses, there are +different recovery outcomes. Here we want to fix failed recoveries on port +or adapter level by avoiding recovery requests that can be slow. + +We need the cached N_Port_ID for the remote port "link" test with ADISC. +Just before sending the ADISC, we now intentionally forget the old cached +N_Port_ID. The idea is that on receiving an RSCN for a port, we have to +assume that any cached information about this port is stale. This forces a +fresh new GID_PN [FC-GS] nameserver lookup on any subsequent recovery for +the same port. Since we typically can still communicate with the nameserver +efficiently, we now reach steady state quicker: Either the nameserver still +does not know about the port so we stop recovery, or the nameserver already +knows the port potentially with a new N_Port_ID and we can successfully and +quickly perform open port recovery. For the one case, where ADISC returns +successfully, we re-initialize port->d_id because that case does not +involve any port recovery. + +This also solves a problem if the storage WWPN quickly logs into the fabric +again but with a different N_Port_ID. Such as on virtual WWPN takeover +during target NPIV failover. +[https://www.redbooks.ibm.com/abstracts/redp5477.html] In that case the +RSCN from the storage FDISC was ignored by zfcp and we could not +successfully recover the failover. On some later failback on the storage, +we could have been lucky if the virtual WWPN got the same old N_Port_ID +from the SAN switch as we still had cached. Then the related RSCN +triggered a successful port reopen recovery. However, there is no +guarantee to get the same N_Port_ID on NPIV FDISC. + +Even though NPIV-enabled FCP devices are not affected by this problem, this +code change optimizes recovery time for gone remote ports as a side effect. +The timely drop of cached N_Port_IDs prevents unnecessary slow open port +attempts. 
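+
+The reordered send path can be sketched in plain C (a simplified
+userspace model with invented types; the authoritative change is in the
+zfcp_fc.c hunks below):
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    struct port { uint32_t d_id; };     /* cached N_Port_ID, 0 = unknown */
+
+    /* send ADISC: remember the destination, then drop the cache */
+    static uint32_t adisc_send(struct port *port)
+    {
+            uint32_t d_id = port->d_id; /* destination for the ELS */
+
+            port->d_id = 0;     /* force fresh GID_PN on next recovery */
+            return d_id;
+    }
+
+    /* good ADISC response: re-init from the response payload */
+    static void adisc_handler(struct port *port, uint32_t resp_id)
+    {
+            port->d_id = resp_id;       /* undo the drop, port is alive */
+    }
+
+    int main(void)
+    {
+            struct port port = { .d_id = 0x654321 };
+            uint32_t dest = adisc_send(&port);  /* cache now dropped */
+
+            adisc_handler(&port, dest); /* restored on good response */
+            printf("%06x\n", port.d_id);
+            return 0;
+    }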
+ +While the problem might have been in code before v2.6.32 commit +799b76d09aee ("[SCSI] zfcp: Decouple gid_pn requests from erp") this fix +depends on the gid_pn_work introduced with that commit, so we mark it as +culprit to satisfy fix dependencies. + +Note: Point-to-point remote port is already handled separately and gets its +N_Port_ID from the cached peer_d_id. So resetting port->d_id in general +does not affect PtP. + +Link: https://lore.kernel.org/r/20220118165803.3667947-1-maier@linux.ibm.com +Fixes: 799b76d09aee ("[SCSI] zfcp: Decouple gid_pn requests from erp") +Cc: #2.6.32+ +Suggested-by: Benjamin Block +Reviewed-by: Benjamin Block +Signed-off-by: Steffen Maier +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/scsi/zfcp_fc.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +--- a/drivers/s390/scsi/zfcp_fc.c ++++ b/drivers/s390/scsi/zfcp_fc.c +@@ -521,6 +521,8 @@ static void zfcp_fc_adisc_handler(void * + goto out; + } + ++ /* re-init to undo drop from zfcp_fc_adisc() */ ++ port->d_id = ntoh24(adisc_resp->adisc_port_id); + /* port is good, unblock rport without going through erp */ + zfcp_scsi_schedule_rport_register(port); + out: +@@ -534,6 +536,7 @@ static int zfcp_fc_adisc(struct zfcp_por + struct zfcp_fc_req *fc_req; + struct zfcp_adapter *adapter = port->adapter; + struct Scsi_Host *shost = adapter->scsi_host; ++ u32 d_id; + int ret; + + fc_req = kmem_cache_zalloc(zfcp_fc_req_cache, GFP_ATOMIC); +@@ -558,7 +561,15 @@ static int zfcp_fc_adisc(struct zfcp_por + fc_req->u.adisc.req.adisc_cmd = ELS_ADISC; + hton24(fc_req->u.adisc.req.adisc_port_id, fc_host_port_id(shost)); + +- ret = zfcp_fsf_send_els(adapter, port->d_id, &fc_req->ct_els, ++ d_id = port->d_id; /* remember as destination for send els below */ ++ /* ++ * Force fresh GID_PN lookup on next port recovery. ++ * Must happen after request setup and before sending request, ++ * to prevent race with port->d_id re-init in zfcp_fc_adisc_handler(). 
++ */ ++ port->d_id = 0; ++ ++ ret = zfcp_fsf_send_els(adapter, d_id, &fc_req->ct_els, + ZFCP_FC_CTELS_TMO); + if (ret) + kmem_cache_free(zfcp_fc_req_cache, fc_req); diff --git a/queue-5.16/series b/queue-5.16/series new file mode 100644 index 00000000000..cb4f51e4e96 --- /dev/null +++ b/queue-5.16/series @@ -0,0 +1,27 @@ +bluetooth-refactor-malicious-adv-data-check.patch +btrfs-fix-too-long-loop-when-defragging-a-1-byte-file.patch +btrfs-allow-defrag-to-be-interruptible.patch +btrfs-defrag-fix-wrong-number-of-defragged-sectors.patch +btrfs-defrag-properly-update-range-start-for-autodefrag.patch +btrfs-fix-deadlock-when-reserving-space-during-defrag.patch +btrfs-add-back-missing-dirty-page-rate-limiting-to-defrag.patch +btrfs-update-writeback-index-when-starting-defrag.patch +can-m_can-m_can_fifo_-read-write-don-t-read-or-write-from-to-fifo-if-length-is-0.patch +net-sfp-ignore-disabled-sfp-node.patch +net-stmmac-configure-ptp-clock-source-prior-to-ptp-initialization.patch +net-stmmac-skip-only-stmmac_ptp_register-when-resume-from-suspend.patch +arm-9179-1-uaccess-avoid-alignment-faults-in-copy__kernel_nofault.patch +arm-9180-1-thumb2-align-alt_up-sections-in-modules-sufficiently.patch +kvm-arm64-use-shadow-spsr_el1-when-injecting-exceptions-on-vhe.patch +kvm-arm64-vgic-v3-restrict-seis-workaround-to-known-broken-systems.patch +s390-module-fix-loading-modules-with-a-lot-of-relocations.patch +s390-hypfs-include-z-vm-guests-with-access-control-group-set.patch +s390-nmi-handle-guarded-storage-validity-failures-for-kvm-guests.patch +s390-nmi-handle-vector-validity-failures-for-kvm-guests.patch +bpf-guard-against-accessing-null-pt_regs-in-bpf_get_task_stack.patch +powerpc32-bpf-fix-codegen-for-bpf-to-bpf-calls.patch +powerpc-bpf-update-ldimm64-instructions-during-extra-pass.patch +ucount-make-get_ucount-a-safe-get_user-replacement.patch +scsi-zfcp-fix-failed-recovery-on-gone-remote-port-with-non-npiv-fcp-devices.patch +udf-restore-i_lenalloc-when-inode-expansion-fails.patch +udf-fix-null-ptr-deref-when-converting-from-inline-format.patch diff --git a/queue-5.16/ucount-make-get_ucount-a-safe-get_user-replacement.patch b/queue-5.16/ucount-make-get_ucount-a-safe-get_user-replacement.patch new file mode 100644 index 00000000000..09f6acc7321 --- /dev/null +++ b/queue-5.16/ucount-make-get_ucount-a-safe-get_user-replacement.patch @@ -0,0 +1,57 @@ +From f9d87929d451d3e649699d0f1d74f71f77ad38f5 Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Mon, 24 Jan 2022 12:46:50 -0600 +Subject: ucount: Make get_ucount a safe get_user replacement + +From: Eric W. Biederman + +commit f9d87929d451d3e649699d0f1d74f71f77ad38f5 upstream. + +When the ucount code was refactored to create get_ucount, it was missed +that some of the contexts in which an rlimit is kept elevated can be +the only reference to the user/ucount in the system. + +Ordinary ucount references exist in places that also have a reference +to the user namespace, but in POSIX message queues, the SysV shm code, +and the SIGPENDING code there is no independent user namespace +reference. + +Inspection of the user_namespace shows no instances of circular +references between struct ucounts and the user_namespace. So +hold a reference from struct ucounts to its user_namespace to +resolve this problem.
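+
+The resulting lifetime rule (a ucounts reference now pins the
+namespace) can be modeled in userspace with a toy refcount sketch
+(invented types; the real change is the get_user_ns()/put_user_ns()
+pair in the hunk below):
+
+    #include <assert.h>
+    #include <stdlib.h>
+
+    struct user_namespace { int count; };
+    struct ucounts { struct user_namespace *ns; int count; };
+
+    static struct ucounts *alloc_ucounts(struct user_namespace *ns)
+    {
+            struct ucounts *uc = malloc(sizeof(*uc));
+
+            if (!uc)
+                    abort();
+            uc->ns = ns;
+            uc->count = 1;
+            ns->count++;        /* new: a ucounts reference pins its ns */
+            return uc;
+    }
+
+    static void put_ucounts(struct ucounts *uc)
+    {
+            if (--uc->count == 0) {
+                    uc->ns->count--;        /* drop the ns pin last */
+                    free(uc);
+            }
+    }
+
+    int main(void)
+    {
+            struct user_namespace ns = { .count = 1 };
+            struct ucounts *uc = alloc_ucounts(&ns);
+
+            /* a SIGPENDING-style holder keeps only uc alive ... */
+            ns.count--;             /* ... even after the last direct ns ref */
+            assert(ns.count > 0);   /* ns survives, pinned by uc */
+            put_ucounts(uc);
+            assert(ns.count == 0);
+            return 0;
+    }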
+ +Link: https://lore.kernel.org/lkml/YZV7Z+yXbsx9p3JN@fixkernel.com/ +Reported-by: Qian Cai +Reported-by: Mathias Krause +Tested-by: Mathias Krause +Reviewed-by: Mathias Krause +Reviewed-by: Alexey Gladkov +Fixes: d64696905554 ("Reimplement RLIMIT_SIGPENDING on top of ucounts") +Fixes: 6e52a9f0532f ("Reimplement RLIMIT_MSGQUEUE on top of ucounts") +Fixes: d7c9e99aee48 ("Reimplement RLIMIT_MEMLOCK on top of ucounts") +Cc: stable@vger.kernel.org +Signed-off-by: "Eric W. Biederman" +Signed-off-by: Greg Kroah-Hartman +--- + kernel/ucount.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/kernel/ucount.c ++++ b/kernel/ucount.c +@@ -190,6 +190,7 @@ struct ucounts *alloc_ucounts(struct use + kfree(new); + } else { + hlist_add_head(&new->node, hashent); ++ get_user_ns(new->ns); + spin_unlock_irq(&ucounts_lock); + return new; + } +@@ -210,6 +211,7 @@ void put_ucounts(struct ucounts *ucounts + if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) { + hlist_del_init(&ucounts->node); + spin_unlock_irqrestore(&ucounts_lock, flags); ++ put_user_ns(ucounts->ns); + kfree(ucounts); + } + } diff --git a/queue-5.16/udf-fix-null-ptr-deref-when-converting-from-inline-format.patch b/queue-5.16/udf-fix-null-ptr-deref-when-converting-from-inline-format.patch new file mode 100644 index 00000000000..279c2f6cad6 --- /dev/null +++ b/queue-5.16/udf-fix-null-ptr-deref-when-converting-from-inline-format.patch @@ -0,0 +1,64 @@ +From 7fc3b7c2981bbd1047916ade327beccb90994eee Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Mon, 17 Jan 2022 18:22:13 +0100 +Subject: udf: Fix NULL ptr deref when converting from inline format + +From: Jan Kara + +commit 7fc3b7c2981bbd1047916ade327beccb90994eee upstream. + +udf_expand_file_adinicb() directly calls ->writepage to write data +expanded into a page. However, this misses setting up the inode for +writeback properly, so we can crash on inode->i_wb dereference when +submitting the page for IO like: + + BUG: kernel NULL pointer dereference, address: 0000000000000158 + #PF: supervisor read access in kernel mode +... + + __folio_start_writeback+0x2ac/0x350 + __block_write_full_page+0x37d/0x490 + udf_expand_file_adinicb+0x255/0x400 [udf] + udf_file_write_iter+0xbe/0x1b0 [udf] + new_sync_write+0x125/0x1c0 + vfs_write+0x28e/0x400 + +Fix the problem by marking the page dirty and going through the standard +writeback path to write the page. Strictly speaking we would not even +have to write the page but we want to catch e.g. ENOSPC errors early.
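+
+Why the direct ->writepage call crashes while the standard path does
+not can be shown with a contrived userspace model (all names here are
+toy stand-ins for the kernel machinery, not the real functions):
+
+    #include <stdio.h>
+
+    struct inode { int *i_wb; };        /* writeback state, NULL until set up */
+
+    /* models __folio_start_writeback() dereferencing inode->i_wb */
+    static void writepage(struct inode *inode)
+    {
+            printf("wb state: %d\n", *inode->i_wb);
+    }
+
+    /* models the standard path, which attaches i_wb before writing */
+    static void fdatawrite(struct inode *inode)
+    {
+            static int wb;
+
+            if (!inode->i_wb)
+                    inode->i_wb = &wb;  /* setup the direct call skips */
+            writepage(inode);
+    }
+
+    int main(void)
+    {
+            struct inode inode = { NULL };
+
+            /* writepage(&inode); would crash: NULL i_wb, as in the oops */
+            fdatawrite(&inode);         /* safe: setup happens first */
+            return 0;
+    }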
+ +Reported-by: butt3rflyh4ck +CC: stable@vger.kernel.org +Fixes: 52ebea749aae ("writeback: make backing_dev_info host cgroup-specific bdi_writebacks") +Reviewed-by: Christoph Hellwig +Signed-off-by: Jan Kara +Signed-off-by: Greg Kroah-Hartman +--- + fs/udf/inode.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +--- a/fs/udf/inode.c ++++ b/fs/udf/inode.c +@@ -258,10 +258,6 @@ int udf_expand_file_adinicb(struct inode + char *kaddr; + struct udf_inode_info *iinfo = UDF_I(inode); + int err; +- struct writeback_control udf_wbc = { +- .sync_mode = WB_SYNC_NONE, +- .nr_to_write = 1, +- }; + + WARN_ON_ONCE(!inode_is_locked(inode)); + if (!iinfo->i_lenAlloc) { +@@ -305,8 +301,10 @@ int udf_expand_file_adinicb(struct inode + iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; + /* from now on we have normal address_space methods */ + inode->i_data.a_ops = &udf_aops; ++ set_page_dirty(page); ++ unlock_page(page); + up_write(&iinfo->i_data_sem); +- err = inode->i_data.a_ops->writepage(page, &udf_wbc); ++ err = filemap_fdatawrite(inode->i_mapping); + if (err) { + /* Restore everything back so that we don't lose data... */ + lock_page(page); diff --git a/queue-5.16/udf-restore-i_lenalloc-when-inode-expansion-fails.patch b/queue-5.16/udf-restore-i_lenalloc-when-inode-expansion-fails.patch new file mode 100644 index 00000000000..793f477c8af --- /dev/null +++ b/queue-5.16/udf-restore-i_lenalloc-when-inode-expansion-fails.patch @@ -0,0 +1,34 @@ +From ea8569194b43f0f01f0a84c689388542c7254a1f Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Tue, 18 Jan 2022 09:57:25 +0100 +Subject: udf: Restore i_lenAlloc when inode expansion fails + +From: Jan Kara + +commit ea8569194b43f0f01f0a84c689388542c7254a1f upstream. + +When we fail to expand inode from inline format to a normal format, we +restore inode to contain the original inline formatting but we forgot to +set i_lenAlloc back. The mismatch between i_lenAlloc and i_size was then +causing further problems such as warnings and lost data down the line. + +Reported-by: butt3rflyh4ck +CC: stable@vger.kernel.org +Fixes: 7e49b6f2480c ("udf: Convert UDF to new truncate calling sequence") +Reviewed-by: Christoph Hellwig +Signed-off-by: Jan Kara +Signed-off-by: Greg Kroah-Hartman +--- + fs/udf/inode.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/udf/inode.c ++++ b/fs/udf/inode.c +@@ -317,6 +317,7 @@ int udf_expand_file_adinicb(struct inode + unlock_page(page); + iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; + inode->i_data.a_ops = &udf_adinicb_aops; ++ iinfo->i_lenAlloc = inode->i_size; + up_write(&iinfo->i_data_sem); + } + put_page(page); -- 2.47.2