git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.18
author Sasha Levin <sashal@kernel.org>
Mon, 15 Aug 2022 05:56:00 +0000 (01:56 -0400)
committer Sasha Levin <sashal@kernel.org>
Mon, 15 Aug 2022 05:56:00 +0000 (01:56 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
110 files changed:
queue-5.18/__follow_mount_rcu-verify-that-mount_lock-remains-un.patch [new file with mode: 0644]
queue-5.18/acpi-cppc-do-not-prevent-cppc-from-working-in-the-fu.patch [new file with mode: 0644]
queue-5.18/batman-adv-tracing-use-the-new-__vstring-helper.patch [new file with mode: 0644]
queue-5.18/block-add-a-bdev_max_zone_append_sectors-helper.patch [new file with mode: 0644]
queue-5.18/block-add-bdev_max_segments-helper.patch [new file with mode: 0644]
queue-5.18/block-don-t-allow-the-same-type-rq_qos-add-more-than.patch [new file with mode: 0644]
queue-5.18/block-serialize-all-debugfs-operations-using-q-debug.patch [new file with mode: 0644]
queue-5.18/btrfs-ensure-pages-are-unlocked-on-cow_file_range-fa.patch [new file with mode: 0644]
queue-5.18/btrfs-fix-error-handling-of-fallback-uncompress-writ.patch [new file with mode: 0644]
queue-5.18/btrfs-let-can_allocate_chunk-return-error.patch [new file with mode: 0644]
queue-5.18/btrfs-make-the-bg_reclaim_threshold-per-space-info.patch [new file with mode: 0644]
queue-5.18/btrfs-properly-flag-filesystem-with-btrfs_feature_in.patch [new file with mode: 0644]
queue-5.18/btrfs-replace-btrfs_max_extent_size-with-fs_info-max.patch [new file with mode: 0644]
queue-5.18/btrfs-reset-block-group-chunk-force-if-we-have-to-wa.patch [new file with mode: 0644]
queue-5.18/btrfs-store-chunk-size-in-space-info-struct.patch [new file with mode: 0644]
queue-5.18/btrfs-tree-log-make-the-return-value-for-log-syncing.patch [new file with mode: 0644]
queue-5.18/btrfs-zoned-activate-metadata-block-group-on-flush_s.patch [new file with mode: 0644]
queue-5.18/btrfs-zoned-activate-necessary-block-group.patch [new file with mode: 0644]
queue-5.18/btrfs-zoned-disable-metadata-overcommit-for-zoned.patch [new file with mode: 0644]
queue-5.18/btrfs-zoned-finish-least-available-block-group-on-da.patch [new file with mode: 0644]
queue-5.18/btrfs-zoned-introduce-btrfs_zoned_bg_is_full.patch [new file with mode: 0644]
queue-5.18/btrfs-zoned-introduce-space_info-active_total_bytes.patch [new file with mode: 0644]
queue-5.18/btrfs-zoned-revive-max_zone_append_bytes.patch [new file with mode: 0644]
queue-5.18/btrfs-zoned-wait-until-zone-is-finished-when-allocat.patch [new file with mode: 0644]
queue-5.18/btrfs-zoned-write-out-partially-allocated-region.patch [new file with mode: 0644]
queue-5.18/crypto-blake2s-remove-shash-module.patch [new file with mode: 0644]
queue-5.18/dm-raid-fix-address-sanitizer-warning-in-raid_resume.patch [new file with mode: 0644]
queue-5.18/dm-raid-fix-address-sanitizer-warning-in-raid_status.patch [new file with mode: 0644]
queue-5.18/dm-thin-fix-use-after-free-crash-in-dm_sm_register_t.patch [new file with mode: 0644]
queue-5.18/dm-writecache-set-a-default-max_writeback_jobs.patch [new file with mode: 0644]
queue-5.18/drivers-base-fix-userspace-break-from-using-bin_attr.patch [new file with mode: 0644]
queue-5.18/drm-dp-mst-read-the-extended-dpcd-capabilities-durin.patch [new file with mode: 0644]
queue-5.18/drm-mediatek-keep-dsi-as-lp00-before-dcs-cmds-transf.patch [new file with mode: 0644]
queue-5.18/drm-vc4-drv-adopt-the-dma-configuration-from-the-hvs.patch [new file with mode: 0644]
queue-5.18/ext4-add-ext4_inode_has_xattr_space-macro-in-xattr.h.patch [new file with mode: 0644]
queue-5.18/ext4-check-if-directory-block-is-within-i_size.patch [new file with mode: 0644]
queue-5.18/ext4-correct-max_inline_xattr_value_size-computing.patch [new file with mode: 0644]
queue-5.18/ext4-correct-the-misjudgment-in-ext4_iget_extra_inod.patch [new file with mode: 0644]
queue-5.18/ext4-fix-extent-status-tree-race-in-writeback-error-.patch [new file with mode: 0644]
queue-5.18/ext4-fix-race-when-reusing-xattr-blocks.patch [new file with mode: 0644]
queue-5.18/ext4-fix-use-after-free-in-ext4_xattr_set_entry.patch [new file with mode: 0644]
queue-5.18/ext4-fix-warning-in-ext4_iomap_begin-as-race-between.patch [new file with mode: 0644]
queue-5.18/ext4-make-sure-ext4_append-always-allocates-new-bloc.patch [new file with mode: 0644]
queue-5.18/ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch [new file with mode: 0644]
queue-5.18/ext4-unindent-codeblock-in-ext4_xattr_block_set.patch [new file with mode: 0644]
queue-5.18/ext4-update-s_overhead_clusters-in-the-superblock-du.patch [new file with mode: 0644]
queue-5.18/ext4-use-kmemdup-to-replace-kmalloc-memcpy.patch [new file with mode: 0644]
queue-5.18/firmware-arm_scpi-ensure-scpi_info-is-not-assigned-i.patch [new file with mode: 0644]
queue-5.18/ftrace-x86-add-back-ftrace_expected-assignment.patch-2936 [new file with mode: 0644]
queue-5.18/hugetlb_cgroup-fix-wrong-hugetlb-cgroup-numa-stat.patch [new file with mode: 0644]
queue-5.18/input-gscps2-check-return-value-of-ioremap-in-gscps2.patch [new file with mode: 0644]
queue-5.18/intel_idle-add-alderlake-support.patch [new file with mode: 0644]
queue-5.18/intel_idle-make-spr-c1-and-c1e-be-independent.patch [new file with mode: 0644]
queue-5.18/intel_th-pci-add-meteor-lake-p-support.patch [new file with mode: 0644]
queue-5.18/intel_th-pci-add-raptor-lake-s-cpu-support.patch [new file with mode: 0644]
queue-5.18/intel_th-pci-add-raptor-lake-s-pch-support.patch [new file with mode: 0644]
queue-5.18/iommu-vt-d-avoid-invalid-memory-access-via-node_onli.patch [new file with mode: 0644]
queue-5.18/kexec-clean-up-arch_kexec_kernel_verify_sig.patch [new file with mode: 0644]
queue-5.18/kexec-keys-s390-make-use-of-built-in-and-secondary-k.patch [new file with mode: 0644]
queue-5.18/kexec_file-drop-weak-attribute-from-functions.patch [new file with mode: 0644]
queue-5.18/keys-asymmetric-enforce-sm2-signature-use-pkey-algo.patch [new file with mode: 0644]
queue-5.18/ksmbd-add-smbd-max-io-size-parameter.patch [new file with mode: 0644]
queue-5.18/ksmbd-fix-wrong-smbd-max-read-write-size-check.patch [new file with mode: 0644]
queue-5.18/ksmbd-prevent-out-of-bound-read-for-smb2_write.patch [new file with mode: 0644]
queue-5.18/ksmbd-smbd-change-prototypes-of-rdma-read-write-rela.patch [new file with mode: 0644]
queue-5.18/ksmbd-smbd-introduce-read-write-credits-for-rdma-rea.patch [new file with mode: 0644]
queue-5.18/ksmbd-validate-length-in-smb2_write.patch [new file with mode: 0644]
queue-5.18/kvm-nvmx-attempt-to-load-perf_global_ctrl-on-nvmx-xf.patch [new file with mode: 0644]
queue-5.18/kvm-set_msr_mce-permit-guests-to-ignore-single-bit-e.patch [new file with mode: 0644]
queue-5.18/kvm-vmx-add-helper-to-check-if-the-guest-pmu-has-per.patch [new file with mode: 0644]
queue-5.18/kvm-vmx-mark-all-perf_global_-ovf-_ctrl-bits-reserve.patch [new file with mode: 0644]
queue-5.18/kvm-x86-pmu-ignore-pmu-global_ctrl-check-if-vpmu-doe.patch [new file with mode: 0644]
queue-5.18/kvm-x86-pmu-introduce-the-ctrl_mask-value-for-fixed-.patch [new file with mode: 0644]
queue-5.18/kvm-x86-signal-gp-not-eperm-on-bad-wrmsr-mci_ctl-sta.patch [new file with mode: 0644]
queue-5.18/locking-csd_lock-change-csdlock_debug-from-early_par.patch [new file with mode: 0644]
queue-5.18/mm-damon-reclaim-fix-potential-memory-leak-in-damon_.patch [new file with mode: 0644]
queue-5.18/net-9p-initialize-the-iounit-field-during-fid-creati.patch [new file with mode: 0644]
queue-5.18/pci-aer-iterate-over-error-counters-instead-of-error.patch [new file with mode: 0644]
queue-5.18/pci-qcom-power-on-phy-before-ipq8074-dbi-register-ac.patch [new file with mode: 0644]
queue-5.18/powerpc-powernv-kvm-use-darn-for-h_random-on-power9.patch [new file with mode: 0644]
queue-5.18/s390-unwind-fix-fgraph-return-address-recovery.patch [new file with mode: 0644]
queue-5.18/scsi-qla2xxx-edif-fix-dropped-ike-message.patch [new file with mode: 0644]
queue-5.18/scsi-qla2xxx-fix-crash-due-to-stale-srb-access-aroun.patch [new file with mode: 0644]
queue-5.18/scsi-qla2xxx-fix-discovery-issues-in-fc-al-topology.patch-4818 [new file with mode: 0644]
queue-5.18/scsi-qla2xxx-fix-erroneous-mailbox-timeout-after-pci.patch [new file with mode: 0644]
queue-5.18/scsi-qla2xxx-fix-excessive-i-o-error-messages-by-def.patch [new file with mode: 0644]
queue-5.18/scsi-qla2xxx-fix-imbalance-vha-vref_count.patch-27970 [new file with mode: 0644]
queue-5.18/scsi-qla2xxx-fix-losing-fcp-2-targets-during-port-pe.patch [new file with mode: 0644]
queue-5.18/scsi-qla2xxx-fix-losing-fcp-2-targets-on-long-port-d.patch [new file with mode: 0644]
queue-5.18/scsi-qla2xxx-fix-losing-target-when-it-reappears-dur.patch [new file with mode: 0644]
queue-5.18/scsi-qla2xxx-fix-response-queue-handler-reading-stal.patch [new file with mode: 0644]
queue-5.18/scsi-qla2xxx-turn-off-multi-queue-for-8g-adapters.patch-18430 [new file with mode: 0644]
queue-5.18/scsi-qla2xxx-update-manufacturer-details.patch [new file with mode: 0644]
queue-5.18/scsi-qla2xxx-wind-down-adapter-after-pcie-error.patch-27996 [new file with mode: 0644]
queue-5.18/serial-8250-add-proper-clock-handling-for-oxsemi-pci.patch [new file with mode: 0644]
queue-5.18/serial-8250-fold-endrun-device-support-into-oxsemi-t.patch [new file with mode: 0644]
queue-5.18/series
queue-5.18/spmi-trace-fix-stack-out-of-bound-access-in-spmi-tra.patch [new file with mode: 0644]
queue-5.18/timekeeping-contribute-wall-clock-to-rng-on-time-cha.patch [new file with mode: 0644]
queue-5.18/tpm-add-check-for-failure-mode-for-tpm2-modules.patch [new file with mode: 0644]
queue-5.18/tpm-eventlog-fix-section-mismatch-for-debug_section_.patch [new file with mode: 0644]
queue-5.18/tracing-events-add-__vstring-and-__assign_vstr-helpe.patch [new file with mode: 0644]
queue-5.18/tracing-use-a-struct-alignof-to-determine-trace-even.patch [new file with mode: 0644]
queue-5.18/tty-8250-add-support-for-brainboxes-px-cards.patch [new file with mode: 0644]
queue-5.18/usbnet-smsc95xx-avoid-link-settings-race-on-interrup.patch [new file with mode: 0644]
queue-5.18/usbnet-smsc95xx-don-t-clear-read-only-phy-interrupt.patch [new file with mode: 0644]
queue-5.18/usbnet-smsc95xx-fix-deadlock-on-runtime-resume.patch [new file with mode: 0644]
queue-5.18/usbnet-smsc95xx-forward-phy-interrupts-to-phy-driver.patch [new file with mode: 0644]
queue-5.18/x86-kprobes-update-kcb-status-flag-after-singlestepp.patch [new file with mode: 0644]
queue-5.18/x86-olpc-fix-logical-not-is-only-applied-to-the-left.patch [new file with mode: 0644]

diff --git a/queue-5.18/__follow_mount_rcu-verify-that-mount_lock-remains-un.patch b/queue-5.18/__follow_mount_rcu-verify-that-mount_lock-remains-un.patch
new file mode 100644 (file)
index 0000000..c561a7f
--- /dev/null
@@ -0,0 +1,51 @@
+From 4be2ce739fb3c1ad0fbc2337b07b33a326009677 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Jul 2022 17:26:29 -0400
+Subject: __follow_mount_rcu(): verify that mount_lock remains unchanged
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit 20aac6c60981f5bfacd66661d090d907bf1482f0 ]
+
+Validate mount_lock seqcount as soon as we cross into mount in RCU
+mode.  Sure, ->mnt_root is pinned and will remain so until we
+do rcu_read_unlock() anyway, and we will eventually fail to unlazy if
+the mount_lock had been touched, but we might run into a hard error
+(e.g. -ENOENT) before trying to unlazy.  And it's possible to end
+up with RCU pathwalk racing with rename() and umount() in a way
+that would fail with -ENOENT while non-RCU pathwalk would've
+succeeded with any timings.
+
+Once upon a time we hadn't needed that, but the analysis had been
+subtle, brittle, and went out of the window as soon as RENAME_EXCHANGE
+had been added.
+
+It's narrow, hard to hit and won't get you anything other than
+stray -ENOENT that could be arranged in a much easier way with the
+same privileges, but it's a bug all the same.
+
+Cc: stable@kernel.org
+X-sky-is-falling: unlikely
+Fixes: da1ce0670c14 "vfs: add cross-rename"
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/namei.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/namei.c b/fs/namei.c
+index 740a40802780..2fa412c5a082 100644
+--- a/fs/namei.c
++++ b/fs/namei.c
+@@ -1511,6 +1511,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
+                                * becoming unpinned.
+                                */
+                               flags = dentry->d_flags;
++                              if (read_seqretry(&mount_lock, nd->m_seq))
++                                      return false;
+                               continue;
+                       }
+                       if (read_seqretry(&mount_lock, nd->m_seq))
+-- 
+2.35.1
+
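
The fix above is an instance of the seqcount pattern: sample a sequence
number before an optimistic lockless traversal, and discard the result if
a writer bumped the count in the meantime. Below is a minimal userspace
sketch of that pattern; it is a toy counter, not the kernel's seqlock_t,
and it omits the memory barriers the real read_seqbegin()/read_seqretry()
provide.

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy seqcount: writers would make it odd while updating; readers
 * compare before/after and retry (or fall back) on any change. */
static _Atomic unsigned int mount_seq;

static unsigned int read_seqbegin_toy(void)
{
	unsigned int seq;

	while ((seq = atomic_load(&mount_seq)) & 1)
		;	/* writer in progress, wait it out */
	return seq;
}

static bool read_seqretry_toy(unsigned int seq)
{
	/* True means a writer ran: the lockless walk must be abandoned. */
	return atomic_load(&mount_seq) != seq;
}

int main(void)
{
	unsigned int seq = read_seqbegin_toy();

	/* ... a lockless walk over mount structures would happen here ... */
	if (read_seqretry_toy(seq))
		printf("mount_lock changed: fail the RCU walk, fall back\n");
	else
		printf("walk result can be trusted\n");
	return 0;
}
```

The patch adds exactly such a retry check at the point where the walk
crosses into a mount, so a concurrent rename()/umount() forces a clean
fallback instead of a spurious -ENOENT.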
diff --git a/queue-5.18/acpi-cppc-do-not-prevent-cppc-from-working-in-the-fu.patch b/queue-5.18/acpi-cppc-do-not-prevent-cppc-from-working-in-the-fu.patch
new file mode 100644 (file)
index 0000000..1d1f000
--- /dev/null
@@ -0,0 +1,131 @@
+From 5f735daa405bd9da9301cbe524cf0d5239a6082d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Jul 2022 19:41:10 +0200
+Subject: ACPI: CPPC: Do not prevent CPPC from working in the future
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+[ Upstream commit 4f4179fcf420873002035cf1941d844c9e0e7cb3 ]
+
+There is a problem with the current revision checks in
+is_cppc_supported(): they essentially prevent CPPC support from working
+if the platform firmware uses a future _CPC package format revision
+that is a proper superset of v3 and only causes _CPC to return a
+package with more entries (while retaining the types and meaning of the
+entries defined by v3).
+
+In that case, as long as the number of entries in the _CPC return
+package is at least CPPC_V3_NUM_ENT, it should be perfectly fine to
+use the v3 support code and disregard the additional package entries
+added by the new package format revision.
+
+For this reason, drop is_cppc_supported() altogether, put the revision
+checks directly into acpi_cppc_processor_probe() so they are easier to
+follow and rework them to take the case mentioned above into account.
+
+Fixes: 4773e77cdc9b ("ACPI / CPPC: Add support for CPPC v3")
+Cc: 4.18+ <stable@vger.kernel.org> # 4.18+
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/acpi/cppc_acpi.c | 54 ++++++++++++++++++----------------------
+ include/acpi/cppc_acpi.h |  2 +-
+ 2 files changed, 25 insertions(+), 31 deletions(-)
+
+diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
+index b8e26b6b5523..35d894674eba 100644
+--- a/drivers/acpi/cppc_acpi.c
++++ b/drivers/acpi/cppc_acpi.c
+@@ -600,33 +600,6 @@ static int pcc_data_alloc(int pcc_ss_id)
+       return 0;
+ }
+-/* Check if CPPC revision + num_ent combination is supported */
+-static bool is_cppc_supported(int revision, int num_ent)
+-{
+-      int expected_num_ent;
+-
+-      switch (revision) {
+-      case CPPC_V2_REV:
+-              expected_num_ent = CPPC_V2_NUM_ENT;
+-              break;
+-      case CPPC_V3_REV:
+-              expected_num_ent = CPPC_V3_NUM_ENT;
+-              break;
+-      default:
+-              pr_debug("Firmware exports unsupported CPPC revision: %d\n",
+-                      revision);
+-              return false;
+-      }
+-
+-      if (expected_num_ent != num_ent) {
+-              pr_debug("Firmware exports %d entries. Expected: %d for CPPC rev:%d\n",
+-                      num_ent, expected_num_ent, revision);
+-              return false;
+-      }
+-
+-      return true;
+-}
+-
+ /*
+  * An example CPC table looks like the following.
+  *
+@@ -715,7 +688,6 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
+                        cpc_obj->type, pr->id);
+               goto out_free;
+       }
+-      cpc_ptr->num_entries = num_ent;
+       /* Second entry should be revision. */
+       cpc_obj = &out_obj->package.elements[1];
+@@ -726,10 +698,32 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
+                        cpc_obj->type, pr->id);
+               goto out_free;
+       }
+-      cpc_ptr->version = cpc_rev;
+-      if (!is_cppc_supported(cpc_rev, num_ent))
++      if (cpc_rev < CPPC_V2_REV) {
++              pr_debug("Unsupported _CPC Revision (%d) for CPU:%d\n", cpc_rev,
++                       pr->id);
++              goto out_free;
++      }
++
++      /*
++       * Disregard _CPC if the number of entries in the return package is not
++       * as expected, but support future revisions being proper supersets of
++       * the v3 and only causing more entries to be returned by _CPC.
++       */
++      if ((cpc_rev == CPPC_V2_REV && num_ent != CPPC_V2_NUM_ENT) ||
++          (cpc_rev == CPPC_V3_REV && num_ent != CPPC_V3_NUM_ENT) ||
++          (cpc_rev > CPPC_V3_REV && num_ent <= CPPC_V3_NUM_ENT)) {
++              pr_debug("Unexpected number of _CPC return package entries (%d) for CPU:%d\n",
++                       num_ent, pr->id);
+               goto out_free;
++      }
++      if (cpc_rev > CPPC_V3_REV) {
++              num_ent = CPPC_V3_NUM_ENT;
++              cpc_rev = CPPC_V3_REV;
++      }
++
++      cpc_ptr->num_entries = num_ent;
++      cpc_ptr->version = cpc_rev;
+       /* Iterate through remaining entries in _CPC */
+       for (i = 2; i < num_ent; i++) {
+diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
+index 181907349b49..a76f8c6b732d 100644
+--- a/include/acpi/cppc_acpi.h
++++ b/include/acpi/cppc_acpi.h
+@@ -17,7 +17,7 @@
+ #include <acpi/pcc.h>
+ #include <acpi/processor.h>
+-/* Support CPPCv2 and CPPCv3  */
++/* CPPCv2 and CPPCv3 support */
+ #define CPPC_V2_REV   2
+ #define CPPC_V3_REV   3
+ #define CPPC_V2_NUM_ENT       21
+-- 
+2.35.1
+
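
The reworked probe logic reduces to a small decision table: exact entry
counts for v2 and v3, and any later revision must carry more entries than
v3 (the probe then clamps both the revision and the entry count down to
v3). Here is a self-contained sketch of that predicate; cpc_package_ok()
is an illustrative name, not a kernel function, and CPPC_V3_NUM_ENT's
value is taken from the kernel header rather than the hunk above.

```c
#include <stdbool.h>
#include <stdio.h>

/* Constants as in include/acpi/cppc_acpi.h. */
#define CPPC_V2_REV	2
#define CPPC_V3_REV	3
#define CPPC_V2_NUM_ENT	21
#define CPPC_V3_NUM_ENT	23

/* Mirrors the revision/num_ent checks acpi_cppc_processor_probe()
 * performs after this patch. */
static bool cpc_package_ok(int rev, int num_ent)
{
	if (rev < CPPC_V2_REV)
		return false;			/* too old, unsupported */
	if (rev == CPPC_V2_REV)
		return num_ent == CPPC_V2_NUM_ENT;
	if (rev == CPPC_V3_REV)
		return num_ent == CPPC_V3_NUM_ENT;
	return num_ent > CPPC_V3_NUM_ENT;	/* future rev: v3 superset */
}

int main(void)
{
	printf("v3, 23 entries: %d\n", cpc_package_ok(3, 23));	/* 1 */
	printf("v4, 30 entries: %d\n", cpc_package_ok(4, 30));	/* 1 */
	printf("v4, 20 entries: %d\n", cpc_package_ok(4, 20));	/* 0 */
	return 0;
}
```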
diff --git a/queue-5.18/batman-adv-tracing-use-the-new-__vstring-helper.patch b/queue-5.18/batman-adv-tracing-use-the-new-__vstring-helper.patch
new file mode 100644 (file)
index 0000000..9cc6be7
--- /dev/null
@@ -0,0 +1,69 @@
+From 6e6c54f719b05010a877a1d77f3b5cab7c585471 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 19:16:50 -0400
+Subject: batman-adv: tracing: Use the new __vstring() helper
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit 9abc291812d784bd4a26c01af4ebdbf9f2dbf0bb ]
+
+Instead of open coding a __dynamic_array() with a fixed length (which
+defeats the purpose of the dynamic array in the first place), use the new
+__vstring() helper, which takes a va_list and writes only as much of the
+string into the ring buffer as is needed.
+
+Link: https://lkml.kernel.org/r/20220724191650.236b1355@rorschach.local.home
+
+Cc: Marek Lindner <mareklindner@neomailbox.ch>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Simon Wunderlich <sw@simonwunderlich.de>
+Cc: Antonio Quartulli <a@unstable.cc>
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Cc: b.a.t.m.a.n@lists.open-mesh.org
+Cc: netdev@vger.kernel.org
+Acked-by: Sven Eckelmann <sven@narfation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/batman-adv/trace.h | 9 ++-------
+ 1 file changed, 2 insertions(+), 7 deletions(-)
+
+diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h
+index d673ebdd0426..31c8f922651d 100644
+--- a/net/batman-adv/trace.h
++++ b/net/batman-adv/trace.h
+@@ -28,8 +28,6 @@
+ #endif /* CONFIG_BATMAN_ADV_TRACING */
+-#define BATADV_MAX_MSG_LEN    256
+-
+ TRACE_EVENT(batadv_dbg,
+           TP_PROTO(struct batadv_priv *bat_priv,
+@@ -40,16 +38,13 @@ TRACE_EVENT(batadv_dbg,
+           TP_STRUCT__entry(
+                   __string(device, bat_priv->soft_iface->name)
+                   __string(driver, KBUILD_MODNAME)
+-                  __dynamic_array(char, msg, BATADV_MAX_MSG_LEN)
++                  __vstring(msg, vaf->fmt, vaf->va)
+           ),
+           TP_fast_assign(
+                   __assign_str(device, bat_priv->soft_iface->name);
+                   __assign_str(driver, KBUILD_MODNAME);
+-                  WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
+-                                         BATADV_MAX_MSG_LEN,
+-                                         vaf->fmt,
+-                                         *vaf->va) >= BATADV_MAX_MSG_LEN);
++                  __assign_vstr(msg, vaf->fmt, vaf->va);
+           ),
+           TP_printk(
+-- 
+2.35.1
+
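
The essence of __vstring()/__assign_vstr() is a two-pass format: size the
record from the va_list first, then write into space of exactly that
size, rather than always reserving a fixed 256 bytes. Below is a
userspace analogue using vsnprintf()'s standard sizing mode;
record_vstring() is an illustrative name, not a tracing API.

```c
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Measure the formatted length from the va_list, then format into a
 * buffer of exactly that size — the same idea as __vstring() reserving
 * just enough ring-buffer space and __assign_vstr() filling it. */
static char *record_vstring(const char *fmt, ...)
{
	va_list ap, ap2;
	char *buf;
	int len;

	va_start(ap, fmt);
	va_copy(ap2, ap);
	len = vsnprintf(NULL, 0, fmt, ap);	/* sizing pass */
	va_end(ap);

	buf = malloc(len + 1);
	if (buf)
		vsnprintf(buf, len + 1, fmt, ap2);	/* writing pass */
	va_end(ap2);
	return buf;
}

int main(void)
{
	char *msg = record_vstring("batadv: %s event on %s", "route", "bat0");

	if (msg) {
		printf("%zu bytes used: %s\n", strlen(msg) + 1, msg);
		free(msg);
	}
	return 0;
}
```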
diff --git a/queue-5.18/block-add-a-bdev_max_zone_append_sectors-helper.patch b/queue-5.18/block-add-a-bdev_max_zone_append_sectors-helper.patch
new file mode 100644 (file)
index 0000000..3a8ba25
--- /dev/null
@@ -0,0 +1,80 @@
+From dd6495e668dadb91e946e5f118bc7f72cc8bed13 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Apr 2022 06:52:46 +0200
+Subject: block: add a bdev_max_zone_append_sectors helper
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 2aba0d19f4d8c8929b4b3b94a9cfde2aa20e6ee2 ]
+
+Add a helper to check the max supported sectors for zone append based on
+the block_device instead of having to poke into the block layer internal
+request_queue.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
+Link: https://lore.kernel.org/r/20220415045258.199825-16-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/target/zns.c | 3 +--
+ fs/zonefs/super.c         | 3 +--
+ include/linux/blkdev.h    | 6 ++++++
+ 3 files changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
+index e34718b09550..82b61acf7a72 100644
+--- a/drivers/nvme/target/zns.c
++++ b/drivers/nvme/target/zns.c
+@@ -34,8 +34,7 @@ static int validate_conv_zones_cb(struct blk_zone *z,
+ bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
+ {
+-      struct request_queue *q = ns->bdev->bd_disk->queue;
+-      u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q));
++      u8 zasl = nvmet_zasl(bdev_max_zone_append_sectors(ns->bdev));
+       struct gendisk *bd_disk = ns->bdev->bd_disk;
+       int ret;
+diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
+index 15a4c7c07a3b..b68798a572fc 100644
+--- a/fs/zonefs/super.c
++++ b/fs/zonefs/super.c
+@@ -723,13 +723,12 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
+       struct inode *inode = file_inode(iocb->ki_filp);
+       struct zonefs_inode_info *zi = ZONEFS_I(inode);
+       struct block_device *bdev = inode->i_sb->s_bdev;
+-      unsigned int max;
++      unsigned int max = bdev_max_zone_append_sectors(bdev);
+       struct bio *bio;
+       ssize_t size;
+       int nr_pages;
+       ssize_t ret;
+-      max = queue_max_zone_append_sectors(bdev_get_queue(bdev));
+       max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
+       iov_iter_truncate(from, max);
+diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
+index cc6b24a5098f..34f2b88dfd6e 100644
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -1190,6 +1190,12 @@ static inline unsigned int queue_max_zone_append_sectors(const struct request_qu
+       return min(l->max_zone_append_sectors, l->max_sectors);
+ }
++static inline unsigned int
++bdev_max_zone_append_sectors(struct block_device *bdev)
++{
++      return queue_max_zone_append_sectors(bdev_get_queue(bdev));
++}
++
+ static inline unsigned queue_logical_block_size(const struct request_queue *q)
+ {
+       int retval = 512;
+-- 
+2.35.1
+
diff --git a/queue-5.18/block-add-bdev_max_segments-helper.patch b/queue-5.18/block-add-bdev_max_segments-helper.patch
new file mode 100644 (file)
index 0000000..420dd71
--- /dev/null
@@ -0,0 +1,40 @@
+From 065935c8b7fbf75e3eb0c7a9d9f88ff921b1c9a4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:38 +0900
+Subject: block: add bdev_max_segments() helper
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 65ea1b66482f415d51cd46515b02477257330339 ]
+
+Add bdev_max_segments() like other queue parameters.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: Jens Axboe <axboe@kernel.dk>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/blkdev.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
+index 34f2b88dfd6e..7927480b9cf7 100644
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -1196,6 +1196,11 @@ bdev_max_zone_append_sectors(struct block_device *bdev)
+       return queue_max_zone_append_sectors(bdev_get_queue(bdev));
+ }
++static inline unsigned int bdev_max_segments(struct block_device *bdev)
++{
++      return queue_max_segments(bdev_get_queue(bdev));
++}
++
+ static inline unsigned queue_logical_block_size(const struct request_queue *q)
+ {
+       int retval = 512;
+-- 
+2.35.1
+
diff --git a/queue-5.18/block-don-t-allow-the-same-type-rq_qos-add-more-than.patch b/queue-5.18/block-don-t-allow-the-same-type-rq_qos-add-more-than.patch
new file mode 100644 (file)
index 0000000..b578a24
--- /dev/null
@@ -0,0 +1,199 @@
+From 9d6969d9e591d57389ab123fdc51c860fd939781 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 17:36:16 +0800
+Subject: block: don't allow the same type rq_qos add more than once
+
+From: Jinke Han <hanjinke.666@bytedance.com>
+
+[ Upstream commit 14a6e2eb7df5c7897c15b109cba29ab0c4a791b6 ]
+
+In our test of iocost, we encountered some list add/del corruptions of
+inner_walk list in ioc_timer_fn.
+
+The reason can be described as follows:
+
+cpu 0                                  cpu 1
+ioc_qos_write                          ioc_qos_write
+
+ioc = q_to_ioc(queue);
+if (!ioc) {
+        ioc = kzalloc();
+                                       ioc = q_to_ioc(queue);
+                                       if (!ioc) {
+                                               ioc = kzalloc();
+                                               ...
+                                               rq_qos_add(q, rqos);
+                                       }
+        ...
+        rq_qos_add(q, rqos);
+        ...
+}
+
+When the io.cost.qos file is written by two CPUs concurrently, rq_qos may
+be added to one disk twice. In that case, there will be two iocs enabled
+and running on one disk, each owning different iocgs on its active list.
+In the ioc_timer_fn function, because the iocgs from the two iocs share
+the same root iocg, they overwrite each other's root iocg walk_list, and
+this leads to list add/del corruptions when building or destroying the
+inner_walk list.
+
+So far, the blk-rq-qos framework has implicitly assumed one instance per
+rq_qos type per queue. This patch makes that assumption explicit and also
+fixes the crash above.
+
+Signed-off-by: Jinke Han <hanjinke.666@bytedance.com>
+Reviewed-by: Muchun Song <songmuchun@bytedance.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20220720093616.70584-1-hanjinke.666@bytedance.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-iocost.c    | 20 +++++++++++++-------
+ block/blk-iolatency.c | 18 +++++++++++-------
+ block/blk-rq-qos.h    | 11 ++++++++++-
+ block/blk-wbt.c       | 12 +++++++++++-
+ 4 files changed, 45 insertions(+), 16 deletions(-)
+
+diff --git a/block/blk-iocost.c b/block/blk-iocost.c
+index 16705fbd0699..a19f2db4eeb2 100644
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -2893,15 +2893,21 @@ static int blk_iocost_init(struct request_queue *q)
+        * called before policy activation completion, can't assume that the
+        * target bio has an iocg associated and need to test for NULL iocg.
+        */
+-      rq_qos_add(q, rqos);
++      ret = rq_qos_add(q, rqos);
++      if (ret)
++              goto err_free_ioc;
++
+       ret = blkcg_activate_policy(q, &blkcg_policy_iocost);
+-      if (ret) {
+-              rq_qos_del(q, rqos);
+-              free_percpu(ioc->pcpu_stat);
+-              kfree(ioc);
+-              return ret;
+-      }
++      if (ret)
++              goto err_del_qos;
+       return 0;
++
++err_del_qos:
++      rq_qos_del(q, rqos);
++err_free_ioc:
++      free_percpu(ioc->pcpu_stat);
++      kfree(ioc);
++      return ret;
+ }
+ static struct blkcg_policy_data *ioc_cpd_alloc(gfp_t gfp)
+diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
+index 9568bf8dfe82..7845dca5fcfd 100644
+--- a/block/blk-iolatency.c
++++ b/block/blk-iolatency.c
+@@ -773,19 +773,23 @@ int blk_iolatency_init(struct request_queue *q)
+       rqos->ops = &blkcg_iolatency_ops;
+       rqos->q = q;
+-      rq_qos_add(q, rqos);
+-
++      ret = rq_qos_add(q, rqos);
++      if (ret)
++              goto err_free;
+       ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
+-      if (ret) {
+-              rq_qos_del(q, rqos);
+-              kfree(blkiolat);
+-              return ret;
+-      }
++      if (ret)
++              goto err_qos_del;
+       timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
+       INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn);
+       return 0;
++
++err_qos_del:
++      rq_qos_del(q, rqos);
++err_free:
++      kfree(blkiolat);
++      return ret;
+ }
+ static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
+diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
+index 0e46052b018a..08b856570ad1 100644
+--- a/block/blk-rq-qos.h
++++ b/block/blk-rq-qos.h
+@@ -86,7 +86,7 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
+       init_waitqueue_head(&rq_wait->wait);
+ }
+-static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
++static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+ {
+       /*
+        * No IO can be in-flight when adding rqos, so freeze queue, which
+@@ -98,6 +98,8 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+       blk_mq_freeze_queue(q);
+       spin_lock_irq(&q->queue_lock);
++      if (rq_qos_id(q, rqos->id))
++              goto ebusy;
+       rqos->next = q->rq_qos;
+       q->rq_qos = rqos;
+       spin_unlock_irq(&q->queue_lock);
+@@ -109,6 +111,13 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+               blk_mq_debugfs_register_rqos(rqos);
+               mutex_unlock(&q->debugfs_mutex);
+       }
++
++      return 0;
++ebusy:
++      spin_unlock_irq(&q->queue_lock);
++      blk_mq_unfreeze_queue(q);
++      return -EBUSY;
++
+ }
+ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index 0c119be0e813..ae6ea0b54579 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -820,6 +820,7 @@ int wbt_init(struct request_queue *q)
+ {
+       struct rq_wb *rwb;
+       int i;
++      int ret;
+       rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
+       if (!rwb)
+@@ -846,7 +847,10 @@ int wbt_init(struct request_queue *q)
+       /*
+        * Assign rwb and add the stats callback.
+        */
+-      rq_qos_add(q, &rwb->rqos);
++      ret = rq_qos_add(q, &rwb->rqos);
++      if (ret)
++              goto err_free;
++
+       blk_stat_add_callback(q, rwb->cb);
+       rwb->min_lat_nsec = wbt_default_latency_nsec(q);
+@@ -855,4 +859,10 @@ int wbt_init(struct request_queue *q)
+       wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
+       return 0;
++
++err_free:
++      blk_stat_free_callback(rwb->cb);
++      kfree(rwb);
++      return ret;
++
+ }
+-- 
+2.35.1
+
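
The core of the fix is a check-then-insert performed atomically under the
same lock, so two racing callers can no longer both attach an rq_qos of
the same id. Here is a toy userspace model of that idiom; rq_qos_add_toy()
is an illustrative name, and the kernel version additionally freezes the
queue around the insert.

```c
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct rq_qos {
	int id;
	struct rq_qos *next;
};

static struct rq_qos *qos_list;
static pthread_mutex_t qos_lock = PTHREAD_MUTEX_INITIALIZER;

/* Duplicate detection and list insert happen under one lock, so the
 * "both see no ioc, both add" interleaving above returns -EBUSY for
 * whichever caller loses the race. */
static int rq_qos_add_toy(struct rq_qos *rqos)
{
	struct rq_qos *q;

	pthread_mutex_lock(&qos_lock);
	for (q = qos_list; q; q = q->next) {
		if (q->id == rqos->id) {
			pthread_mutex_unlock(&qos_lock);
			return -EBUSY;	/* same type already attached */
		}
	}
	rqos->next = qos_list;
	qos_list = rqos;
	pthread_mutex_unlock(&qos_lock);
	return 0;
}

int main(void)
{
	struct rq_qos a = { .id = 1 }, b = { .id = 1 };

	printf("first add:  %d\n", rq_qos_add_toy(&a));	/* 0 */
	printf("second add: %d\n", rq_qos_add_toy(&b));	/* -16 (-EBUSY) */
	return 0;
}
```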
diff --git a/queue-5.18/block-serialize-all-debugfs-operations-using-q-debug.patch b/queue-5.18/block-serialize-all-debugfs-operations-using-q-debug.patch
new file mode 100644 (file)
index 0000000..49641b4
--- /dev/null
@@ -0,0 +1,334 @@
+From de8ba1b6410a2cffe26c1f98058fd39d39dc0e62 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Jun 2022 09:48:25 +0200
+Subject: block: serialize all debugfs operations using q->debugfs_mutex
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 5cf9c91ba927119fc6606b938b1895bb2459d3bc ]
+
+Various places like I/O schedulers or the QOS infrastructure try to
+register debugfs files on demans, which can race with creating and
+removing the main queue debugfs directory.  Use the existing
+debugfs_mutex to serialize all debugfs operations that rely on
+q->debugfs_dir or the directories hanging off it.
+
+To make the teardown code a little simpler, declare all debugfs dentry
+pointers, not just the main one, unconditionally in blkdev.h.
+
+Move debugfs_mutex next to the dentries that it protects and document
+what it is used for.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20220614074827.458955-3-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-mq-debugfs.c  | 25 ++++++++++++++++++++-----
+ block/blk-mq-debugfs.h  |  5 -----
+ block/blk-mq-sched.c    | 11 +++++++++++
+ block/blk-rq-qos.c      |  2 ++
+ block/blk-rq-qos.h      |  7 ++++++-
+ block/blk-sysfs.c       | 20 +++++++++-----------
+ include/linux/blkdev.h  |  8 ++++----
+ kernel/trace/blktrace.c |  3 ---
+ 8 files changed, 52 insertions(+), 29 deletions(-)
+
+diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
+index 34bee263936c..d491b6eb0ab9 100644
+--- a/block/blk-mq-debugfs.c
++++ b/block/blk-mq-debugfs.c
+@@ -713,11 +713,6 @@ void blk_mq_debugfs_register(struct request_queue *q)
+       }
+ }
+-void blk_mq_debugfs_unregister(struct request_queue *q)
+-{
+-      q->sched_debugfs_dir = NULL;
+-}
+-
+ static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,
+                                       struct blk_mq_ctx *ctx)
+ {
+@@ -751,6 +746,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q,
+ void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx)
+ {
++      if (!hctx->queue->debugfs_dir)
++              return;
+       debugfs_remove_recursive(hctx->debugfs_dir);
+       hctx->sched_debugfs_dir = NULL;
+       hctx->debugfs_dir = NULL;
+@@ -778,6 +775,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q)
+ {
+       struct elevator_type *e = q->elevator->type;
++      lockdep_assert_held(&q->debugfs_mutex);
++
+       /*
+        * If the parent directory has not been created yet, return, we will be
+        * called again later on and the directory/files will be created then.
+@@ -795,6 +794,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q)
+ void blk_mq_debugfs_unregister_sched(struct request_queue *q)
+ {
++      lockdep_assert_held(&q->debugfs_mutex);
++
+       debugfs_remove_recursive(q->sched_debugfs_dir);
+       q->sched_debugfs_dir = NULL;
+ }
+@@ -816,6 +817,10 @@ static const char *rq_qos_id_to_name(enum rq_qos_id id)
+ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
+ {
++      lockdep_assert_held(&rqos->q->debugfs_mutex);
++
++      if (!rqos->q->debugfs_dir)
++              return;
+       debugfs_remove_recursive(rqos->debugfs_dir);
+       rqos->debugfs_dir = NULL;
+ }
+@@ -825,6 +830,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
+       struct request_queue *q = rqos->q;
+       const char *dir_name = rq_qos_id_to_name(rqos->id);
++      lockdep_assert_held(&q->debugfs_mutex);
++
+       if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs)
+               return;
+@@ -840,6 +847,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
+ void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q)
+ {
++      lockdep_assert_held(&q->debugfs_mutex);
++
+       debugfs_remove_recursive(q->rqos_debugfs_dir);
+       q->rqos_debugfs_dir = NULL;
+ }
+@@ -849,6 +858,8 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
+ {
+       struct elevator_type *e = q->elevator->type;
++      lockdep_assert_held(&q->debugfs_mutex);
++
+       /*
+        * If the parent debugfs directory has not been created yet, return;
+        * We will be called again later on with appropriate parent debugfs
+@@ -868,6 +879,10 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
+ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx)
+ {
++      lockdep_assert_held(&hctx->queue->debugfs_mutex);
++
++      if (!hctx->queue->debugfs_dir)
++              return;
+       debugfs_remove_recursive(hctx->sched_debugfs_dir);
+       hctx->sched_debugfs_dir = NULL;
+ }
+diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h
+index 69918f4170d6..771d45832878 100644
+--- a/block/blk-mq-debugfs.h
++++ b/block/blk-mq-debugfs.h
+@@ -21,7 +21,6 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq);
+ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v);
+ void blk_mq_debugfs_register(struct request_queue *q);
+-void blk_mq_debugfs_unregister(struct request_queue *q);
+ void blk_mq_debugfs_register_hctx(struct request_queue *q,
+                                 struct blk_mq_hw_ctx *hctx);
+ void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx);
+@@ -42,10 +41,6 @@ static inline void blk_mq_debugfs_register(struct request_queue *q)
+ {
+ }
+-static inline void blk_mq_debugfs_unregister(struct request_queue *q)
+-{
+-}
+-
+ static inline void blk_mq_debugfs_register_hctx(struct request_queue *q,
+                                               struct blk_mq_hw_ctx *hctx)
+ {
+diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
+index 9e56a69422b6..e84bec39fd3a 100644
+--- a/block/blk-mq-sched.c
++++ b/block/blk-mq-sched.c
+@@ -593,7 +593,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
+       if (ret)
+               goto err_free_map_and_rqs;
++      mutex_lock(&q->debugfs_mutex);
+       blk_mq_debugfs_register_sched(q);
++      mutex_unlock(&q->debugfs_mutex);
+       queue_for_each_hw_ctx(q, hctx, i) {
+               if (e->ops.init_hctx) {
+@@ -606,7 +608,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
+                               return ret;
+                       }
+               }
++              mutex_lock(&q->debugfs_mutex);
+               blk_mq_debugfs_register_sched_hctx(q, hctx);
++              mutex_unlock(&q->debugfs_mutex);
+       }
+       return 0;
+@@ -647,14 +651,21 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
+       unsigned int flags = 0;
+       queue_for_each_hw_ctx(q, hctx, i) {
++              mutex_lock(&q->debugfs_mutex);
+               blk_mq_debugfs_unregister_sched_hctx(hctx);
++              mutex_unlock(&q->debugfs_mutex);
++
+               if (e->type->ops.exit_hctx && hctx->sched_data) {
+                       e->type->ops.exit_hctx(hctx, i);
+                       hctx->sched_data = NULL;
+               }
+               flags = hctx->flags;
+       }
++
++      mutex_lock(&q->debugfs_mutex);
+       blk_mq_debugfs_unregister_sched(q);
++      mutex_unlock(&q->debugfs_mutex);
++
+       if (e->type->ops.exit_sched)
+               e->type->ops.exit_sched(e);
+       blk_mq_sched_tags_teardown(q, flags);
+diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
+index e83af7bc7591..249a6f05dd3b 100644
+--- a/block/blk-rq-qos.c
++++ b/block/blk-rq-qos.c
+@@ -294,7 +294,9 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
+ void rq_qos_exit(struct request_queue *q)
+ {
++      mutex_lock(&q->debugfs_mutex);
+       blk_mq_debugfs_unregister_queue_rqos(q);
++      mutex_unlock(&q->debugfs_mutex);
+       while (q->rq_qos) {
+               struct rq_qos *rqos = q->rq_qos;
+diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
+index 68267007da1c..0e46052b018a 100644
+--- a/block/blk-rq-qos.h
++++ b/block/blk-rq-qos.h
+@@ -104,8 +104,11 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+       blk_mq_unfreeze_queue(q);
+-      if (rqos->ops->debugfs_attrs)
++      if (rqos->ops->debugfs_attrs) {
++              mutex_lock(&q->debugfs_mutex);
+               blk_mq_debugfs_register_rqos(rqos);
++              mutex_unlock(&q->debugfs_mutex);
++      }
+ }
+ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
+@@ -129,7 +132,9 @@ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
+       blk_mq_unfreeze_queue(q);
++      mutex_lock(&q->debugfs_mutex);
+       blk_mq_debugfs_unregister_rqos(rqos);
++      mutex_unlock(&q->debugfs_mutex);
+ }
+ typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
+diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
+index 88bd41d4cb59..6e4801b217a7 100644
+--- a/block/blk-sysfs.c
++++ b/block/blk-sysfs.c
+@@ -779,14 +779,13 @@ static void blk_release_queue(struct kobject *kobj)
+       if (queue_is_mq(q))
+               blk_mq_release(q);
+-      blk_trace_shutdown(q);
+       mutex_lock(&q->debugfs_mutex);
++      blk_trace_shutdown(q);
+       debugfs_remove_recursive(q->debugfs_dir);
++      q->debugfs_dir = NULL;
++      q->sched_debugfs_dir = NULL;
+       mutex_unlock(&q->debugfs_mutex);
+-      if (queue_is_mq(q))
+-              blk_mq_debugfs_unregister(q);
+-
+       bioset_exit(&q->bio_split);
+       if (blk_queue_has_srcu(q))
+@@ -836,17 +835,16 @@ int blk_register_queue(struct gendisk *disk)
+               goto unlock;
+       }
++      if (queue_is_mq(q))
++              __blk_mq_register_dev(dev, q);
++      mutex_lock(&q->sysfs_lock);
++
+       mutex_lock(&q->debugfs_mutex);
+       q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
+                                           blk_debugfs_root);
+-      mutex_unlock(&q->debugfs_mutex);
+-
+-      if (queue_is_mq(q)) {
+-              __blk_mq_register_dev(dev, q);
++      if (queue_is_mq(q))
+               blk_mq_debugfs_register(q);
+-      }
+-
+-      mutex_lock(&q->sysfs_lock);
++      mutex_unlock(&q->debugfs_mutex);
+       ret = disk_register_independent_access_ranges(disk, NULL);
+       if (ret)
+diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
+index 108e3d114bfc..cc6b24a5098f 100644
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -466,7 +466,6 @@ struct request_queue {
+ #endif /* CONFIG_BLK_DEV_ZONED */
+       int                     node;
+-      struct mutex            debugfs_mutex;
+ #ifdef CONFIG_BLK_DEV_IO_TRACE
+       struct blk_trace __rcu  *blk_trace;
+ #endif
+@@ -510,11 +509,12 @@ struct request_queue {
+       struct bio_set          bio_split;
+       struct dentry           *debugfs_dir;
+-
+-#ifdef CONFIG_BLK_DEBUG_FS
+       struct dentry           *sched_debugfs_dir;
+       struct dentry           *rqos_debugfs_dir;
+-#endif
++      /*
++       * Serializes all debugfs metadata operations using the above dentries.
++       */
++      struct mutex            debugfs_mutex;
+       bool                    mq_sysfs_init_done;
+diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
+index f22219495541..f0500b5cfefe 100644
+--- a/kernel/trace/blktrace.c
++++ b/kernel/trace/blktrace.c
+@@ -770,14 +770,11 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
+  **/
+ void blk_trace_shutdown(struct request_queue *q)
+ {
+-      mutex_lock(&q->debugfs_mutex);
+       if (rcu_dereference_protected(q->blk_trace,
+                                     lockdep_is_held(&q->debugfs_mutex))) {
+               __blk_trace_startstop(q, 0);
+               __blk_trace_remove(q);
+       }
+-
+-      mutex_unlock(&q->debugfs_mutex);
+ }
+ #ifdef CONFIG_BLK_CGROUP
+-- 
+2.35.1
+
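
The convention this patch establishes can be modeled very simply: one
mutex guards every debugfs dentry of a queue, and teardown helpers become
no-ops once the main directory is gone. Below is a toy sketch with
booleans standing in for dentry pointers; all names here are illustrative.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct queue {
	pthread_mutex_t debugfs_mutex;
	bool debugfs_dir;	/* stands in for q->debugfs_dir */
	bool sched_debugfs_dir;	/* stands in for q->sched_debugfs_dir */
};

/* Caller must hold q->debugfs_mutex; the kernel asserts this with
 * lockdep_assert_held(). */
static void unregister_sched(struct queue *q)
{
	if (!q->debugfs_dir)	/* main dir already removed: nothing to do */
		return;
	q->sched_debugfs_dir = false;
}

static void release_queue(struct queue *q)
{
	pthread_mutex_lock(&q->debugfs_mutex);
	q->debugfs_dir = false;	/* debugfs_remove_recursive() stand-in */
	q->sched_debugfs_dir = false;
	pthread_mutex_unlock(&q->debugfs_mutex);
}

int main(void)
{
	struct queue q = { PTHREAD_MUTEX_INITIALIZER, true, true };

	release_queue(&q);	/* e.g. blk_release_queue() wins the race */

	pthread_mutex_lock(&q.debugfs_mutex);
	unregister_sched(&q);	/* late elevator teardown: safe no-op */
	pthread_mutex_unlock(&q.debugfs_mutex);

	printf("dir=%d sched=%d\n", q.debugfs_dir, q.sched_debugfs_dir);
	return 0;
}
```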
diff --git a/queue-5.18/btrfs-ensure-pages-are-unlocked-on-cow_file_range-fa.patch b/queue-5.18/btrfs-ensure-pages-are-unlocked-on-cow_file_range-fa.patch
new file mode 100644 (file)
index 0000000..314b37a
--- /dev/null
@@ -0,0 +1,196 @@
+From e082d91f540e4ca12f8ed8aad09d7fc71a7d45de Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 Jun 2022 15:40:59 +0900
+Subject: btrfs: ensure pages are unlocked on cow_file_range() failure
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 9ce7466f372d83054c7494f6b3e4b9abaf3f0355 ]
+
+There is a hung_task report on zoned btrfs like below.
+
+https://github.com/naota/linux/issues/59
+
+  [726.328648] INFO: task rocksdb:high0:11085 blocked for more than 241 seconds.
+  [726.329839]       Not tainted 5.16.0-rc1+ #1
+  [726.330484] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+  [726.331603] task:rocksdb:high0   state:D stack:    0 pid:11085 ppid: 11082 flags:0x00000000
+  [726.331608] Call Trace:
+  [726.331611]  <TASK>
+  [726.331614]  __schedule+0x2e5/0x9d0
+  [726.331622]  schedule+0x58/0xd0
+  [726.331626]  io_schedule+0x3f/0x70
+  [726.331629]  __folio_lock+0x125/0x200
+  [726.331634]  ? find_get_entries+0x1bc/0x240
+  [726.331638]  ? filemap_invalidate_unlock_two+0x40/0x40
+  [726.331642]  truncate_inode_pages_range+0x5b2/0x770
+  [726.331649]  truncate_inode_pages_final+0x44/0x50
+  [726.331653]  btrfs_evict_inode+0x67/0x480
+  [726.331658]  evict+0xd0/0x180
+  [726.331661]  iput+0x13f/0x200
+  [726.331664]  do_unlinkat+0x1c0/0x2b0
+  [726.331668]  __x64_sys_unlink+0x23/0x30
+  [726.331670]  do_syscall_64+0x3b/0xc0
+  [726.331674]  entry_SYSCALL_64_after_hwframe+0x44/0xae
+  [726.331677] RIP: 0033:0x7fb9490a171b
+  [726.331681] RSP: 002b:00007fb943ffac68 EFLAGS: 00000246 ORIG_RAX: 0000000000000057
+  [726.331684] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fb9490a171b
+  [726.331686] RDX: 00007fb943ffb040 RSI: 000055a6bbe6ec20 RDI: 00007fb94400d300
+  [726.331687] RBP: 00007fb943ffad00 R08: 0000000000000000 R09: 0000000000000000
+  [726.331688] R10: 0000000000000031 R11: 0000000000000246 R12: 00007fb943ffb000
+  [726.331690] R13: 00007fb943ffb040 R14: 0000000000000000 R15: 00007fb943ffd260
+  [726.331693]  </TASK>
+
+While debugging the issue, we found that running fstests generic/551 on
+a 5GB non-zoned null_blk device in the emulated zoned mode also hit a
+similar hang.
+
+Also, we can reproduce the same symptom with an error injected
+cow_file_range() setup.
+
+The hang occurs when cow_file_range() fails in the middle of
+allocation. cow_file_range(), called from do_allocation_zoned(), can
+split the given region ([start, end]) for allocation depending on
+current block group usage. When btrfs can allocate bytes for one part
+of the split regions but fails for another (e.g. because of -ENOSPC),
+we return the error leaving the pages in the successfully allocated regions
+locked. Technically, this occurs only when @unlock == 0. Otherwise, we
+unlock the pages in an allocated region after creating an ordered
+extent.
+
+Considering the callers of cow_file_range(unlock=0) won't write out
+the pages, we can unlock the pages on error exit from
+cow_file_range(). So, we can ensure all the pages except @locked_page
+are unlocked on error case.
+
+In summary, cow_file_range now behaves like this:
+
+- page_started == 1 (return value)
+  - All the pages are unlocked. IO is started.
+- unlock == 1
+  - All the pages except @locked_page are unlocked in any case
+- unlock == 0
+  - On success, all the pages are locked for writing them out
+  - On failure, all the pages except @locked_page are unlocked
+
+Fixes: 42c011000963 ("btrfs: zoned: introduce dedicated data write path for zoned filesystems")
+CC: stable@vger.kernel.org # 5.12+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/inode.c | 72 ++++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 64 insertions(+), 8 deletions(-)
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 5d15e374d032..54afa9e538c5 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -1097,6 +1097,28 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
+  * *page_started is set to one if we unlock locked_page and do everything
+  * required to start IO on it.  It may be clean and already done with
+  * IO when we return.
++ *
++ * When unlock == 1, we unlock the pages in successfully allocated regions.
++ * When unlock == 0, we leave them locked for writing them out.
++ *
++ * However, we unlock all the pages except @locked_page in case of failure.
++ *
++ * In summary, page locking state will be as follows:
++ *
++ * - page_started == 1 (return value)
++ *     - All the pages are unlocked. IO is started.
++ *     - Note that this can happen only on success
++ * - unlock == 1
++ *     - All the pages except @locked_page are unlocked in any case
++ * - unlock == 0
++ *     - On success, all the pages are locked for writing them out
++ *     - On failure, all the pages except @locked_page are unlocked
++ *
++ * When a failure happens in the second or later iteration of the
++ * while-loop, the ordered extents created in previous iterations are kept
++ * intact. So, the caller must clean them up by calling
++ * btrfs_cleanup_ordered_extents(). See btrfs_run_delalloc_range() for
++ * example.
+  */
+ static noinline int cow_file_range(struct btrfs_inode *inode,
+                                  struct page *locked_page,
+@@ -1106,6 +1128,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
+       struct btrfs_root *root = inode->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       u64 alloc_hint = 0;
++      u64 orig_start = start;
+       u64 num_bytes;
+       unsigned long ram_size;
+       u64 cur_alloc_size = 0;
+@@ -1293,18 +1316,44 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
+       btrfs_dec_block_group_reservations(fs_info, ins.objectid);
+       btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
+ out_unlock:
++      /*
++       * Now, we have three regions to clean up:
++       *
++       * |-------(1)----|---(2)---|-------------(3)----------|
++       * `- orig_start  `- start  `- start + cur_alloc_size  `- end
++       *
++       * We process each region below.
++       */
++
+       clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+               EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
+       page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK;
++
+       /*
+-       * If we reserved an extent for our delalloc range (or a subrange) and
+-       * failed to create the respective ordered extent, then it means that
+-       * when we reserved the extent we decremented the extent's size from
+-       * the data space_info's bytes_may_use counter and incremented the
+-       * space_info's bytes_reserved counter by the same amount. We must make
+-       * sure extent_clear_unlock_delalloc() does not try to decrement again
+-       * the data space_info's bytes_may_use counter, therefore we do not pass
+-       * it the flag EXTENT_CLEAR_DATA_RESV.
++       * For the range (1). We have already instantiated the ordered extents
++       * for this region. They are cleaned up by
++       * btrfs_cleanup_ordered_extents() in e.g,
++       * btrfs_run_delalloc_range(). EXTENT_LOCKED | EXTENT_DELALLOC are
++       * already cleared in the above loop. And, EXTENT_DELALLOC_NEW |
++       * EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV are handled by the cleanup
++       * function.
++       *
++       * However, in case of unlock == 0, we still need to unlock the pages
++       * (except @locked_page) to ensure all the pages are unlocked.
++       */
++      if (!unlock && orig_start < start)
++              extent_clear_unlock_delalloc(inode, orig_start, start - 1,
++                                           locked_page, 0, page_ops);
++
++      /*
++       * For the range (2). If we reserved an extent for our delalloc range
++       * (or a subrange) and failed to create the respective ordered extent,
++       * then it means that when we reserved the extent we decremented the
++       * extent's size from the data space_info's bytes_may_use counter and
++       * incremented the space_info's bytes_reserved counter by the same
++       * amount. We must make sure extent_clear_unlock_delalloc() does not try
++       * to decrement again the data space_info's bytes_may_use counter,
++       * therefore we do not pass it the flag EXTENT_CLEAR_DATA_RESV.
+        */
+       if (extent_reserved) {
+               extent_clear_unlock_delalloc(inode, start,
+@@ -1316,6 +1365,13 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
+               if (start >= end)
+                       goto out;
+       }
++
++      /*
++       * For the range (3). We never touched the region. In addition to the
++       * clear_bits above, we add EXTENT_CLEAR_DATA_RESV to release the data
++       * space_info's bytes_may_use counter, reserved in
++       * btrfs_check_data_free_space().
++       */
+       extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                    clear_bits | EXTENT_CLEAR_DATA_RESV,
+                                    page_ops);
+-- 
+2.35.1
+
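
The invariant this patch establishes on the failure path is easy to
state: every page in the requested range ends up unlocked except
@locked_page, which the caller still owns and cleans up itself. Here is a
toy model of that rule, with flags standing in for page locks (names are
illustrative):

```c
#include <stdbool.h>
#include <stdio.h>

#define NR_PAGES 8

/* Unlock every page in [start, end] except the caller-owned one —
 * the guarantee cow_file_range() now provides on error. */
static void unlock_range_on_error(bool locked[], int start, int end,
				  int locked_page)
{
	int i;

	for (i = start; i <= end; i++) {
		if (i == locked_page)
			continue;
		locked[i] = false;
	}
}

int main(void)
{
	bool locked[NR_PAGES];
	int i;

	for (i = 0; i < NR_PAGES; i++)
		locked[i] = true;	/* all pages locked for delalloc */

	unlock_range_on_error(locked, 0, NR_PAGES - 1, 3);

	for (i = 0; i < NR_PAGES; i++)
		printf("page %d: %s\n", i, locked[i] ? "locked" : "unlocked");
	return 0;
}
```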
diff --git a/queue-5.18/btrfs-fix-error-handling-of-fallback-uncompress-writ.patch b/queue-5.18/btrfs-fix-error-handling-of-fallback-uncompress-writ.patch
new file mode 100644 (file)
index 0000000..02c4c2e
--- /dev/null
@@ -0,0 +1,72 @@
+From db0a5d9ef124f104269150d76fb2bcbc29e5293a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 Jun 2022 15:41:01 +0900
+Subject: btrfs: fix error handling of fallback uncompress write
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 71aa147b4d9d81fa65afa6016f50d7818b64a54f ]
+
+When cow_file_range() fails in the middle of the allocation loop, it
+unlocks the pages but leaves the ordered extents intact. Thus, we need
+to call btrfs_cleanup_ordered_extents() to finish the created ordered
+extents.
+
+Also, we need to call end_extent_writepage() if locked_page is available
+because btrfs_cleanup_ordered_extents() never processes the region on
+the locked_page.
+
+Furthermore, if locked_page is unavailable, we need to mark the mapping
+with the error before unlocking the pages, so that the errno is properly
+propagated to user space.
+
+CC: stable@vger.kernel.org # 5.18+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/inode.c | 17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 54afa9e538c5..1e404476fe6a 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -891,8 +891,18 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
+               goto out;
+       }
+       if (ret < 0) {
+-              if (locked_page)
++              btrfs_cleanup_ordered_extents(inode, locked_page, start, end - start + 1);
++              if (locked_page) {
++                      const u64 page_start = page_offset(locked_page);
++                      const u64 page_end = page_start + PAGE_SIZE - 1;
++
++                      btrfs_page_set_error(inode->root->fs_info, locked_page,
++                                           page_start, PAGE_SIZE);
++                      set_page_writeback(locked_page);
++                      end_page_writeback(locked_page);
++                      end_extent_writepage(locked_page, ret, page_start, page_end);
+                       unlock_page(locked_page);
++              }
+               goto out;
+       }
+@@ -1341,9 +1351,12 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
+        * However, in case of unlock == 0, we still need to unlock the pages
+        * (except @locked_page) to ensure all the pages are unlocked.
+        */
+-      if (!unlock && orig_start < start)
++      if (!unlock && orig_start < start) {
++              if (!locked_page)
++                      mapping_set_error(inode->vfs_inode.i_mapping, ret);
+               extent_clear_unlock_delalloc(inode, orig_start, start - 1,
+                                            locked_page, 0, page_ops);
++      }
+       /*
+        * For the range (2). If we reserved an extent for our delalloc range
+-- 
+2.35.1
+
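
A note on the mapping_set_error() call added above: the error class recorded on the mapping is what a later fsync() reports back to user space. The following standalone model (simplified; the real kernel also tracks errors per-file with errseq_t) sketches that round trip under those assumptions:

#include <errno.h>
#include <stdbool.h>

/* Simplified model of the address_space error flags: -ENOSPC is kept
 * distinct, any other error collapses to -EIO, and the flags are
 * consumed by the next fsync()-style check. */
static bool as_eio, as_enospc;

static void mapping_set_error_model(int err)
{
        if (!err)
                return;
        if (err == -ENOSPC)
                as_enospc = true;
        else
                as_eio = true;
}

static int filemap_check_errors_model(void)
{
        int ret = 0;

        if (as_enospc) {
                as_enospc = false;
                ret = -ENOSPC;
        }
        if (as_eio) {
                as_eio = false;
                ret = -EIO;
        }
        return ret;
}
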
diff --git a/queue-5.18/btrfs-let-can_allocate_chunk-return-error.patch b/queue-5.18/btrfs-let-can_allocate_chunk-return-error.patch
new file mode 100644 (file)
index 0000000..4e6d94d
--- /dev/null
@@ -0,0 +1,66 @@
+From d95e54fdfd9bad3d0327ac42599359dc8c90ef75 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:43 +0900
+Subject: btrfs: let can_allocate_chunk return error
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit bb9950d3df7169a673c594d38fb74e241ed4fb2a ]
+
+In preparation for a later patch, convert the return type from bool to
+int so that errors can be returned. No functional changes.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent-tree.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index f45ecd939a2c..8bdcbc0c6d60 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -3985,12 +3985,12 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl,
+       }
+ }
+-static bool can_allocate_chunk(struct btrfs_fs_info *fs_info,
+-                             struct find_free_extent_ctl *ffe_ctl)
++static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
++                            struct find_free_extent_ctl *ffe_ctl)
+ {
+       switch (ffe_ctl->policy) {
+       case BTRFS_EXTENT_ALLOC_CLUSTERED:
+-              return true;
++              return 0;
+       case BTRFS_EXTENT_ALLOC_ZONED:
+               /*
+                * If we have enough free space left in an already
+@@ -4000,8 +4000,8 @@ static bool can_allocate_chunk(struct btrfs_fs_info *fs_info,
+                */
+               if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size &&
+                   !btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
+-                      return false;
+-              return true;
++                      return -ENOSPC;
++              return 0;
+       default:
+               BUG();
+       }
+@@ -4083,8 +4083,9 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
+                       int exist = 0;
+                       /* Check if allocation policy allows to create a new chunk */
+-                      if (!can_allocate_chunk(fs_info, ffe_ctl))
+-                              return -ENOSPC;
++                      ret = can_allocate_chunk(fs_info, ffe_ctl);
++                      if (ret)
++                              return ret;
+                       trans = current->journal_info;
+                       if (trans)
+-- 
+2.35.1
+
diff --git a/queue-5.18/btrfs-make-the-bg_reclaim_threshold-per-space-info.patch b/queue-5.18/btrfs-make-the-bg_reclaim_threshold-per-space-info.patch
new file mode 100644 (file)
index 0000000..de2ee52
--- /dev/null
@@ -0,0 +1,176 @@
+From e4ae7bab98014a3d07e59efe09a829cb15cb518f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 29 Mar 2022 01:56:06 -0700
+Subject: btrfs: make the bg_reclaim_threshold per-space info
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+[ Upstream commit bb5a098d9791f184899499531ff4411089e2a5e0 ]
+
+For non-zoned filesystems it's useful to have the auto reclaim feature;
+however, there are different use cases for non-zoned, for example we may
+never want to reclaim metadata chunks, only data chunks.  Move this
+sysfs flag to per-space_info.  This won't affect current users because
+this tunable only ever did anything for zoned, and that is currently
+hidden behind BTRFS_CONFIG_DEBUG.
+
+Tested-by: Pankaj Raghav <p.raghav@samsung.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+[ jth restore global bg_reclaim_threshold ]
+Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/free-space-cache.c |  7 +++++--
+ fs/btrfs/space-info.c       |  9 +++++++++
+ fs/btrfs/space-info.h       |  6 ++++++
+ fs/btrfs/sysfs.c            | 37 +++++++++++++++++++++++++++++++++++++
+ fs/btrfs/zoned.h            |  6 +-----
+ 5 files changed, 58 insertions(+), 7 deletions(-)
+
+diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
+index 01a408db5683..ef84bc5030cd 100644
+--- a/fs/btrfs/free-space-cache.c
++++ b/fs/btrfs/free-space-cache.c
+@@ -2630,16 +2630,19 @@ int __btrfs_add_free_space(struct btrfs_block_group *block_group,
+ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
+                                       u64 bytenr, u64 size, bool used)
+ {
+-      struct btrfs_fs_info *fs_info = block_group->fs_info;
++      struct btrfs_space_info *sinfo = block_group->space_info;
+       struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+       u64 offset = bytenr - block_group->start;
+       u64 to_free, to_unusable;
+-      const int bg_reclaim_threshold = READ_ONCE(fs_info->bg_reclaim_threshold);
++      int bg_reclaim_threshold = 0;
+       bool initial = (size == block_group->length);
+       u64 reclaimable_unusable;
+       WARN_ON(!initial && offset + size > block_group->zone_capacity);
++      if (!initial)
++              bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold);
++
+       spin_lock(&ctl->tree_lock);
+       if (!used)
+               to_free = size;
+diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
+index 56a7c99fc03e..85608acb9557 100644
+--- a/fs/btrfs/space-info.c
++++ b/fs/btrfs/space-info.c
+@@ -181,6 +181,12 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
+               found->full = 0;
+ }
++/*
++ * Block groups with more than this value (percents) of unusable space will be
++ * scheduled for background reclaim.
++ */
++#define BTRFS_DEFAULT_ZONED_RECLAIM_THRESH                    (75)
++
+ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
+ {
+@@ -203,6 +209,9 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
+       INIT_LIST_HEAD(&space_info->priority_tickets);
+       space_info->clamp = 1;
++      if (btrfs_is_zoned(info))
++              space_info->bg_reclaim_threshold = BTRFS_DEFAULT_ZONED_RECLAIM_THRESH;
++
+       ret = btrfs_sysfs_add_space_info_type(info, space_info);
+       if (ret)
+               return ret;
+diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
+index d841fed73492..a803e29bd781 100644
+--- a/fs/btrfs/space-info.h
++++ b/fs/btrfs/space-info.h
+@@ -24,6 +24,12 @@ struct btrfs_space_info {
+                                  the space info if we had an ENOSPC in the
+                                  allocator. */
++      /*
++       * Once a block group drops below this threshold (percents) we'll
++       * schedule it for reclaim.
++       */
++      int bg_reclaim_threshold;
++
+       int clamp;              /* Used to scale our threshold for preemptive
+                                  flushing. The value is >> clamp, so turns
+                                  out to be a 2^clamp divisor. */
+diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
+index ba78ca5aabbb..43845cae0c74 100644
+--- a/fs/btrfs/sysfs.c
++++ b/fs/btrfs/sysfs.c
+@@ -722,6 +722,42 @@ SPACE_INFO_ATTR(bytes_zone_unusable);
+ SPACE_INFO_ATTR(disk_used);
+ SPACE_INFO_ATTR(disk_total);
++static ssize_t btrfs_sinfo_bg_reclaim_threshold_show(struct kobject *kobj,
++                                                   struct kobj_attribute *a,
++                                                   char *buf)
++{
++      struct btrfs_space_info *space_info = to_space_info(kobj);
++      ssize_t ret;
++
++      ret = sysfs_emit(buf, "%d\n", READ_ONCE(space_info->bg_reclaim_threshold));
++
++      return ret;
++}
++
++static ssize_t btrfs_sinfo_bg_reclaim_threshold_store(struct kobject *kobj,
++                                                    struct kobj_attribute *a,
++                                                    const char *buf, size_t len)
++{
++      struct btrfs_space_info *space_info = to_space_info(kobj);
++      int thresh;
++      int ret;
++
++      ret = kstrtoint(buf, 10, &thresh);
++      if (ret)
++              return ret;
++
++      if (thresh != 0 && (thresh <= 50 || thresh > 100))
++              return -EINVAL;
++
++      WRITE_ONCE(space_info->bg_reclaim_threshold, thresh);
++
++      return len;
++}
++
++BTRFS_ATTR_RW(space_info, bg_reclaim_threshold,
++            btrfs_sinfo_bg_reclaim_threshold_show,
++            btrfs_sinfo_bg_reclaim_threshold_store);
++
+ /*
+  * Allocation information about block group types.
+  *
+@@ -738,6 +774,7 @@ static struct attribute *space_info_attrs[] = {
+       BTRFS_ATTR_PTR(space_info, bytes_zone_unusable),
+       BTRFS_ATTR_PTR(space_info, disk_used),
+       BTRFS_ATTR_PTR(space_info, disk_total),
++      BTRFS_ATTR_PTR(space_info, bg_reclaim_threshold),
+       NULL,
+ };
+ ATTRIBUTE_GROUPS(space_info);
+diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
+index c424417e19bb..199b69670fa2 100644
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -10,11 +10,7 @@
+ #include "block-group.h"
+ #include "btrfs_inode.h"
+-/*
+- * Block groups with more than this value (percents) of unusable space will be
+- * scheduled for background reclaim.
+- */
+-#define BTRFS_DEFAULT_RECLAIM_THRESH          75
++#define BTRFS_DEFAULT_RECLAIM_THRESH                                  (75)
+ struct btrfs_zoned_device_info {
+       /*
+-- 
+2.35.1
+
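
To make the tunable's semantics concrete: per the sysfs store above, valid values are 0 (reclaim disabled) or a percentage in (50, 100], and a zoned block group becomes a reclaim candidate once its unusable bytes reach that share of the zone capacity. A standalone sketch of the trigger (field names are illustrative, mirroring the percentage check in __btrfs_add_free_space_zoned()):

#include <stdbool.h>
#include <stdint.h>

static bool should_reclaim_bg(uint64_t reclaimable_unusable,
                              uint64_t zone_capacity, int thresh)
{
        if (thresh == 0)        /* 0 disables background reclaim */
                return false;
        return reclaimable_unusable >= zone_capacity * thresh / 100;
}
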
diff --git a/queue-5.18/btrfs-properly-flag-filesystem-with-btrfs_feature_in.patch b/queue-5.18/btrfs-properly-flag-filesystem-with-btrfs_feature_in.patch
new file mode 100644 (file)
index 0000000..795088e
--- /dev/null
@@ -0,0 +1,72 @@
+From 2317054fdc824202a5af22cdb2b8b2f0fcc792d6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jun 2022 10:55:47 +0300
+Subject: btrfs: properly flag filesystem with
+ BTRFS_FEATURE_INCOMPAT_BIG_METADATA
+
+From: Nikolay Borisov <nborisov@suse.com>
+
+[ Upstream commit e26b04c4c91925dba57324db177a24e18e2d0013 ]
+
+Commit 6f93e834fa7c seemingly inadvertently moved the code responsible
+for flagging the filesystem as having BIG_METADATA to a place where
+setting the flag was essentially lost. This means that
+filesystems created with kernels containing this bug (starting with 5.15)
+can potentially be mounted by older (pre-3.4) kernels. In reality the
+chances of this happening are low because other incompat flags have been
+introduced in the meantime. Still, the correct behavior is to set the
+INCOMPAT_BIG_METADATA flag and persist it in the superblock.
+
+Fixes: 6f93e834fa7c ("btrfs: fix upper limit for max_inline for page size 64K")
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/disk-io.c | 21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index f45470798022..34cd57d799e4 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -3577,16 +3577,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+        */
+       fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
+-      /*
+-       * Flag our filesystem as having big metadata blocks if they are bigger
+-       * than the page size.
+-       */
+-      if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
+-              if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
+-                      btrfs_info(fs_info,
+-                              "flagging fs with big metadata feature");
+-              features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
+-      }
+       /* Set up fs_info before parsing mount options */
+       nodesize = btrfs_super_nodesize(disk_super);
+@@ -3627,6 +3617,17 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+       if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
+               btrfs_info(fs_info, "has skinny extents");
++      /*
++       * Flag our filesystem as having big metadata blocks if they are bigger
++       * than the page size.
++       */
++      if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
++              if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
++                      btrfs_info(fs_info,
++                              "flagging fs with big metadata feature");
++              features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
++      }
++
+       /*
+        * mixed block groups end up with duplicate but slightly offset
+        * extent buffers for the same range.  It leads to corruptions
+-- 
+2.35.1
+
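
For context on why the lost flag matters: at mount time btrfs rejects a superblock that carries any incompat bit the running kernel does not support, so persisting BIG_METADATA is exactly what keeps pre-3.4 kernels from mounting such a filesystem. A standalone model of that gate (the bit value follows the on-disk format headers; the function itself is illustrative, not the open_ctree() code):

#include <errno.h>
#include <stdint.h>

#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA     (1ULL << 5)

/* kernel_supp stands in for the running kernel's
 * BTRFS_FEATURE_INCOMPAT_SUPP mask; a pre-3.4 kernel lacks the
 * BIG_METADATA bit, so the mount is refused. */
static int check_incompat_model(uint64_t sb_incompat, uint64_t kernel_supp)
{
        return (sb_incompat & ~kernel_supp) ? -EINVAL : 0;
}
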
diff --git a/queue-5.18/btrfs-replace-btrfs_max_extent_size-with-fs_info-max.patch b/queue-5.18/btrfs-replace-btrfs_max_extent_size-with-fs_info-max.patch
new file mode 100644 (file)
index 0000000..1e01042
--- /dev/null
@@ -0,0 +1,222 @@
+From dcb75d1d2ed081e90e672a228bd75205ce484c3e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:40 +0900
+Subject: btrfs: replace BTRFS_MAX_EXTENT_SIZE with fs_info->max_extent_size
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit f7b12a62f008a3041f42f2426983e59a6a0a3c59 ]
+
+On a zoned filesystem, data write-out is limited by max_zone_append_size,
+and a large ordered extent is split according to the size of a bio. OTOH,
+the number of extents to be written is calculated using
+BTRFS_MAX_EXTENT_SIZE, and that estimated number is used to reserve the
+metadata bytes needed to update and/or create the metadata items.
+
+The metadata reservation is done at, e.g., btrfs_buffered_write() and then
+released as the estimation changes. Thus, if the number of extents
+increases massively, the reserved metadata can run out.
+
+Such an increase in the number of extents easily occurs on a zoned
+filesystem if BTRFS_MAX_EXTENT_SIZE > max_zone_append_size, and it causes
+the following warning on a small-RAM environment with metadata
+overcommit disabled (done in a following patch).
+
+[75721.498492] ------------[ cut here ]------------
+[75721.505624] BTRFS: block rsv 1 returned -28
+[75721.512230] WARNING: CPU: 24 PID: 2327559 at fs/btrfs/block-rsv.c:537 btrfs_use_block_rsv+0x560/0x760 [btrfs]
+[75721.581854] CPU: 24 PID: 2327559 Comm: kworker/u64:10 Kdump: loaded Tainted: G        W         5.18.0-rc2-BTRFS-ZNS+ #109
+[75721.597200] Hardware name: Supermicro Super Server/H12SSL-NT, BIOS 2.0 02/22/2021
+[75721.607310] Workqueue: btrfs-endio-write btrfs_work_helper [btrfs]
+[75721.616209] RIP: 0010:btrfs_use_block_rsv+0x560/0x760 [btrfs]
+[75721.646649] RSP: 0018:ffffc9000fbdf3e0 EFLAGS: 00010286
+[75721.654126] RAX: 0000000000000000 RBX: 0000000000004000 RCX: 0000000000000000
+[75721.663524] RDX: 0000000000000004 RSI: 0000000000000008 RDI: fffff52001f7be6e
+[75721.672921] RBP: ffffc9000fbdf420 R08: 0000000000000001 R09: ffff889f8d1fc6c7
+[75721.682493] R10: ffffed13f1a3f8d8 R11: 0000000000000001 R12: ffff88980a3c0e28
+[75721.692284] R13: ffff889b66590000 R14: ffff88980a3c0e40 R15: ffff88980a3c0e8a
+[75721.701878] FS:  0000000000000000(0000) GS:ffff889f8d000000(0000) knlGS:0000000000000000
+[75721.712601] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[75721.720726] CR2: 000055d12e05c018 CR3: 0000800193594000 CR4: 0000000000350ee0
+[75721.730499] Call Trace:
+[75721.735166]  <TASK>
+[75721.739886]  btrfs_alloc_tree_block+0x1e1/0x1100 [btrfs]
+[75721.747545]  ? btrfs_alloc_logged_file_extent+0x550/0x550 [btrfs]
+[75721.756145]  ? btrfs_get_32+0xea/0x2d0 [btrfs]
+[75721.762852]  ? btrfs_get_32+0xea/0x2d0 [btrfs]
+[75721.769520]  ? push_leaf_left+0x420/0x620 [btrfs]
+[75721.776431]  ? memcpy+0x4e/0x60
+[75721.781931]  split_leaf+0x433/0x12d0 [btrfs]
+[75721.788392]  ? btrfs_get_token_32+0x580/0x580 [btrfs]
+[75721.795636]  ? push_for_double_split.isra.0+0x420/0x420 [btrfs]
+[75721.803759]  ? leaf_space_used+0x15d/0x1a0 [btrfs]
+[75721.811156]  btrfs_search_slot+0x1bc3/0x2790 [btrfs]
+[75721.818300]  ? lock_downgrade+0x7c0/0x7c0
+[75721.824411]  ? free_extent_buffer.part.0+0x107/0x200 [btrfs]
+[75721.832456]  ? split_leaf+0x12d0/0x12d0 [btrfs]
+[75721.839149]  ? free_extent_buffer.part.0+0x14f/0x200 [btrfs]
+[75721.846945]  ? free_extent_buffer+0x13/0x20 [btrfs]
+[75721.853960]  ? btrfs_release_path+0x4b/0x190 [btrfs]
+[75721.861429]  btrfs_csum_file_blocks+0x85c/0x1500 [btrfs]
+[75721.869313]  ? rcu_read_lock_sched_held+0x16/0x80
+[75721.876085]  ? lock_release+0x552/0xf80
+[75721.881957]  ? btrfs_del_csums+0x8c0/0x8c0 [btrfs]
+[75721.888886]  ? __kasan_check_write+0x14/0x20
+[75721.895152]  ? do_raw_read_unlock+0x44/0x80
+[75721.901323]  ? _raw_write_lock_irq+0x60/0x80
+[75721.907983]  ? btrfs_global_root+0xb9/0xe0 [btrfs]
+[75721.915166]  ? btrfs_csum_root+0x12b/0x180 [btrfs]
+[75721.921918]  ? btrfs_get_global_root+0x820/0x820 [btrfs]
+[75721.929166]  ? _raw_write_unlock+0x23/0x40
+[75721.935116]  ? unpin_extent_cache+0x1e3/0x390 [btrfs]
+[75721.942041]  btrfs_finish_ordered_io.isra.0+0xa0c/0x1dc0 [btrfs]
+[75721.949906]  ? try_to_wake_up+0x30/0x14a0
+[75721.955700]  ? btrfs_unlink_subvol+0xda0/0xda0 [btrfs]
+[75721.962661]  ? rcu_read_lock_sched_held+0x16/0x80
+[75721.969111]  ? lock_acquire+0x41b/0x4c0
+[75721.974982]  finish_ordered_fn+0x15/0x20 [btrfs]
+[75721.981639]  btrfs_work_helper+0x1af/0xa80 [btrfs]
+[75721.988184]  ? _raw_spin_unlock_irq+0x28/0x50
+[75721.994643]  process_one_work+0x815/0x1460
+[75722.000444]  ? pwq_dec_nr_in_flight+0x250/0x250
+[75722.006643]  ? do_raw_spin_trylock+0xbb/0x190
+[75722.013086]  worker_thread+0x59a/0xeb0
+[75722.018511]  kthread+0x2ac/0x360
+[75722.023428]  ? process_one_work+0x1460/0x1460
+[75722.029431]  ? kthread_complete_and_exit+0x30/0x30
+[75722.036044]  ret_from_fork+0x22/0x30
+[75722.041255]  </TASK>
+[75722.045047] irq event stamp: 0
+[75722.049703] hardirqs last  enabled at (0): [<0000000000000000>] 0x0
+[75722.057610] hardirqs last disabled at (0): [<ffffffff8118a94a>] copy_process+0x1c1a/0x66b0
+[75722.067533] softirqs last  enabled at (0): [<ffffffff8118a989>] copy_process+0x1c59/0x66b0
+[75722.077423] softirqs last disabled at (0): [<0000000000000000>] 0x0
+[75722.085335] ---[ end trace 0000000000000000 ]---
+
+To fix the estimation, we need to introduce fs_info->max_extent_size to
+replace BTRFS_MAX_EXTENT_SIZE, which allows setting different sizes for
+regular and zoned filesystems.
+
+Set fs_info->max_extent_size to BTRFS_MAX_EXTENT_SIZE by default. On a
+zoned filesystem, it is set to fs_info->max_zone_append_size.
+
+CC: stable@vger.kernel.org # 5.12+
+Fixes: d8e3fb106f39 ("btrfs: zoned: use ZONE_APPEND write for zoned mode")
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ctree.h     | 6 ++++++
+ fs/btrfs/disk-io.c   | 2 ++
+ fs/btrfs/extent_io.c | 4 +++-
+ fs/btrfs/inode.c     | 6 ++++--
+ fs/btrfs/zoned.c     | 5 ++++-
+ 5 files changed, 19 insertions(+), 4 deletions(-)
+
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index 1c377bcfe787..97f5a3d320ff 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -1032,6 +1032,12 @@ struct btrfs_fs_info {
+       u32 csums_per_leaf;
+       u32 stripesize;
++      /*
++       * Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular
++       * filesystem, on zoned it depends on the device constraints.
++       */
++      u64 max_extent_size;
++
+       /* Block groups and devices containing active swapfiles. */
+       spinlock_t swapfile_pins_lock;
+       struct rb_root swapfile_pins;
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 34cd57d799e4..bf5c6ac67e87 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -3246,6 +3246,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
+       fs_info->sectorsize_bits = ilog2(4096);
+       fs_info->stripesize = 4096;
++      fs_info->max_extent_size = BTRFS_MAX_EXTENT_SIZE;
++
+       spin_lock_init(&fs_info->swapfile_pins_lock);
+       fs_info->swapfile_pins = RB_ROOT;
+diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
+index 68ddd90685d9..bfc7d5b31156 100644
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -1992,10 +1992,12 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
+                                   struct page *locked_page, u64 *start,
+                                   u64 *end)
+ {
++      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+       const u64 orig_start = *start;
+       const u64 orig_end = *end;
+-      u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
++      /* The sanity tests may not set a valid fs_info. */
++      u64 max_bytes = fs_info ? fs_info->max_extent_size : BTRFS_MAX_EXTENT_SIZE;
+       u64 delalloc_start;
+       u64 delalloc_end;
+       bool found;
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 1e404476fe6a..c50288d90c66 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -2102,6 +2102,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
+ void btrfs_split_delalloc_extent(struct inode *inode,
+                                struct extent_state *orig, u64 split)
+ {
++      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       u64 size;
+       /* not delalloc, ignore it */
+@@ -2109,7 +2110,7 @@ void btrfs_split_delalloc_extent(struct inode *inode,
+               return;
+       size = orig->end - orig->start + 1;
+-      if (size > BTRFS_MAX_EXTENT_SIZE) {
++      if (size > fs_info->max_extent_size) {
+               u32 num_extents;
+               u64 new_size;
+@@ -2138,6 +2139,7 @@ void btrfs_split_delalloc_extent(struct inode *inode,
+ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
+                                struct extent_state *other)
+ {
++      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       u64 new_size, old_size;
+       u32 num_extents;
+@@ -2151,7 +2153,7 @@ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
+               new_size = other->end - new->start + 1;
+       /* we're not bigger than the max, unreserve the space and go */
+-      if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
++      if (new_size <= fs_info->max_extent_size) {
+               spin_lock(&BTRFS_I(inode)->lock);
+               btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
+               spin_unlock(&BTRFS_I(inode)->lock);
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 1d5b9308f5ef..a0bf2c20fa61 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -731,8 +731,11 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
+       }
+       fs_info->zone_size = zone_size;
+-      fs_info->max_zone_append_size = max_zone_append_size;
++      fs_info->max_zone_append_size = ALIGN_DOWN(max_zone_append_size,
++                                                 fs_info->sectorsize);
+       fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
++      if (fs_info->max_zone_append_size < fs_info->max_extent_size)
++              fs_info->max_extent_size = fs_info->max_zone_append_size;
+       /*
+        * Check mount options here, because we might change fs_info->zoned
+-- 
+2.35.1
+
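
The estimation problem described in the message boils down to the worst-case extent count assumed for a delalloc range. A standalone sketch of that calculation, mirroring the count_max_extents() logic with the divisor now coming from fs_info->max_extent_size instead of the fixed 128M BTRFS_MAX_EXTENT_SIZE:

#include <stdint.h>

/* Worst-case number of extents needed to write `size` bytes when no
 * single extent may exceed `max_extent_size` (round up, max > 0). On a
 * zoned device max_extent_size can be much smaller than 128M, so this
 * count, and the metadata reserved for it, grows accordingly. */
static uint32_t count_max_extents_model(uint64_t size, uint64_t max_extent_size)
{
        return (uint32_t)((size + max_extent_size - 1) / max_extent_size);
}
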
diff --git a/queue-5.18/btrfs-reset-block-group-chunk-force-if-we-have-to-wa.patch b/queue-5.18/btrfs-reset-block-group-chunk-force-if-we-have-to-wa.patch
new file mode 100644 (file)
index 0000000..ab43457
--- /dev/null
@@ -0,0 +1,42 @@
+From 7b8c917f29d18606d01d4f3ae4aab8f10342c9f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Jun 2022 18:31:17 -0400
+Subject: btrfs: reset block group chunk force if we have to wait
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+[ Upstream commit 1314ca78b2c35d3e7d0f097268a2ee6dc0d369ef ]
+
+If you try to force a chunk allocation, but you race with another chunk
+allocation, you will end up waiting on the chunk allocation that just
+occurred and then allocate another chunk.  If you have many threads all
+doing this at once you can way over-allocate chunks.
+
+Fix this by resetting force to NO_FORCE; that way, if we still think we
+need to allocate we can, and otherwise we don't force another chunk
+allocation when one has already happened.
+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+CC: stable@vger.kernel.org # 5.4+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/block-group.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
+index 667b7025d503..1deca5164c23 100644
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -3724,6 +3724,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+                        * attempt.
+                        */
+                       wait_for_alloc = true;
++                      force = CHUNK_ALLOC_NO_FORCE;
+                       spin_unlock(&space_info->lock);
+                       mutex_lock(&fs_info->chunk_mutex);
+                       mutex_unlock(&fs_info->chunk_mutex);
+-- 
+2.35.1
+
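
Reduced to a standalone model, the decision a waiter makes after the concurrent allocation finishes looks like the check below. Before the fix, force was still CHUNK_ALLOC_FORCE at that point, so the left operand was always true and every waiter allocated another chunk regardless of the space just added:

#include <stdbool.h>

enum { CHUNK_ALLOC_NO_FORCE, CHUNK_ALLOC_FORCE };

/* Illustrative re-check after the wait: with force downgraded to
 * NO_FORCE, we allocate again only if the racing thread's chunk still
 * leaves us short on space. */
static bool should_alloc_chunk_model(int force, bool space_still_low)
{
        return force == CHUNK_ALLOC_FORCE || space_still_low;
}
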
diff --git a/queue-5.18/btrfs-store-chunk-size-in-space-info-struct.patch b/queue-5.18/btrfs-store-chunk-size-in-space-info-struct.patch
new file mode 100644 (file)
index 0000000..0767c57
--- /dev/null
@@ -0,0 +1,141 @@
+From 43c91308d645899ddb58951c23e4338cb43cb48c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Feb 2022 11:31:20 -0800
+Subject: btrfs: store chunk size in space-info struct
+
+From: Stefan Roesch <shr@fb.com>
+
+[ Upstream commit f6fca3917b4d99d8c13901738afec35f570a3c2f ]
+
+The chunk size is stored in the btrfs_space_info structure.  It is
+initialized when the space_info is created and is then used from there.
+
+A new API is added to update the current chunk size.  This API makes it
+possible to expose chunk_size as a sysfs setting.
+
+Signed-off-by: Stefan Roesch <shr@fb.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ rename and merge helpers, switch atomic type to u64, style fixes ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/space-info.c | 32 ++++++++++++++++++++++++++++++++
+ fs/btrfs/space-info.h |  4 ++++
+ fs/btrfs/volumes.c    | 28 +++++++++-------------------
+ 3 files changed, 45 insertions(+), 19 deletions(-)
+
+diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
+index 85608acb9557..98a84b523be6 100644
+--- a/fs/btrfs/space-info.c
++++ b/fs/btrfs/space-info.c
+@@ -187,6 +187,37 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
+  */
+ #define BTRFS_DEFAULT_ZONED_RECLAIM_THRESH                    (75)
++/*
++ * Calculate chunk size depending on volume type (regular or zoned).
++ */
++static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags)
++{
++      if (btrfs_is_zoned(fs_info))
++              return fs_info->zone_size;
++
++      ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
++
++      if (flags & BTRFS_BLOCK_GROUP_DATA)
++              return SZ_1G;
++      else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
++              return SZ_32M;
++
++      /* Handle BTRFS_BLOCK_GROUP_METADATA */
++      if (fs_info->fs_devices->total_rw_bytes > 50ULL * SZ_1G)
++              return SZ_1G;
++
++      return SZ_256M;
++}
++
++/*
++ * Update default chunk size.
++ */
++void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
++                                      u64 chunk_size)
++{
++      WRITE_ONCE(space_info->chunk_size, chunk_size);
++}
++
+ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
+ {
+@@ -208,6 +239,7 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
+       INIT_LIST_HEAD(&space_info->tickets);
+       INIT_LIST_HEAD(&space_info->priority_tickets);
+       space_info->clamp = 1;
++      btrfs_update_space_info_chunk_size(space_info, calc_chunk_size(info, flags));
+       if (btrfs_is_zoned(info))
+               space_info->bg_reclaim_threshold = BTRFS_DEFAULT_ZONED_RECLAIM_THRESH;
+diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
+index a803e29bd781..137206b8049f 100644
+--- a/fs/btrfs/space-info.h
++++ b/fs/btrfs/space-info.h
+@@ -23,6 +23,8 @@ struct btrfs_space_info {
+       u64 max_extent_size;    /* This will hold the maximum extent size of
+                                  the space info if we had an ENOSPC in the
+                                  allocator. */
++      /* Chunk size in bytes */
++      u64 chunk_size;
+       /*
+        * Once a block group drops below this threshold (percents) we'll
+@@ -121,6 +123,8 @@ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
+                            u64 total_bytes, u64 bytes_used,
+                            u64 bytes_readonly, u64 bytes_zone_unusable,
+                            struct btrfs_space_info **space_info);
++void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
++                                      u64 chunk_size);
+ struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
+                                              u64 flags);
+ u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
+diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
+index 659575526e9f..4bc97e7d8e46 100644
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -5091,26 +5091,16 @@ static void init_alloc_chunk_ctl_policy_regular(
+                               struct btrfs_fs_devices *fs_devices,
+                               struct alloc_chunk_ctl *ctl)
+ {
+-      u64 type = ctl->type;
++      struct btrfs_space_info *space_info;
+-      if (type & BTRFS_BLOCK_GROUP_DATA) {
+-              ctl->max_stripe_size = SZ_1G;
+-              ctl->max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
+-      } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
+-              /* For larger filesystems, use larger metadata chunks */
+-              if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
+-                      ctl->max_stripe_size = SZ_1G;
+-              else
+-                      ctl->max_stripe_size = SZ_256M;
+-              ctl->max_chunk_size = ctl->max_stripe_size;
+-      } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
+-              ctl->max_stripe_size = SZ_32M;
+-              ctl->max_chunk_size = 2 * ctl->max_stripe_size;
+-              ctl->devs_max = min_t(int, ctl->devs_max,
+-                                    BTRFS_MAX_DEVS_SYS_CHUNK);
+-      } else {
+-              BUG();
+-      }
++      space_info = btrfs_find_space_info(fs_devices->fs_info, ctl->type);
++      ASSERT(space_info);
++
++      ctl->max_chunk_size = READ_ONCE(space_info->chunk_size);
++      ctl->max_stripe_size = ctl->max_chunk_size;
++
++      if (ctl->type & BTRFS_BLOCK_GROUP_SYSTEM)
++              ctl->devs_max = min_t(int, ctl->devs_max, BTRFS_MAX_DEVS_SYS_CHUNK);
+       /* We don't want a chunk larger than 10% of writable space */
+       ctl->max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
+-- 
+2.35.1
+
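
A standalone sketch of the sizing policy consolidated above, including the 10% clamp applied afterwards in init_alloc_chunk_ctl_policy_regular() (div_factor(n, 1) in btrfs evaluates to n / 10):

#include <stdbool.h>
#include <stdint.h>

#define SZ_32M  (32ULL << 20)
#define SZ_256M (256ULL << 20)
#define SZ_1G   (1ULL << 30)

/* Illustrative: default chunk size by block group type, then capped to
 * 10% of the writable space. */
static uint64_t chunk_size_model(bool data, bool system,
                                 uint64_t total_rw_bytes)
{
        uint64_t size;

        if (data)
                size = SZ_1G;
        else if (system)
                size = SZ_32M;
        else    /* metadata: larger chunks on filesystems over 50G */
                size = total_rw_bytes > 50 * SZ_1G ? SZ_1G : SZ_256M;

        if (size > total_rw_bytes / 10)
                size = total_rw_bytes / 10;
        return size;
}
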
diff --git a/queue-5.18/btrfs-tree-log-make-the-return-value-for-log-syncing.patch b/queue-5.18/btrfs-tree-log-make-the-return-value-for-log-syncing.patch
new file mode 100644 (file)
index 0000000..3dba67e
--- /dev/null
@@ -0,0 +1,142 @@
+From f844f5c9f2598302a23d546bfe15914ee71b9c29 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Jun 2022 15:09:48 -0400
+Subject: btrfs: tree-log: make the return value for log syncing consistent
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+[ Upstream commit f31f09f6be1c6c1a673e0566e258281a7bbaaa51 ]
+
+Currently we will return 1 or -EAGAIN if we decide we need to commit
+the transaction rather than sync the log.  In practice this doesn't
+really matter: we interpret any nonzero value that is not
+BTRFS_NO_LOG_SYNC as needing to commit the transaction.  However, this
+makes it hard to figure out what the correct thing to do is.
+
+Fix this up by defining BTRFS_LOG_FORCE_COMMIT and using this in all the
+places where we want to force the transaction to be committed.
+
+CC: stable@vger.kernel.org # 5.15+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/file.c     |  2 +-
+ fs/btrfs/tree-log.c | 18 +++++++++---------
+ fs/btrfs/tree-log.h |  3 +++
+ 3 files changed, 13 insertions(+), 10 deletions(-)
+
+diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
+index 153920acd226..2d24f2dcc0ea 100644
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -2344,7 +2344,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
+       btrfs_release_log_ctx_extents(&ctx);
+       if (ret < 0) {
+               /* Fallthrough and commit/free transaction. */
+-              ret = 1;
++              ret = BTRFS_LOG_FORCE_COMMIT;
+       }
+       /* we've logged all the items and now have a consistent
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index e65633686378..08917069a125 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -171,7 +171,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
+               int index = (root->log_transid + 1) % 2;
+               if (btrfs_need_log_full_commit(trans)) {
+-                      ret = -EAGAIN;
++                      ret = BTRFS_LOG_FORCE_COMMIT;
+                       goto out;
+               }
+@@ -194,7 +194,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
+                * writing.
+                */
+               if (zoned && !created) {
+-                      ret = -EAGAIN;
++                      ret = BTRFS_LOG_FORCE_COMMIT;
+                       goto out;
+               }
+@@ -3122,7 +3122,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
+       /* bail out if we need to do a full commit */
+       if (btrfs_need_log_full_commit(trans)) {
+-              ret = -EAGAIN;
++              ret = BTRFS_LOG_FORCE_COMMIT;
+               mutex_unlock(&root->log_mutex);
+               goto out;
+       }
+@@ -3223,7 +3223,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
+               }
+               btrfs_wait_tree_log_extents(log, mark);
+               mutex_unlock(&log_root_tree->log_mutex);
+-              ret = -EAGAIN;
++              ret = BTRFS_LOG_FORCE_COMMIT;
+               goto out;
+       }
+@@ -3262,7 +3262,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
+               blk_finish_plug(&plug);
+               btrfs_wait_tree_log_extents(log, mark);
+               mutex_unlock(&log_root_tree->log_mutex);
+-              ret = -EAGAIN;
++              ret = BTRFS_LOG_FORCE_COMMIT;
+               goto out_wake_log_root;
+       }
+@@ -5849,7 +5849,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
+           inode_only == LOG_INODE_ALL &&
+           inode->last_unlink_trans >= trans->transid) {
+               btrfs_set_log_full_commit(trans);
+-              ret = 1;
++              ret = BTRFS_LOG_FORCE_COMMIT;
+               goto out_unlock;
+       }
+@@ -6563,12 +6563,12 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
+       bool log_dentries = false;
+       if (btrfs_test_opt(fs_info, NOTREELOG)) {
+-              ret = 1;
++              ret = BTRFS_LOG_FORCE_COMMIT;
+               goto end_no_trans;
+       }
+       if (btrfs_root_refs(&root->root_item) == 0) {
+-              ret = 1;
++              ret = BTRFS_LOG_FORCE_COMMIT;
+               goto end_no_trans;
+       }
+@@ -6666,7 +6666,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
+ end_trans:
+       if (ret < 0) {
+               btrfs_set_log_full_commit(trans);
+-              ret = 1;
++              ret = BTRFS_LOG_FORCE_COMMIT;
+       }
+       if (ret)
+diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
+index 1620f8170629..57ab5f3b8dc7 100644
+--- a/fs/btrfs/tree-log.h
++++ b/fs/btrfs/tree-log.h
+@@ -12,6 +12,9 @@
+ /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
+ #define BTRFS_NO_LOG_SYNC 256
++/* We can't use the tree log for whatever reason, force a transaction commit */
++#define BTRFS_LOG_FORCE_COMMIT                                (1)
++
+ struct btrfs_log_ctx {
+       int log_ret;
+       int log_transid;
+-- 
+2.35.1
+
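
The caller-side contract the message refers to, as a standalone model: any nonzero return other than BTRFS_NO_LOG_SYNC means the log could not be used and the caller falls back to a full transaction commit. BTRFS_LOG_FORCE_COMMIT only makes that intent explicit at the return sites:

#include <stdbool.h>

#define BTRFS_NO_LOG_SYNC       256
#define BTRFS_LOG_FORCE_COMMIT  1

/* 0: log synced; NO_LOG_SYNC: nothing to log; anything else: commit. */
static bool must_commit_transaction(int log_ret)
{
        return log_ret != 0 && log_ret != BTRFS_NO_LOG_SYNC;
}
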
diff --git a/queue-5.18/btrfs-zoned-activate-metadata-block-group-on-flush_s.patch b/queue-5.18/btrfs-zoned-activate-metadata-block-group-on-flush_s.patch
new file mode 100644 (file)
index 0000000..5446c35
--- /dev/null
@@ -0,0 +1,180 @@
+From d866ac5585bc1afadfcc6bb5649813cd1f0f82ab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:47 +0900
+Subject: btrfs: zoned: activate metadata block group on flush_space
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit b0931513913633044ed6e3800334c28433c007b0 ]
+
+For metadata space on a zoned filesystem, reaching ALLOC_CHUNK{,_FORCE}
+means we don't have enough space left in active_total_bytes. Before
+allocating a new chunk, we can try to activate an existing block group
+in this case.
+
+Also, allocating a chunk is not enough to grant a ticket for metadata
+space on a zoned filesystem; we need to activate the block group to
+increase active_total_bytes.
+
+btrfs_zoned_activate_one_bg() implements the activation feature. It will
+activate a block group, possibly by finishing another block group first,
+and it gives up if it cannot finish any block group.
+
+CC: stable@vger.kernel.org # 5.16+
+Fixes: afba2bc036b0 ("btrfs: zoned: implement active zone tracking")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/space-info.c | 30 ++++++++++++++++++++++++
+ fs/btrfs/zoned.c      | 53 +++++++++++++++++++++++++++++++++++++++++++
+ fs/btrfs/zoned.h      | 10 ++++++++
+ 3 files changed, 93 insertions(+)
+
+diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
+index 4867199cf983..104cbc901c0e 100644
+--- a/fs/btrfs/space-info.c
++++ b/fs/btrfs/space-info.c
+@@ -9,6 +9,7 @@
+ #include "ordered-data.h"
+ #include "transaction.h"
+ #include "block-group.h"
++#include "zoned.h"
+ /*
+  * HOW DOES SPACE RESERVATION WORK
+@@ -724,6 +725,18 @@ static void flush_space(struct btrfs_fs_info *fs_info,
+               break;
+       case ALLOC_CHUNK:
+       case ALLOC_CHUNK_FORCE:
++              /*
++               * For metadata space on zoned filesystem, reaching here means we
++               * For metadata space on a zoned filesystem, reaching here means
++               * we don't have enough space left in active_total_bytes. Try to
++               * activate a block group first, because we may have an
++               * inactive block group already allocated.
++              ret = btrfs_zoned_activate_one_bg(fs_info, space_info, false);
++              if (ret < 0)
++                      break;
++              else if (ret == 1)
++                      break;
++
+               trans = btrfs_join_transaction(root);
+               if (IS_ERR(trans)) {
+                       ret = PTR_ERR(trans);
+@@ -734,6 +747,23 @@ static void flush_space(struct btrfs_fs_info *fs_info,
+                               (state == ALLOC_CHUNK) ? CHUNK_ALLOC_NO_FORCE :
+                                       CHUNK_ALLOC_FORCE);
+               btrfs_end_transaction(trans);
++
++              /*
++               * For metadata space on a zoned filesystem, allocating a new
++               * chunk is not enough. We still need to activate the block group.
++               * Activate the newly allocated block group by (maybe) finishing
++               * a block group.
++               */
++              if (ret == 1) {
++                      ret = btrfs_zoned_activate_one_bg(fs_info, space_info, true);
++                      /*
++                       * Revert to the original ret regardless of whether we
++                       * could finish one block group or not.
++                       */
++                      if (ret >= 0)
++                              ret = 1;
++              }
++
+               if (ret > 0 || ret == -ENOSPC)
+                       ret = 0;
+               break;
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 2ffc6d50d20d..0c2d81b0e3d3 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -2222,3 +2222,56 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
+       return ret < 0 ? ret : 1;
+ }
++
++int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
++                              struct btrfs_space_info *space_info,
++                              bool do_finish)
++{
++      struct btrfs_block_group *bg;
++      int index;
++
++      if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA))
++              return 0;
++
++      /* No more block groups to activate */
++      if (space_info->active_total_bytes == space_info->total_bytes)
++              return 0;
++
++      for (;;) {
++              int ret;
++              bool need_finish = false;
++
++              down_read(&space_info->groups_sem);
++              for (index = 0; index < BTRFS_NR_RAID_TYPES; index++) {
++                      list_for_each_entry(bg, &space_info->block_groups[index],
++                                          list) {
++                              if (!spin_trylock(&bg->lock))
++                                      continue;
++                              if (btrfs_zoned_bg_is_full(bg) || bg->zone_is_active) {
++                                      spin_unlock(&bg->lock);
++                                      continue;
++                              }
++                              spin_unlock(&bg->lock);
++
++                              if (btrfs_zone_activate(bg)) {
++                                      up_read(&space_info->groups_sem);
++                                      return 1;
++                              }
++
++                              need_finish = true;
++                      }
++              }
++              up_read(&space_info->groups_sem);
++
++              if (!do_finish || !need_finish)
++                      break;
++
++              ret = btrfs_zone_finish_one_bg(fs_info);
++              if (ret == 0)
++                      break;
++              if (ret < 0)
++                      return ret;
++      }
++
++      return 0;
++}
+diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
+index 0740458894ac..1cac32266276 100644
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -80,6 +80,8 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
+ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
+                                      u64 length);
+ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
++int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
++                              struct btrfs_space_info *space_info, bool do_finish);
+ #else /* CONFIG_BLK_DEV_ZONED */
+ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
+                                    struct blk_zone *zone)
+@@ -250,6 +252,14 @@ static inline int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
+       return 1;
+ }
++static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
++                                            struct btrfs_space_info *space_info,
++                                            bool do_finish)
++{
++      /* Consider all the block groups to be active */
++      return 0;
++}
++
+ #endif
+ static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
+-- 
+2.35.1
+
diff --git a/queue-5.18/btrfs-zoned-activate-necessary-block-group.patch b/queue-5.18/btrfs-zoned-activate-necessary-block-group.patch
new file mode 100644 (file)
index 0000000..e2166bc
--- /dev/null
@@ -0,0 +1,60 @@
+From eae937137d623d9fd942a7525307563e157bc79a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:48 +0900
+Subject: btrfs: zoned: activate necessary block group
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit b6a98021e4019c562a23ad151a7e40adfa9f91e5 ]
+
+There are two places that try to ensure space by allocating a chunk,
+but where allocating a chunk alone is not enough. To meet the
+condition on active_total_bytes, we also need to activate a block
+group there.
+
+CC: stable@vger.kernel.org # 5.16+
+Fixes: afba2bc036b0 ("btrfs: zoned: implement active zone tracking")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/block-group.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
+index 88f59a2e4113..0c7fe3142d7c 100644
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -2659,6 +2659,14 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
+       ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+       if (ret < 0)
+               goto out;
++      /*
++       * We have allocated a new chunk. We also need to activate that chunk to
++       * grant metadata tickets for zoned filesystem.
++       */
++      ret = btrfs_zoned_activate_one_bg(fs_info, cache->space_info, true);
++      if (ret < 0)
++              goto out;
++
+       ret = inc_block_group_ro(cache, 0);
+       if (ret == -ETXTBSY)
+               goto unlock_out;
+@@ -3853,6 +3861,14 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans,
+               if (IS_ERR(bg)) {
+                       ret = PTR_ERR(bg);
+               } else {
++                      /*
++                       * We have a new chunk. We also need to activate it for
++                       * zoned filesystem.
++                       */
++                      ret = btrfs_zoned_activate_one_bg(fs_info, info, true);
++                      if (ret < 0)
++                              return;
++
+                       /*
+                        * If we fail to add the chunk item here, we end up
+                        * trying again at phase 2 of chunk allocation, at
+-- 
+2.35.1
+
diff --git a/queue-5.18/btrfs-zoned-disable-metadata-overcommit-for-zoned.patch b/queue-5.18/btrfs-zoned-disable-metadata-overcommit-for-zoned.patch
new file mode 100644 (file)
index 0000000..472b3b8
--- /dev/null
@@ -0,0 +1,46 @@
+From 28f8ac17eaf7d79fae9a491d13a968e609237c54 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:46 +0900
+Subject: btrfs: zoned: disable metadata overcommit for zoned
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 79417d040f4f77b19c701bccc23013b9cdac358d ]
+
+Metadata overcommit makes the space reservation flexible, but it is also
+harmful to active zone tracking. Since we cannot finish a block group from
+the metadata allocation context, we might not activate a new block group
+and might not be able to actually write out the overcommit reservations.
+
+So, disable metadata overcommit for zoned filesystems. We will ensure
+the reservations are under active_total_bytes in the following patches.
+
+CC: stable@vger.kernel.org # 5.16+
+Fixes: afba2bc036b0 ("btrfs: zoned: implement active zone tracking")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/space-info.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
+index b87931a458eb..56a7c99fc03e 100644
+--- a/fs/btrfs/space-info.c
++++ b/fs/btrfs/space-info.c
+@@ -340,7 +340,10 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
+               return 0;
+       used = btrfs_space_info_used(space_info, true);
+-      avail = calc_available_free_space(fs_info, space_info, flush);
++      if (btrfs_is_zoned(fs_info) && (space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
++              avail = 0;
++      else
++              avail = calc_available_free_space(fs_info, space_info, flush);
+       if (used + bytes < space_info->total_bytes + avail)
+               return 1;
+-- 
+2.35.1
+
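
The effect of the change, reduced to a standalone model of btrfs_can_overcommit(): with avail forced to 0, a zoned metadata reservation succeeds only when it fits into the space already backed by block groups.

#include <stdbool.h>
#include <stdint.h>

/* Illustrative overcommit check: on a zoned filesystem, metadata gets no
 * speculative headroom, so used + bytes must fit in total_bytes alone. */
static bool can_overcommit_model(uint64_t used, uint64_t bytes,
                                 uint64_t total_bytes, uint64_t avail,
                                 bool zoned_metadata)
{
        if (zoned_metadata)
                avail = 0;
        return used + bytes < total_bytes + avail;
}
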
diff --git a/queue-5.18/btrfs-zoned-finish-least-available-block-group-on-da.patch b/queue-5.18/btrfs-zoned-finish-least-available-block-group-on-da.patch
new file mode 100644 (file)
index 0000000..1e379e7
--- /dev/null
@@ -0,0 +1,188 @@
+From fe36ff205f09ab9fb8e3d7405b25826217ca9aec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:44 +0900
+Subject: btrfs: zoned: finish least available block group on data bg
+ allocation
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 393f646e34c18b85d0f41272bfcbd475ae3a0d34 ]
+
+When we run out of active zones and not enough space is left in any
+block group, we need to finish one block group to make room to activate
+a new block group.
+
+However, we cannot do this for metadata block groups because we can cause a
+deadlock by waiting for a running transaction commit. So, do that only for
+a data block group.
+
+Furthermore, the block group to be finished has two requirements. First,
+the block group must not have reserved bytes left. Having reserved bytes
+means we have an allocated region but have not yet sent bios for it. If that
+region is allocated by the thread calling btrfs_zone_finish(), it results
+in a deadlock.
+
+Second, the block group to be finished must not be a SYSTEM block
+group. Finishing a SYSTEM block group easily breaks further chunk
+allocation by nullifying the SYSTEM free space.
+
+In certain cases, we cannot find any zone-finish candidate, or
+btrfs_zone_finish() may fail. In that case, we fall back to splitting the
+allocation bytes and filling the remaining space left in the block groups.
+
+CC: stable@vger.kernel.org # 5.16+
+Fixes: afba2bc036b0 ("btrfs: zoned: implement active zone tracking")
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent-tree.c | 50 +++++++++++++++++++++++++++++++++---------
+ fs/btrfs/zoned.c       | 40 +++++++++++++++++++++++++++++++++
+ fs/btrfs/zoned.h       |  7 ++++++
+ 3 files changed, 87 insertions(+), 10 deletions(-)
+
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index 8bdcbc0c6d60..bdebd77f31b4 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -3985,6 +3985,45 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl,
+       }
+ }
++static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
++                                  struct find_free_extent_ctl *ffe_ctl)
++{
++      /* If we can activate a new zone, just allocate a chunk and use it */
++      if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
++              return 0;
++
++      /*
++       * We already reached the max active zones. Try to finish one block
++       * group to make room for a new block group. This is only possible
++       * for a data block group because btrfs_zone_finish() may need to wait
++       * for a running transaction which can cause a deadlock for metadata
++       * allocation.
++       */
++      if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
++              int ret = btrfs_zone_finish_one_bg(fs_info);
++
++              if (ret == 1)
++                      return 0;
++              else if (ret < 0)
++                      return ret;
++      }
++
++      /*
++       * If we have enough free space left in an already active block group
++       * and we can't activate any other zone now, do not allow allocating a
++       * new chunk and let find_free_extent() retry with a smaller size.
++       */
++      if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
++              return -ENOSPC;
++
++      /*
++       * We cannot activate a new block group and not enough space is left
++       * in any block group. So, allocating a new block group may not help. But,
++       * there is nothing to do anyway, so let's go with it.
++       */
++      return 0;
++}
++
+ static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
+                             struct find_free_extent_ctl *ffe_ctl)
+ {
+@@ -3992,16 +4031,7 @@ static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
+       case BTRFS_EXTENT_ALLOC_CLUSTERED:
+               return 0;
+       case BTRFS_EXTENT_ALLOC_ZONED:
+-              /*
+-               * If we have enough free space left in an already
+-               * active block group and we can't activate any other
+-               * zone now, do not allow allocating a new chunk and
+-               * let find_free_extent() retry with a smaller size.
+-               */
+-              if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size &&
+-                  !btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
+-                      return -ENOSPC;
+-              return 0;
++              return can_allocate_chunk_zoned(fs_info, ffe_ctl);
+       default:
+               BUG();
+       }
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index a0bf2c20fa61..0a6a3d6f5af7 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -2176,3 +2176,43 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
+       spin_unlock(&block_group->lock);
+       btrfs_put_block_group(block_group);
+ }
++
++int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
++{
++      struct btrfs_block_group *block_group;
++      struct btrfs_block_group *min_bg = NULL;
++      u64 min_avail = U64_MAX;
++      int ret;
++
++      spin_lock(&fs_info->zone_active_bgs_lock);
++      list_for_each_entry(block_group, &fs_info->zone_active_bgs,
++                          active_bg_list) {
++              u64 avail;
++
++              spin_lock(&block_group->lock);
++              if (block_group->reserved ||
++                  (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) {
++                      spin_unlock(&block_group->lock);
++                      continue;
++              }
++
++              avail = block_group->zone_capacity - block_group->alloc_offset;
++              if (min_avail > avail) {
++                      if (min_bg)
++                              btrfs_put_block_group(min_bg);
++                      min_bg = block_group;
++                      min_avail = avail;
++                      btrfs_get_block_group(min_bg);
++              }
++              spin_unlock(&block_group->lock);
++      }
++      spin_unlock(&fs_info->zone_active_bgs_lock);
++
++      if (!min_bg)
++              return 0;
++
++      ret = btrfs_zone_finish(min_bg);
++      btrfs_put_block_group(min_bg);
++
++      return ret < 0 ? ret : 1;
++}
+diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
+index 2d6da8f4b55a..c424417e19bb 100644
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -83,6 +83,7 @@ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
+ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
+ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
+                                      u64 length);
++int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
+ #else /* CONFIG_BLK_DEV_ZONED */
+ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
+                                    struct blk_zone *zone)
+@@ -247,6 +248,12 @@ static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }
+ static inline void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info,
+                                                    u64 logical, u64 length) { }
++
++static inline int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
++{
++      return 1;
++}
++
+ #endif
+ static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
+-- 
+2.35.1
+
diff --git a/queue-5.18/btrfs-zoned-introduce-btrfs_zoned_bg_is_full.patch b/queue-5.18/btrfs-zoned-introduce-btrfs_zoned_bg_is_full.patch
new file mode 100644 (file)
index 0000000..e8cd214
--- /dev/null
@@ -0,0 +1,68 @@
+From 3672acc7c01efcf0c957c7949f4ff6dcaffde3f1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 May 2022 17:48:50 -0700
+Subject: btrfs: zoned: introduce btrfs_zoned_bg_is_full
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 1bfd476754a2d63f899ef9c3e253b17766b8fb73 ]
+
+Introduce a wrapper to check whether all the space in a block group is
+allocated.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent-tree.c | 3 +--
+ fs/btrfs/zoned.c       | 2 +-
+ fs/btrfs/zoned.h       | 6 ++++++
+ 3 files changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index bdebd77f31b4..56185541e188 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -3803,8 +3803,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
+       /* Check RO and no space case before trying to activate it */
+       spin_lock(&block_group->lock);
+-      if (block_group->ro ||
+-          block_group->alloc_offset == block_group->zone_capacity) {
++      if (block_group->ro || btrfs_zoned_bg_is_full(block_group)) {
+               ret = 1;
+               /*
+                * May need to clear fs_info->{treelog,data_reloc}_bg.
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 0a6a3d6f5af7..170681797283 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1859,7 +1859,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+       }
+       /* No space left */
+-      if (block_group->alloc_offset == block_group->zone_capacity) {
++      if (btrfs_zoned_bg_is_full(block_group)) {
+               ret = false;
+               goto out_unlock;
+       }
+diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
+index 199b69670fa2..0740458894ac 100644
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -384,4 +384,10 @@ static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode)
+               mutex_unlock(&root->fs_info->zoned_data_reloc_io_lock);
+ }
++static inline bool btrfs_zoned_bg_is_full(const struct btrfs_block_group *bg)
++{
++      ASSERT(btrfs_is_zoned(bg->fs_info));
++      return (bg->alloc_offset == bg->zone_capacity);
++}
++
+ #endif
+-- 
+2.35.1
+
diff --git a/queue-5.18/btrfs-zoned-introduce-space_info-active_total_bytes.patch b/queue-5.18/btrfs-zoned-introduce-space_info-active_total_bytes.patch
new file mode 100644 (file)
index 0000000..d89dee7
--- /dev/null
@@ -0,0 +1,257 @@
+From 24698349d4bc8dc722b1db3ab3979252e0b38fe3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:45 +0900
+Subject: btrfs: zoned: introduce space_info->active_total_bytes
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 6a921de589926a350634e6e279f43fa5b9dbf5ba ]
+
+The active_total_bytes, like the total_bytes, accounts for the total bytes
+of active block groups in the space_info.
+
+With the introduction of active_total_bytes, we can check if the reserved
+bytes can be written to the block groups without activating a new block
+group. The check is necessary for metadata allocation on a zoned
+filesystem: we cannot finish a block group, which may require waiting
+for the current transaction, from the metadata allocation context.
+Instead, we need to ensure the ongoing allocation (reserved bytes) fits
+in the active block groups.
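+
+As a minimal sketch (condensed from the writable_total_bytes() and
+btrfs_can_overcommit() hunks below; locking and flushing omitted, not
+the exact kernel code), the reservation check effectively becomes:
+
+	/* Illustrative only: pick the writable total for this space_info. */
+	u64 writable = (btrfs_is_zoned(fs_info) &&
+			!(space_info->flags & BTRFS_BLOCK_GROUP_DATA)) ?
+		       space_info->active_total_bytes :
+		       space_info->total_bytes;
+
+	if (used + bytes < writable + avail)
+		return 1;	/* fits without activating a new block group */
+	return 0;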
+
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/block-group.c | 12 +++++++++---
+ fs/btrfs/space-info.c  | 41 ++++++++++++++++++++++++++++++++---------
+ fs/btrfs/space-info.h  |  4 +++-
+ fs/btrfs/zoned.c       |  6 ++++++
+ 4 files changed, 50 insertions(+), 13 deletions(-)
+
+diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
+index 1deca5164c23..88f59a2e4113 100644
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -1033,8 +1033,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
+                       < block_group->zone_unusable);
+               WARN_ON(block_group->space_info->disk_total
+                       < block_group->length * factor);
++              WARN_ON(block_group->zone_is_active &&
++                      block_group->space_info->active_total_bytes
++                      < block_group->length);
+       }
+       block_group->space_info->total_bytes -= block_group->length;
++      if (block_group->zone_is_active)
++              block_group->space_info->active_total_bytes -= block_group->length;
+       block_group->space_info->bytes_readonly -=
+               (block_group->length - block_group->zone_unusable);
+       block_group->space_info->bytes_zone_unusable -=
+@@ -2102,7 +2107,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
+       trace_btrfs_add_block_group(info, cache, 0);
+       btrfs_update_space_info(info, cache->flags, cache->length,
+                               cache->used, cache->bytes_super,
+-                              cache->zone_unusable, &space_info);
++                              cache->zone_unusable, cache->zone_is_active,
++                              &space_info);
+       cache->space_info = space_info;
+@@ -2172,7 +2178,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
+               }
+               btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
+-                                      0, 0, &space_info);
++                                      0, 0, false, &space_info);
+               bg->space_info = space_info;
+               link_block_group(bg);
+@@ -2553,7 +2559,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
+       trace_btrfs_add_block_group(fs_info, cache, 1);
+       btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
+                               cache->bytes_super, cache->zone_unusable,
+-                              &cache->space_info);
++                              cache->zone_is_active, &cache->space_info);
+       btrfs_update_global_block_rsv(fs_info);
+       link_block_group(cache);
+diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
+index 98a84b523be6..4867199cf983 100644
+--- a/fs/btrfs/space-info.c
++++ b/fs/btrfs/space-info.c
+@@ -295,7 +295,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
+ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
+                            u64 total_bytes, u64 bytes_used,
+                            u64 bytes_readonly, u64 bytes_zone_unusable,
+-                           struct btrfs_space_info **space_info)
++                           bool active, struct btrfs_space_info **space_info)
+ {
+       struct btrfs_space_info *found;
+       int factor;
+@@ -306,6 +306,8 @@ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
+       ASSERT(found);
+       spin_lock(&found->lock);
+       found->total_bytes += total_bytes;
++      if (active)
++              found->active_total_bytes += total_bytes;
+       found->disk_total += total_bytes * factor;
+       found->bytes_used += bytes_used;
+       found->disk_used += bytes_used * factor;
+@@ -369,6 +371,22 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
+       return avail;
+ }
++static inline u64 writable_total_bytes(struct btrfs_fs_info *fs_info,
++                                     struct btrfs_space_info *space_info)
++{
++      /*
++       * On regular filesystem, all total_bytes are always writable. On zoned
++       * filesystem, there may be a limitation imposed by max_active_zones.
++       * For metadata allocation, we cannot finish an existing active block
++       * group to avoid a deadlock. Thus, we need to consider only the active
++       * groups to be writable for metadata space.
++       */
++      if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA))
++              return space_info->total_bytes;
++
++      return space_info->active_total_bytes;
++}
++
+ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
+                        struct btrfs_space_info *space_info, u64 bytes,
+                        enum btrfs_reserve_flush_enum flush)
+@@ -386,7 +404,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
+       else
+               avail = calc_available_free_space(fs_info, space_info, flush);
+-      if (used + bytes < space_info->total_bytes + avail)
++      if (used + bytes < writable_total_bytes(fs_info, space_info) + avail)
+               return 1;
+       return 0;
+ }
+@@ -422,7 +440,7 @@ void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
+               ticket = list_first_entry(head, struct reserve_ticket, list);
+               /* Check and see if our ticket can be satisfied now. */
+-              if ((used + ticket->bytes <= space_info->total_bytes) ||
++              if ((used + ticket->bytes <= writable_total_bytes(fs_info, space_info)) ||
+                   btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
+                                        flush)) {
+                       btrfs_space_info_update_bytes_may_use(fs_info,
+@@ -753,6 +771,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
+ {
+       u64 used;
+       u64 avail;
++      u64 total;
+       u64 to_reclaim = space_info->reclaim_size;
+       lockdep_assert_held(&space_info->lock);
+@@ -767,8 +786,9 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
+        * space.  If that's the case add in our overage so we make sure to put
+        * appropriate pressure on the flushing state machine.
+        */
+-      if (space_info->total_bytes + avail < used)
+-              to_reclaim += used - (space_info->total_bytes + avail);
++      total = writable_total_bytes(fs_info, space_info);
++      if (total + avail < used)
++              to_reclaim += used - (total + avail);
+       return to_reclaim;
+ }
+@@ -778,9 +798,12 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
+ {
+       u64 global_rsv_size = fs_info->global_block_rsv.reserved;
+       u64 ordered, delalloc;
+-      u64 thresh = div_factor_fine(space_info->total_bytes, 90);
++      u64 total = writable_total_bytes(fs_info, space_info);
++      u64 thresh;
+       u64 used;
++      thresh = div_factor_fine(total, 90);
++
+       lockdep_assert_held(&space_info->lock);
+       /* If we're just plain full then async reclaim just slows us down. */
+@@ -842,8 +865,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
+                                          BTRFS_RESERVE_FLUSH_ALL);
+       used = space_info->bytes_used + space_info->bytes_reserved +
+              space_info->bytes_readonly + global_rsv_size;
+-      if (used < space_info->total_bytes)
+-              thresh += space_info->total_bytes - used;
++      if (used < total)
++              thresh += total - used;
+       thresh >>= space_info->clamp;
+       used = space_info->bytes_pinned;
+@@ -1560,7 +1583,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
+        * can_overcommit() to ensure we can overcommit to continue.
+        */
+       if (!pending_tickets &&
+-          ((used + orig_bytes <= space_info->total_bytes) ||
++          ((used + orig_bytes <= writable_total_bytes(fs_info, space_info)) ||
+            btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
+               btrfs_space_info_update_bytes_may_use(fs_info, space_info,
+                                                     orig_bytes);
+diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
+index 137206b8049f..b8cee27df213 100644
+--- a/fs/btrfs/space-info.h
++++ b/fs/btrfs/space-info.h
+@@ -17,6 +17,8 @@ struct btrfs_space_info {
+       u64 bytes_may_use;      /* number of bytes that may be used for
+                                  delalloc/allocations */
+       u64 bytes_readonly;     /* total bytes that are read only */
++      /* Total bytes in the space, but only accounts active block groups. */
++      u64 active_total_bytes;
+       u64 bytes_zone_unusable;        /* total bytes that are unusable until
+                                          resetting the device zone */
+@@ -122,7 +124,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
+ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
+                            u64 total_bytes, u64 bytes_used,
+                            u64 bytes_readonly, u64 bytes_zone_unusable,
+-                           struct btrfs_space_info **space_info);
++                           bool active, struct btrfs_space_info **space_info);
+ void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
+                                       u64 chunk_size);
+ struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 170681797283..2ffc6d50d20d 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1841,6 +1841,7 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
+ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+ {
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
++      struct btrfs_space_info *space_info = block_group->space_info;
+       struct map_lookup *map;
+       struct btrfs_device *device;
+       u64 physical;
+@@ -1852,6 +1853,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+       map = block_group->physical_map;
++      spin_lock(&space_info->lock);
+       spin_lock(&block_group->lock);
+       if (block_group->zone_is_active) {
+               ret = true;
+@@ -1880,7 +1882,10 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+       /* Successfully activated all the zones */
+       block_group->zone_is_active = 1;
++      space_info->active_total_bytes += block_group->length;
+       spin_unlock(&block_group->lock);
++      btrfs_try_granting_tickets(fs_info, space_info);
++      spin_unlock(&space_info->lock);
+       /* For the active block group list */
+       btrfs_get_block_group(block_group);
+@@ -1893,6 +1898,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+ out_unlock:
+       spin_unlock(&block_group->lock);
++      spin_unlock(&space_info->lock);
+       return ret;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.18/btrfs-zoned-revive-max_zone_append_bytes.patch b/queue-5.18/btrfs-zoned-revive-max_zone_append_bytes.patch
new file mode 100644 (file)
index 0000000..641ef5e
--- /dev/null
@@ -0,0 +1,108 @@
+From 33903083ab3fd9bee96c3d8ade9c045e2b61060a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:39 +0900
+Subject: btrfs: zoned: revive max_zone_append_bytes
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit c2ae7b772ef4e86c5ddf3fd47bf59045ae96a414 ]
+
+This patch is basically a revert of commit 5a80d1c6a270 ("btrfs: zoned:
+remove max_zone_append_size logic"), but without the unnecessary ASSERT and
+check. The max_zone_append_size will be used as a hint to estimate the
+number of extents needed to cover a delalloc/writeback region in the later
+commits.
+
+The size of a ZONE APPEND bio is also limited by queue_max_segments(), so
+this commit takes it into account when calculating max_zone_append_size.
+Technically, a bio can be larger than queue_max_segments() * PAGE_SIZE if
+the pages are contiguous. But it is safe to treat "queue_max_segments() *
+PAGE_SIZE" as an upper limit on an extent size when calculating the number
+of extents needed to write data.
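+
+For example, on a hypothetical device reporting a 1 MiB zone append limit
+via bdev_max_zone_append_sectors() but only 128 segments with 4 KiB pages,
+the effective limit becomes min(1 MiB, 128 * 4 KiB) = 512 KiB.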
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ctree.h |  2 ++
+ fs/btrfs/zoned.c | 17 +++++++++++++++++
+ fs/btrfs/zoned.h |  1 +
+ 3 files changed, 20 insertions(+)
+
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index 077c95e9baa5..1c377bcfe787 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -1050,6 +1050,8 @@ struct btrfs_fs_info {
+               u64 zoned;
+       };
++      /* Max size to emit ZONE_APPEND write command */
++      u64 max_zone_append_size;
+       struct mutex zoned_meta_io_lock;
+       spinlock_t treelog_bg_lock;
+       u64 treelog_bg;
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 84b6d39509bd..1d5b9308f5ef 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -407,6 +407,16 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
+       nr_sectors = bdev_nr_sectors(bdev);
+       zone_info->zone_size_shift = ilog2(zone_info->zone_size);
+       zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors);
++      /*
++       * We limit max_zone_append_size also by max_segments *
++       * PAGE_SIZE. Technically, we can have multiple pages per segment. But,
++       * since btrfs adds the pages one by one to a bio, and btrfs cannot
++       * increase the metadata reservation even if it increases the number of
++       * extents, it is safe to stick with the limit.
++       */
++      zone_info->max_zone_append_size =
++              min_t(u64, (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
++                    (u64)bdev_max_segments(bdev) << PAGE_SHIFT);
+       if (!IS_ALIGNED(nr_sectors, zone_sectors))
+               zone_info->nr_zones++;
+@@ -632,6 +642,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
+       u64 zoned_devices = 0;
+       u64 nr_devices = 0;
+       u64 zone_size = 0;
++      u64 max_zone_append_size = 0;
+       const bool incompat_zoned = btrfs_fs_incompat(fs_info, ZONED);
+       int ret = 0;
+@@ -666,6 +677,11 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
+                               ret = -EINVAL;
+                               goto out;
+                       }
++                      if (!max_zone_append_size ||
++                          (zone_info->max_zone_append_size &&
++                           zone_info->max_zone_append_size < max_zone_append_size))
++                              max_zone_append_size =
++                                      zone_info->max_zone_append_size;
+               }
+               nr_devices++;
+       }
+@@ -715,6 +731,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
+       }
+       fs_info->zone_size = zone_size;
++      fs_info->max_zone_append_size = max_zone_append_size;
+       fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
+       /*
+diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
+index cf6320feef46..2d6da8f4b55a 100644
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -23,6 +23,7 @@ struct btrfs_zoned_device_info {
+        */
+       u64 zone_size;
+       u8  zone_size_shift;
++      u64 max_zone_append_size;
+       u32 nr_zones;
+       unsigned int max_active_zones;
+       atomic_t active_zones_left;
+-- 
+2.35.1
+
diff --git a/queue-5.18/btrfs-zoned-wait-until-zone-is-finished-when-allocat.patch b/queue-5.18/btrfs-zoned-wait-until-zone-is-finished-when-allocat.patch
new file mode 100644 (file)
index 0000000..454c96a
--- /dev/null
@@ -0,0 +1,114 @@
+From bfdbe30121dfbc58093980adb193543c02b438ce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:50 +0900
+Subject: btrfs: zoned: wait until zone is finished when allocation didn't
+ progress
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 2ce543f478433a0eec0f72090d7e814f1d53d456 ]
+
+When the allocated position doesn't progress, we cannot submit IOs to
+finish a block group, but there should be ongoing IOs that will finish a
+block group. So, in that case, we wait for a zone to be finished and retry
+the allocation after that.
+
+Introduce a new flag BTRFS_FS_NEED_ZONE_FINISH for fs_info->flags to
+indicate that we need a zone finish before allocation can proceed. The flag
+is set when the allocator detects that it cannot activate a new block
+group, and it is cleared once a zone is finished.
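+
+Condensed from the hunks below, the handshake between the allocator side
+and the zone-finishing side looks roughly like this (illustrative, error
+handling omitted):
+
+	/* Allocator (run_delalloc_zoned): allocation made no progress. */
+	wait_var_event(&fs_info->zone_finish_wait,
+		       !test_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags));
+
+	/* Zone finish (btrfs_zone_finish): space is available again. */
+	clear_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
+	wake_up_all(&fs_info->zone_finish_wait);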
+
+CC: stable@vger.kernel.org # 5.16+
+Fixes: afba2bc036b0 ("btrfs: zoned: implement active zone tracking")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ctree.h   | 5 +++++
+ fs/btrfs/disk-io.c | 1 +
+ fs/btrfs/inode.c   | 9 +++++++--
+ fs/btrfs/zoned.c   | 6 ++++++
+ 4 files changed, 19 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index 97f5a3d320ff..76fbe4cf2a28 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -635,6 +635,9 @@ enum {
+       /* Indicate we have half completed snapshot deletions pending. */
+       BTRFS_FS_UNFINISHED_DROPS,
++      /* Indicate we have to finish a zone to do next allocation. */
++      BTRFS_FS_NEED_ZONE_FINISH,
++
+ #if BITS_PER_LONG == 32
+       /* Indicate if we have error/warn message printed on 32bit systems */
+       BTRFS_FS_32BIT_ERROR,
+@@ -1074,6 +1077,8 @@ struct btrfs_fs_info {
+       spinlock_t zone_active_bgs_lock;
+       struct list_head zone_active_bgs;
++      /* Waiters when BTRFS_FS_NEED_ZONE_FINISH is set */
++      wait_queue_head_t zone_finish_wait;
+ #ifdef CONFIG_BTRFS_FS_REF_VERIFY
+       spinlock_t ref_verify_lock;
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index bf5c6ac67e87..59fa7bf3a2e5 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -3239,6 +3239,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
+       init_waitqueue_head(&fs_info->transaction_blocked_wait);
+       init_waitqueue_head(&fs_info->async_submit_wait);
+       init_waitqueue_head(&fs_info->delayed_iputs_wait);
++      init_waitqueue_head(&fs_info->zone_finish_wait);
+       /* Usable values until the real ones are cached from the superblock */
+       fs_info->nodesize = 4096;
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 9753fc47e488..64d310ecbb84 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -1606,8 +1606,13 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
+               if (ret == 0)
+                       done_offset = end;
+-              if (done_offset == start)
+-                      return -ENOSPC;
++              if (done_offset == start) {
++                      struct btrfs_fs_info *info = inode->root->fs_info;
++
++                      wait_var_event(&info->zone_finish_wait,
++                                     !test_bit(BTRFS_FS_NEED_ZONE_FINISH, &info->flags));
++                      continue;
++              }
+               if (!locked_page_done) {
+                       __set_page_dirty_nobuffers(locked_page);
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 0c2d81b0e3d3..45e29b8c705c 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1993,6 +1993,9 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
+       /* For active_bg_list */
+       btrfs_put_block_group(block_group);
++      clear_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
++      wake_up_all(&fs_info->zone_finish_wait);
++
+       return 0;
+ }
+@@ -2021,6 +2024,9 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
+       }
+       mutex_unlock(&fs_info->chunk_mutex);
++      if (!ret)
++              set_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
++
+       return ret;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.18/btrfs-zoned-write-out-partially-allocated-region.patch b/queue-5.18/btrfs-zoned-write-out-partially-allocated-region.patch
new file mode 100644 (file)
index 0000000..30718fd
--- /dev/null
@@ -0,0 +1,186 @@
+From e3d143c47bd2fd49137af62c599571f2ab7620d6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:49 +0900
+Subject: btrfs: zoned: write out partially allocated region
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 898793d992c23dac6126a6a94ad893eae1a2c9df ]
+
+cow_file_range() works in an all-or-nothing way: if it fails to allocate an
+extent for a part of the given region, it gives up the whole region,
+including the successfully allocated parts. Above cow_file_range(),
+run_delalloc_zoned() writes out data for the region only when it
+successfully allocates the whole region.
+
+This all-or-nothing allocation and write-out is problematic when the
+available space in all the block groups gets tight under the active zone
+restriction. btrfs_reserve_extent() tries hard to utilize the space left in
+the active block groups, but finally gives up and fails with
+-ENOSPC. However, if we send IOs for the successfully allocated region, we
+can finish a zone and continue the rest of the allocation on a newly
+allocated block group.
+
+This patch implements partial write-out for run_delalloc_zoned(). With
+this patch applied, cow_file_range() returns -EAGAIN to tell the caller to
+do something to make further allocation progress, and reports the
+successfully allocated region via done_offset. Furthermore, the zoned
+extent allocator returns -EAGAIN to tell cow_file_range() to go back to the
+caller side.
+
+Actually, we still need to wait for an IO to complete to continue the
+allocation. The next patch implements that part.
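+
+A condensed view of the resulting retry loop in run_delalloc_zoned()
+(illustrative; see the full hunk below for the locked-page handling):
+
+	while (start <= end) {
+		ret = cow_file_range(inode, locked_page, start, end,
+				     page_started, nr_written, 0, &done_offset);
+		if (ret && ret != -EAGAIN)
+			return ret;		/* hard failure */
+		if (ret == 0)
+			done_offset = end;	/* everything was allocated */
+		if (done_offset == start)
+			return -ENOSPC;		/* no forward progress */
+		/* Write out what was allocated, then retry the remainder. */
+		extent_write_locked_range(&inode->vfs_inode, start, done_offset);
+		start = done_offset + 1;
+	}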
+
+CC: stable@vger.kernel.org # 5.16+
+Fixes: afba2bc036b0 ("btrfs: zoned: implement active zone tracking")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent-tree.c | 10 +++++++
+ fs/btrfs/inode.c       | 63 ++++++++++++++++++++++++++++++++----------
+ 2 files changed, 59 insertions(+), 14 deletions(-)
+
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index 56185541e188..eee68a6f2be7 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -4015,6 +4015,16 @@ static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
+       if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
+               return -ENOSPC;
++      /*
++       * Even min_alloc_size is not left in any block groups. Since we cannot
++       * activate a new block group, allocating it may not help. Let's tell a
++       * caller to try again and hope it progress something by writing some
++       * parts of the region. That is only possible for data block groups,
++       * where a part of the region can be written.
++       */
++      if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA)
++              return -EAGAIN;
++
+       /*
+        * We cannot activate a new block group and no enough space left in any
+        * block groups. So, allocating a new block group may not help. But,
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index c50288d90c66..9753fc47e488 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -92,7 +92,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
+ static noinline int cow_file_range(struct btrfs_inode *inode,
+                                  struct page *locked_page,
+                                  u64 start, u64 end, int *page_started,
+-                                 unsigned long *nr_written, int unlock);
++                                 unsigned long *nr_written, int unlock,
++                                 u64 *done_offset);
+ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
+                                      u64 len, u64 orig_start, u64 block_start,
+                                      u64 block_len, u64 orig_block_len,
+@@ -884,7 +885,7 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
+        * can directly submit them without interruption.
+        */
+       ret = cow_file_range(inode, locked_page, start, end, &page_started,
+-                           &nr_written, 0);
++                           &nr_written, 0, NULL);
+       /* Inline extent inserted, page gets unlocked and everything is done */
+       if (page_started) {
+               ret = 0;
+@@ -1133,7 +1134,8 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
+ static noinline int cow_file_range(struct btrfs_inode *inode,
+                                  struct page *locked_page,
+                                  u64 start, u64 end, int *page_started,
+-                                 unsigned long *nr_written, int unlock)
++                                 unsigned long *nr_written, int unlock,
++                                 u64 *done_offset)
+ {
+       struct btrfs_root *root = inode->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+@@ -1326,6 +1328,21 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
+       btrfs_dec_block_group_reservations(fs_info, ins.objectid);
+       btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
+ out_unlock:
++      /*
++       * If done_offset is non-NULL and ret == -EAGAIN, we expect the
++       * caller to write out the successfully allocated region and retry.
++       */
++      if (done_offset && ret == -EAGAIN) {
++              if (orig_start < start)
++                      *done_offset = start - 1;
++              else
++                      *done_offset = start;
++              return ret;
++      } else if (ret == -EAGAIN) {
++              /* Convert to -ENOSPC since the caller cannot retry. */
++              ret = -ENOSPC;
++      }
++
+       /*
+        * Now, we have three regions to clean up:
+        *
+@@ -1571,19 +1588,37 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
+                                      u64 end, int *page_started,
+                                      unsigned long *nr_written)
+ {
++      u64 done_offset = end;
+       int ret;
++      bool locked_page_done = false;
+-      ret = cow_file_range(inode, locked_page, start, end, page_started,
+-                           nr_written, 0);
+-      if (ret)
+-              return ret;
++      while (start <= end) {
++              ret = cow_file_range(inode, locked_page, start, end, page_started,
++                                   nr_written, 0, &done_offset);
++              if (ret && ret != -EAGAIN)
++                      return ret;
+-      if (*page_started)
+-              return 0;
++              if (*page_started) {
++                      ASSERT(ret == 0);
++                      return 0;
++              }
++
++              if (ret == 0)
++                      done_offset = end;
++
++              if (done_offset == start)
++                      return -ENOSPC;
++
++              if (!locked_page_done) {
++                      __set_page_dirty_nobuffers(locked_page);
++                      account_page_redirty(locked_page);
++              }
++              locked_page_done = true;
++              extent_write_locked_range(&inode->vfs_inode, start, done_offset);
++
++              start = done_offset + 1;
++      }
+-      __set_page_dirty_nobuffers(locked_page);
+-      account_page_redirty(locked_page);
+-      extent_write_locked_range(&inode->vfs_inode, start, end);
+       *page_started = 1;
+       return 0;
+@@ -1675,7 +1710,7 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
+       }
+       return cow_file_range(inode, locked_page, start, end, page_started,
+-                            nr_written, 1);
++                            nr_written, 1, NULL);
+ }
+ /*
+@@ -2086,7 +2121,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
+                                                page_started, nr_written);
+               else
+                       ret = cow_file_range(inode, locked_page, start, end,
+-                                           page_started, nr_written, 1);
++                                           page_started, nr_written, 1, NULL);
+       } else {
+               set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
+               ret = cow_file_range_async(inode, wbc, locked_page, start, end,
+-- 
+2.35.1
+
diff --git a/queue-5.18/crypto-blake2s-remove-shash-module.patch b/queue-5.18/crypto-blake2s-remove-shash-module.patch
new file mode 100644 (file)
index 0000000..8f4b10a
--- /dev/null
@@ -0,0 +1,957 @@
+From 808c1dca59fc32bd267c25d387bbc56a55144ccb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 28 May 2022 21:44:07 +0200
+Subject: crypto: blake2s - remove shash module
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+[ Upstream commit 2d16803c562ecc644803d42ba98a8e0aef9c014e ]
+
+BLAKE2s has no currently known use as an shash. Just remove all of this
+unnecessary plumbing. Removing this shash was something we talked about
+back when we were making BLAKE2s a built-in, but I simply never got
+around to doing it. So this completes that project.
+
+Importantly, this fixes a bug in which the lib code depends on
+crypto_simd_disabled_for_test, causing linker errors.
+
+Also add more alignment tests to the selftests and compare SIMD and
+non-SIMD compression functions, to make up for what we lose from
+testmgr.c.
+
+Reported-by: gaochao <gaochao49@huawei.com>
+Cc: Eric Biggers <ebiggers@kernel.org>
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: stable@vger.kernel.org
+Fixes: 6048fdcc5f26 ("lib/crypto: blake2s: include as built-in")
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/crypto/Kconfig           |   2 +-
+ arch/arm/crypto/Makefile          |   4 +-
+ arch/arm/crypto/blake2s-shash.c   |  75 -----------
+ arch/x86/crypto/Makefile          |   4 +-
+ arch/x86/crypto/blake2s-glue.c    |   3 +-
+ arch/x86/crypto/blake2s-shash.c   |  77 -----------
+ crypto/Kconfig                    |  20 +--
+ crypto/Makefile                   |   1 -
+ crypto/blake2s_generic.c          |  75 -----------
+ crypto/tcrypt.c                   |  12 --
+ crypto/testmgr.c                  |  24 ----
+ crypto/testmgr.h                  | 217 ------------------------------
+ include/crypto/internal/blake2s.h | 108 ---------------
+ lib/crypto/blake2s-selftest.c     |  41 ++++++
+ lib/crypto/blake2s.c              |  37 ++++-
+ 15 files changed, 76 insertions(+), 624 deletions(-)
+ delete mode 100644 arch/arm/crypto/blake2s-shash.c
+ delete mode 100644 arch/x86/crypto/blake2s-shash.c
+ delete mode 100644 crypto/blake2s_generic.c
+
+diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
+index e4dba5461cb3..149a5bd6b88c 100644
+--- a/arch/arm/crypto/Kconfig
++++ b/arch/arm/crypto/Kconfig
+@@ -63,7 +63,7 @@ config CRYPTO_SHA512_ARM
+         using optimized ARM assembler and NEON, when available.
+ config CRYPTO_BLAKE2S_ARM
+-      tristate "BLAKE2s digest algorithm (ARM)"
++      bool "BLAKE2s digest algorithm (ARM)"
+       select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+       help
+         BLAKE2s digest algorithm optimized with ARM scalar instructions.  This
+diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
+index 0274f81cc8ea..971e74546fb1 100644
+--- a/arch/arm/crypto/Makefile
++++ b/arch/arm/crypto/Makefile
+@@ -9,8 +9,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
+ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
+ obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
+ obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
+-obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += blake2s-arm.o
+-obj-$(if $(CONFIG_CRYPTO_BLAKE2S_ARM),y) += libblake2s-arm.o
++obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += libblake2s-arm.o
+ obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o
+ obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
+ obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
+@@ -32,7 +31,6 @@ sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
+ sha256-arm-y  := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
+ sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o
+ sha512-arm-y  := sha512-core.o sha512-glue.o $(sha512-arm-neon-y)
+-blake2s-arm-y   := blake2s-shash.o
+ libblake2s-arm-y:= blake2s-core.o blake2s-glue.o
+ blake2b-neon-y  := blake2b-neon-core.o blake2b-neon-glue.o
+ sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
+diff --git a/arch/arm/crypto/blake2s-shash.c b/arch/arm/crypto/blake2s-shash.c
+deleted file mode 100644
+index 763c73beea2d..000000000000
+--- a/arch/arm/crypto/blake2s-shash.c
++++ /dev/null
+@@ -1,75 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later
+-/*
+- * BLAKE2s digest algorithm, ARM scalar implementation
+- *
+- * Copyright 2020 Google LLC
+- */
+-
+-#include <crypto/internal/blake2s.h>
+-#include <crypto/internal/hash.h>
+-
+-#include <linux/module.h>
+-
+-static int crypto_blake2s_update_arm(struct shash_desc *desc,
+-                                   const u8 *in, unsigned int inlen)
+-{
+-      return crypto_blake2s_update(desc, in, inlen, false);
+-}
+-
+-static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out)
+-{
+-      return crypto_blake2s_final(desc, out, false);
+-}
+-
+-#define BLAKE2S_ALG(name, driver_name, digest_size)                   \
+-      {                                                               \
+-              .base.cra_name          = name,                         \
+-              .base.cra_driver_name   = driver_name,                  \
+-              .base.cra_priority      = 200,                          \
+-              .base.cra_flags         = CRYPTO_ALG_OPTIONAL_KEY,      \
+-              .base.cra_blocksize     = BLAKE2S_BLOCK_SIZE,           \
+-              .base.cra_ctxsize       = sizeof(struct blake2s_tfm_ctx), \
+-              .base.cra_module        = THIS_MODULE,                  \
+-              .digestsize             = digest_size,                  \
+-              .setkey                 = crypto_blake2s_setkey,        \
+-              .init                   = crypto_blake2s_init,          \
+-              .update                 = crypto_blake2s_update_arm,    \
+-              .final                  = crypto_blake2s_final_arm,     \
+-              .descsize               = sizeof(struct blake2s_state), \
+-      }
+-
+-static struct shash_alg blake2s_arm_algs[] = {
+-      BLAKE2S_ALG("blake2s-128", "blake2s-128-arm", BLAKE2S_128_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-160", "blake2s-160-arm", BLAKE2S_160_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-224", "blake2s-224-arm", BLAKE2S_224_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-256", "blake2s-256-arm", BLAKE2S_256_HASH_SIZE),
+-};
+-
+-static int __init blake2s_arm_mod_init(void)
+-{
+-      return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
+-              crypto_register_shashes(blake2s_arm_algs,
+-                                      ARRAY_SIZE(blake2s_arm_algs)) : 0;
+-}
+-
+-static void __exit blake2s_arm_mod_exit(void)
+-{
+-      if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
+-              crypto_unregister_shashes(blake2s_arm_algs,
+-                                        ARRAY_SIZE(blake2s_arm_algs));
+-}
+-
+-module_init(blake2s_arm_mod_init);
+-module_exit(blake2s_arm_mod_exit);
+-
+-MODULE_DESCRIPTION("BLAKE2s digest algorithm, ARM scalar implementation");
+-MODULE_LICENSE("GPL");
+-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+-MODULE_ALIAS_CRYPTO("blake2s-128");
+-MODULE_ALIAS_CRYPTO("blake2s-128-arm");
+-MODULE_ALIAS_CRYPTO("blake2s-160");
+-MODULE_ALIAS_CRYPTO("blake2s-160-arm");
+-MODULE_ALIAS_CRYPTO("blake2s-224");
+-MODULE_ALIAS_CRYPTO("blake2s-224-arm");
+-MODULE_ALIAS_CRYPTO("blake2s-256");
+-MODULE_ALIAS_CRYPTO("blake2s-256-arm");
+diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
+index 2831685adf6f..8ed4597fdf6a 100644
+--- a/arch/x86/crypto/Makefile
++++ b/arch/x86/crypto/Makefile
+@@ -61,9 +61,7 @@ sha256-ssse3-$(CONFIG_AS_SHA256_NI) += sha256_ni_asm.o
+ obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
+ sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
+-obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o
+-blake2s-x86_64-y := blake2s-shash.o
+-obj-$(if $(CONFIG_CRYPTO_BLAKE2S_X86),y) += libblake2s-x86_64.o
++obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += libblake2s-x86_64.o
+ libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o
+ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
+diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
+index 69853c13e8fb..aaba21230528 100644
+--- a/arch/x86/crypto/blake2s-glue.c
++++ b/arch/x86/crypto/blake2s-glue.c
+@@ -4,7 +4,6 @@
+  */
+ #include <crypto/internal/blake2s.h>
+-#include <crypto/internal/simd.h>
+ #include <linux/types.h>
+ #include <linux/jump_label.h>
+@@ -33,7 +32,7 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block,
+       /* SIMD disables preemption, so relax after processing each page. */
+       BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
+-      if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
++      if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) {
+               blake2s_compress_generic(state, block, nblocks, inc);
+               return;
+       }
+diff --git a/arch/x86/crypto/blake2s-shash.c b/arch/x86/crypto/blake2s-shash.c
+deleted file mode 100644
+index 59ae28abe35c..000000000000
+--- a/arch/x86/crypto/blake2s-shash.c
++++ /dev/null
+@@ -1,77 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0 OR MIT
+-/*
+- * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+- */
+-
+-#include <crypto/internal/blake2s.h>
+-#include <crypto/internal/simd.h>
+-#include <crypto/internal/hash.h>
+-
+-#include <linux/types.h>
+-#include <linux/kernel.h>
+-#include <linux/module.h>
+-#include <linux/sizes.h>
+-
+-#include <asm/cpufeature.h>
+-#include <asm/processor.h>
+-
+-static int crypto_blake2s_update_x86(struct shash_desc *desc,
+-                                   const u8 *in, unsigned int inlen)
+-{
+-      return crypto_blake2s_update(desc, in, inlen, false);
+-}
+-
+-static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out)
+-{
+-      return crypto_blake2s_final(desc, out, false);
+-}
+-
+-#define BLAKE2S_ALG(name, driver_name, digest_size)                   \
+-      {                                                               \
+-              .base.cra_name          = name,                         \
+-              .base.cra_driver_name   = driver_name,                  \
+-              .base.cra_priority      = 200,                          \
+-              .base.cra_flags         = CRYPTO_ALG_OPTIONAL_KEY,      \
+-              .base.cra_blocksize     = BLAKE2S_BLOCK_SIZE,           \
+-              .base.cra_ctxsize       = sizeof(struct blake2s_tfm_ctx), \
+-              .base.cra_module        = THIS_MODULE,                  \
+-              .digestsize             = digest_size,                  \
+-              .setkey                 = crypto_blake2s_setkey,        \
+-              .init                   = crypto_blake2s_init,          \
+-              .update                 = crypto_blake2s_update_x86,    \
+-              .final                  = crypto_blake2s_final_x86,     \
+-              .descsize               = sizeof(struct blake2s_state), \
+-      }
+-
+-static struct shash_alg blake2s_algs[] = {
+-      BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE),
+-};
+-
+-static int __init blake2s_mod_init(void)
+-{
+-      if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
+-              return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+-      return 0;
+-}
+-
+-static void __exit blake2s_mod_exit(void)
+-{
+-      if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
+-              crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+-}
+-
+-module_init(blake2s_mod_init);
+-module_exit(blake2s_mod_exit);
+-
+-MODULE_ALIAS_CRYPTO("blake2s-128");
+-MODULE_ALIAS_CRYPTO("blake2s-128-x86");
+-MODULE_ALIAS_CRYPTO("blake2s-160");
+-MODULE_ALIAS_CRYPTO("blake2s-160-x86");
+-MODULE_ALIAS_CRYPTO("blake2s-224");
+-MODULE_ALIAS_CRYPTO("blake2s-224-x86");
+-MODULE_ALIAS_CRYPTO("blake2s-256");
+-MODULE_ALIAS_CRYPTO("blake2s-256-x86");
+-MODULE_LICENSE("GPL v2");
+diff --git a/crypto/Kconfig b/crypto/Kconfig
+index b4e00a7a046b..38601a072b99 100644
+--- a/crypto/Kconfig
++++ b/crypto/Kconfig
+@@ -692,26 +692,8 @@ config CRYPTO_BLAKE2B
+         See https://blake2.net for further information.
+-config CRYPTO_BLAKE2S
+-      tristate "BLAKE2s digest algorithm"
+-      select CRYPTO_LIB_BLAKE2S_GENERIC
+-      select CRYPTO_HASH
+-      help
+-        Implementation of cryptographic hash function BLAKE2s
+-        optimized for 8-32bit platforms and can produce digests of any size
+-        between 1 to 32.  The keyed hash is also implemented.
+-
+-        This module provides the following algorithms:
+-
+-        - blake2s-128
+-        - blake2s-160
+-        - blake2s-224
+-        - blake2s-256
+-
+-        See https://blake2.net for further information.
+-
+ config CRYPTO_BLAKE2S_X86
+-      tristate "BLAKE2s digest algorithm (x86 accelerated version)"
++      bool "BLAKE2s digest algorithm (x86 accelerated version)"
+       depends on X86 && 64BIT
+       select CRYPTO_LIB_BLAKE2S_GENERIC
+       select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+diff --git a/crypto/Makefile b/crypto/Makefile
+index a40e6d5fb2c8..dbfa53567c92 100644
+--- a/crypto/Makefile
++++ b/crypto/Makefile
+@@ -83,7 +83,6 @@ obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o
+ obj-$(CONFIG_CRYPTO_WP512) += wp512.o
+ CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns)  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
+ obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b_generic.o
+-obj-$(CONFIG_CRYPTO_BLAKE2S) += blake2s_generic.o
+ obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
+ obj-$(CONFIG_CRYPTO_ECB) += ecb.o
+ obj-$(CONFIG_CRYPTO_CBC) += cbc.o
+diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c
+deleted file mode 100644
+index 5f96a21f8788..000000000000
+--- a/crypto/blake2s_generic.c
++++ /dev/null
+@@ -1,75 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0 OR MIT
+-/*
+- * shash interface to the generic implementation of BLAKE2s
+- *
+- * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+- */
+-
+-#include <crypto/internal/blake2s.h>
+-#include <crypto/internal/hash.h>
+-
+-#include <linux/types.h>
+-#include <linux/kernel.h>
+-#include <linux/module.h>
+-
+-static int crypto_blake2s_update_generic(struct shash_desc *desc,
+-                                       const u8 *in, unsigned int inlen)
+-{
+-      return crypto_blake2s_update(desc, in, inlen, true);
+-}
+-
+-static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out)
+-{
+-      return crypto_blake2s_final(desc, out, true);
+-}
+-
+-#define BLAKE2S_ALG(name, driver_name, digest_size)                   \
+-      {                                                               \
+-              .base.cra_name          = name,                         \
+-              .base.cra_driver_name   = driver_name,                  \
+-              .base.cra_priority      = 100,                          \
+-              .base.cra_flags         = CRYPTO_ALG_OPTIONAL_KEY,      \
+-              .base.cra_blocksize     = BLAKE2S_BLOCK_SIZE,           \
+-              .base.cra_ctxsize       = sizeof(struct blake2s_tfm_ctx), \
+-              .base.cra_module        = THIS_MODULE,                  \
+-              .digestsize             = digest_size,                  \
+-              .setkey                 = crypto_blake2s_setkey,        \
+-              .init                   = crypto_blake2s_init,          \
+-              .update                 = crypto_blake2s_update_generic, \
+-              .final                  = crypto_blake2s_final_generic, \
+-              .descsize               = sizeof(struct blake2s_state), \
+-      }
+-
+-static struct shash_alg blake2s_algs[] = {
+-      BLAKE2S_ALG("blake2s-128", "blake2s-128-generic",
+-                  BLAKE2S_128_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-160", "blake2s-160-generic",
+-                  BLAKE2S_160_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-224", "blake2s-224-generic",
+-                  BLAKE2S_224_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-256", "blake2s-256-generic",
+-                  BLAKE2S_256_HASH_SIZE),
+-};
+-
+-static int __init blake2s_mod_init(void)
+-{
+-      return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+-}
+-
+-static void __exit blake2s_mod_exit(void)
+-{
+-      crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+-}
+-
+-subsys_initcall(blake2s_mod_init);
+-module_exit(blake2s_mod_exit);
+-
+-MODULE_ALIAS_CRYPTO("blake2s-128");
+-MODULE_ALIAS_CRYPTO("blake2s-128-generic");
+-MODULE_ALIAS_CRYPTO("blake2s-160");
+-MODULE_ALIAS_CRYPTO("blake2s-160-generic");
+-MODULE_ALIAS_CRYPTO("blake2s-224");
+-MODULE_ALIAS_CRYPTO("blake2s-224-generic");
+-MODULE_ALIAS_CRYPTO("blake2s-256");
+-MODULE_ALIAS_CRYPTO("blake2s-256-generic");
+-MODULE_LICENSE("GPL v2");
+diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
+index 2bacf8384f59..66b7ca1ccb23 100644
+--- a/crypto/tcrypt.c
++++ b/crypto/tcrypt.c
+@@ -1669,10 +1669,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
+               ret += tcrypt_test("rmd160");
+               break;
+-      case 41:
+-              ret += tcrypt_test("blake2s-256");
+-              break;
+-
+       case 42:
+               ret += tcrypt_test("blake2b-512");
+               break;
+@@ -2240,10 +2236,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
+               test_hash_speed("rmd160", sec, generic_hash_speed_template);
+               if (mode > 300 && mode < 400) break;
+               fallthrough;
+-      case 316:
+-              test_hash_speed("blake2s-256", sec, generic_hash_speed_template);
+-              if (mode > 300 && mode < 400) break;
+-              fallthrough;
+       case 317:
+               test_hash_speed("blake2b-512", sec, generic_hash_speed_template);
+               if (mode > 300 && mode < 400) break;
+@@ -2352,10 +2344,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
+               test_ahash_speed("rmd160", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+               fallthrough;
+-      case 416:
+-              test_ahash_speed("blake2s-256", sec, generic_hash_speed_template);
+-              if (mode > 400 && mode < 500) break;
+-              fallthrough;
+       case 417:
+               test_ahash_speed("blake2b-512", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+diff --git a/crypto/testmgr.c b/crypto/testmgr.c
+index 4948201065cc..56facdb63843 100644
+--- a/crypto/testmgr.c
++++ b/crypto/testmgr.c
+@@ -4324,30 +4324,6 @@ static const struct alg_test_desc alg_test_descs[] = {
+               .suite = {
+                       .hash = __VECS(blake2b_512_tv_template)
+               }
+-      }, {
+-              .alg = "blake2s-128",
+-              .test = alg_test_hash,
+-              .suite = {
+-                      .hash = __VECS(blakes2s_128_tv_template)
+-              }
+-      }, {
+-              .alg = "blake2s-160",
+-              .test = alg_test_hash,
+-              .suite = {
+-                      .hash = __VECS(blakes2s_160_tv_template)
+-              }
+-      }, {
+-              .alg = "blake2s-224",
+-              .test = alg_test_hash,
+-              .suite = {
+-                      .hash = __VECS(blakes2s_224_tv_template)
+-              }
+-      }, {
+-              .alg = "blake2s-256",
+-              .test = alg_test_hash,
+-              .suite = {
+-                      .hash = __VECS(blakes2s_256_tv_template)
+-              }
+       }, {
+               .alg = "cbc(aes)",
+               .test = alg_test_skcipher,
+diff --git a/crypto/testmgr.h b/crypto/testmgr.h
+index 4d7449fc6a65..c29658337d96 100644
+--- a/crypto/testmgr.h
++++ b/crypto/testmgr.h
+@@ -34034,221 +34034,4 @@ static const struct hash_testvec blake2b_512_tv_template[] = {{
+                         0xae, 0x15, 0x81, 0x15, 0xd0, 0x88, 0xa0, 0x3c, },
+ }};
+-static const struct hash_testvec blakes2s_128_tv_template[] = {{
+-      .digest = (u8[]){ 0x64, 0x55, 0x0d, 0x6f, 0xfe, 0x2c, 0x0a, 0x01,
+-                        0xa1, 0x4a, 0xba, 0x1e, 0xad, 0xe0, 0x20, 0x0c, },
+-}, {
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 64,
+-      .digest = (u8[]){ 0xdc, 0x66, 0xca, 0x8f, 0x03, 0x86, 0x58, 0x01,
+-                        0xb0, 0xff, 0xe0, 0x6e, 0xd8, 0xa1, 0xa9, 0x0e, },
+-}, {
+-      .ksize = 16,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 1,
+-      .digest = (u8[]){ 0x88, 0x1e, 0x42, 0xe7, 0xbb, 0x35, 0x80, 0x82,
+-                        0x63, 0x7c, 0x0a, 0x0f, 0xd7, 0xec, 0x6c, 0x2f, },
+-}, {
+-      .ksize = 32,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 7,
+-      .digest = (u8[]){ 0xcf, 0x9e, 0x07, 0x2a, 0xd5, 0x22, 0xf2, 0xcd,
+-                        0xa2, 0xd8, 0x25, 0x21, 0x80, 0x86, 0x73, 0x1c, },
+-}, {
+-      .ksize = 1,
+-      .key = "B",
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 15,
+-      .digest = (u8[]){ 0xf6, 0x33, 0x5a, 0x2c, 0x22, 0xa0, 0x64, 0xb2,
+-                        0xb6, 0x3f, 0xeb, 0xbc, 0xd1, 0xc3, 0xe5, 0xb2, },
+-}, {
+-      .ksize = 16,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 247,
+-      .digest = (u8[]){ 0x72, 0x66, 0x49, 0x60, 0xf9, 0x4a, 0xea, 0xbe,
+-                        0x1f, 0xf4, 0x60, 0xce, 0xb7, 0x81, 0xcb, 0x09, },
+-}, {
+-      .ksize = 32,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 256,
+-      .digest = (u8[]){ 0xd5, 0xa4, 0x0e, 0xc3, 0x16, 0xc7, 0x51, 0xa6,
+-                        0x3c, 0xd0, 0xd9, 0x11, 0x57, 0xfa, 0x1e, 0xbb, },
+-}};
+-
+-static const struct hash_testvec blakes2s_160_tv_template[] = {{
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 7,
+-      .digest = (u8[]){ 0xb4, 0xf2, 0x03, 0x49, 0x37, 0xed, 0xb1, 0x3e,
+-                        0x5b, 0x2a, 0xca, 0x64, 0x82, 0x74, 0xf6, 0x62,
+-                        0xe3, 0xf2, 0x84, 0xff, },
+-}, {
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 256,
+-      .digest = (u8[]){ 0xaa, 0x56, 0x9b, 0xdc, 0x98, 0x17, 0x75, 0xf2,
+-                        0xb3, 0x68, 0x83, 0xb7, 0x9b, 0x8d, 0x48, 0xb1,
+-                        0x9b, 0x2d, 0x35, 0x05, },
+-}, {
+-      .ksize = 1,
+-      .key = "B",
+-      .digest = (u8[]){ 0x50, 0x16, 0xe7, 0x0c, 0x01, 0xd0, 0xd3, 0xc3,
+-                        0xf4, 0x3e, 0xb1, 0x6e, 0x97, 0xa9, 0x4e, 0xd1,
+-                        0x79, 0x65, 0x32, 0x93, },
+-}, {
+-      .ksize = 32,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 1,
+-      .digest = (u8[]){ 0x1c, 0x2b, 0xcd, 0x9a, 0x68, 0xca, 0x8c, 0x71,
+-                        0x90, 0x29, 0x6c, 0x54, 0xfa, 0x56, 0x4a, 0xef,
+-                        0xa2, 0x3a, 0x56, 0x9c, },
+-}, {
+-      .ksize = 16,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 15,
+-      .digest = (u8[]){ 0x36, 0xc3, 0x5f, 0x9a, 0xdc, 0x7e, 0xbf, 0x19,
+-                        0x68, 0xaa, 0xca, 0xd8, 0x81, 0xbf, 0x09, 0x34,
+-                        0x83, 0x39, 0x0f, 0x30, },
+-}, {
+-      .ksize = 1,
+-      .key = "B",
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 64,
+-      .digest = (u8[]){ 0x86, 0x80, 0x78, 0xa4, 0x14, 0xec, 0x03, 0xe5,
+-                        0xb6, 0x9a, 0x52, 0x0e, 0x42, 0xee, 0x39, 0x9d,
+-                        0xac, 0xa6, 0x81, 0x63, },
+-}, {
+-      .ksize = 32,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 247,
+-      .digest = (u8[]){ 0x2d, 0xd8, 0xd2, 0x53, 0x66, 0xfa, 0xa9, 0x01,
+-                        0x1c, 0x9c, 0xaf, 0xa3, 0xe2, 0x9d, 0x9b, 0x10,
+-                        0x0a, 0xf6, 0x73, 0xe8, },
+-}};
+-
+-static const struct hash_testvec blakes2s_224_tv_template[] = {{
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 1,
+-      .digest = (u8[]){ 0x61, 0xb9, 0x4e, 0xc9, 0x46, 0x22, 0xa3, 0x91,
+-                        0xd2, 0xae, 0x42, 0xe6, 0x45, 0x6c, 0x90, 0x12,
+-                        0xd5, 0x80, 0x07, 0x97, 0xb8, 0x86, 0x5a, 0xfc,
+-                        0x48, 0x21, 0x97, 0xbb, },
+-}, {
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 247,
+-      .digest = (u8[]){ 0x9e, 0xda, 0xc7, 0x20, 0x2c, 0xd8, 0x48, 0x2e,
+-                        0x31, 0x94, 0xab, 0x46, 0x6d, 0x94, 0xd8, 0xb4,
+-                        0x69, 0xcd, 0xae, 0x19, 0x6d, 0x9e, 0x41, 0xcc,
+-                        0x2b, 0xa4, 0xd5, 0xf6, },
+-}, {
+-      .ksize = 16,
+-      .key = blake2_ordered_sequence,
+-      .digest = (u8[]){ 0x32, 0xc0, 0xac, 0xf4, 0x3b, 0xd3, 0x07, 0x9f,
+-                        0xbe, 0xfb, 0xfa, 0x4d, 0x6b, 0x4e, 0x56, 0xb3,
+-                        0xaa, 0xd3, 0x27, 0xf6, 0x14, 0xbf, 0xb9, 0x32,
+-                        0xa7, 0x19, 0xfc, 0xb8, },
+-}, {
+-      .ksize = 1,
+-      .key = "B",
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 7,
+-      .digest = (u8[]){ 0x73, 0xad, 0x5e, 0x6d, 0xb9, 0x02, 0x8e, 0x76,
+-                        0xf2, 0x66, 0x42, 0x4b, 0x4c, 0xfa, 0x1f, 0xe6,
+-                        0x2e, 0x56, 0x40, 0xe5, 0xa2, 0xb0, 0x3c, 0xe8,
+-                        0x7b, 0x45, 0xfe, 0x05, },
+-}, {
+-      .ksize = 32,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 15,
+-      .digest = (u8[]){ 0x16, 0x60, 0xfb, 0x92, 0x54, 0xb3, 0x6e, 0x36,
+-                        0x81, 0xf4, 0x16, 0x41, 0xc3, 0x3d, 0xd3, 0x43,
+-                        0x84, 0xed, 0x10, 0x6f, 0x65, 0x80, 0x7a, 0x3e,
+-                        0x25, 0xab, 0xc5, 0x02, },
+-}, {
+-      .ksize = 16,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 64,
+-      .digest = (u8[]){ 0xca, 0xaa, 0x39, 0x67, 0x9c, 0xf7, 0x6b, 0xc7,
+-                        0xb6, 0x82, 0xca, 0x0e, 0x65, 0x36, 0x5b, 0x7c,
+-                        0x24, 0x00, 0xfa, 0x5f, 0xda, 0x06, 0x91, 0x93,
+-                        0x6a, 0x31, 0x83, 0xb5, },
+-}, {
+-      .ksize = 1,
+-      .key = "B",
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 256,
+-      .digest = (u8[]){ 0x90, 0x02, 0x26, 0xb5, 0x06, 0x9c, 0x36, 0x86,
+-                        0x94, 0x91, 0x90, 0x1e, 0x7d, 0x2a, 0x71, 0xb2,
+-                        0x48, 0xb5, 0xe8, 0x16, 0xfd, 0x64, 0x33, 0x45,
+-                        0xb3, 0xd7, 0xec, 0xcc, },
+-}};
+-
+-static const struct hash_testvec blakes2s_256_tv_template[] = {{
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 15,
+-      .digest = (u8[]){ 0xd9, 0x7c, 0x82, 0x8d, 0x81, 0x82, 0xa7, 0x21,
+-                        0x80, 0xa0, 0x6a, 0x78, 0x26, 0x83, 0x30, 0x67,
+-                        0x3f, 0x7c, 0x4e, 0x06, 0x35, 0x94, 0x7c, 0x04,
+-                        0xc0, 0x23, 0x23, 0xfd, 0x45, 0xc0, 0xa5, 0x2d, },
+-}, {
+-      .ksize = 32,
+-      .key = blake2_ordered_sequence,
+-      .digest = (u8[]){ 0x48, 0xa8, 0x99, 0x7d, 0xa4, 0x07, 0x87, 0x6b,
+-                        0x3d, 0x79, 0xc0, 0xd9, 0x23, 0x25, 0xad, 0x3b,
+-                        0x89, 0xcb, 0xb7, 0x54, 0xd8, 0x6a, 0xb7, 0x1a,
+-                        0xee, 0x04, 0x7a, 0xd3, 0x45, 0xfd, 0x2c, 0x49, },
+-}, {
+-      .ksize = 1,
+-      .key = "B",
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 1,
+-      .digest = (u8[]){ 0x22, 0x27, 0xae, 0xaa, 0x6e, 0x81, 0x56, 0x03,
+-                        0xa7, 0xe3, 0xa1, 0x18, 0xa5, 0x9a, 0x2c, 0x18,
+-                        0xf4, 0x63, 0xbc, 0x16, 0x70, 0xf1, 0xe7, 0x4b,
+-                        0x00, 0x6d, 0x66, 0x16, 0xae, 0x9e, 0x74, 0x4e, },
+-}, {
+-      .ksize = 16,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 7,
+-      .digest = (u8[]){ 0x58, 0x5d, 0xa8, 0x60, 0x1c, 0xa4, 0xd8, 0x03,
+-                        0x86, 0x86, 0x84, 0x64, 0xd7, 0xa0, 0x8e, 0x15,
+-                        0x2f, 0x05, 0xa2, 0x1b, 0xbc, 0xef, 0x7a, 0x34,
+-                        0xb3, 0xc5, 0xbc, 0x4b, 0xf0, 0x32, 0xeb, 0x12, },
+-}, {
+-      .ksize = 32,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 64,
+-      .digest = (u8[]){ 0x89, 0x75, 0xb0, 0x57, 0x7f, 0xd3, 0x55, 0x66,
+-                        0xd7, 0x50, 0xb3, 0x62, 0xb0, 0x89, 0x7a, 0x26,
+-                        0xc3, 0x99, 0x13, 0x6d, 0xf0, 0x7b, 0xab, 0xab,
+-                        0xbd, 0xe6, 0x20, 0x3f, 0xf2, 0x95, 0x4e, 0xd4, },
+-}, {
+-      .ksize = 1,
+-      .key = "B",
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 247,
+-      .digest = (u8[]){ 0x2e, 0x74, 0x1c, 0x1d, 0x03, 0xf4, 0x9d, 0x84,
+-                        0x6f, 0xfc, 0x86, 0x32, 0x92, 0x49, 0x7e, 0x66,
+-                        0xd7, 0xc3, 0x10, 0x88, 0xfe, 0x28, 0xb3, 0xe0,
+-                        0xbf, 0x50, 0x75, 0xad, 0x8e, 0xa4, 0xe6, 0xb2, },
+-}, {
+-      .ksize = 16,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 256,
+-      .digest = (u8[]){ 0xb9, 0xd2, 0x81, 0x0e, 0x3a, 0xb1, 0x62, 0x9b,
+-                        0xad, 0x44, 0x05, 0xf4, 0x92, 0x2e, 0x99, 0xc1,
+-                        0x4a, 0x47, 0xbb, 0x5b, 0x6f, 0xb2, 0x96, 0xed,
+-                        0xd5, 0x06, 0xb5, 0x3a, 0x7c, 0x7a, 0x65, 0x1d, },
+-}};
+-
+ #endif        /* _CRYPTO_TESTMGR_H */
+diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h
+index 52363eee2b20..506d56530ca9 100644
+--- a/include/crypto/internal/blake2s.h
++++ b/include/crypto/internal/blake2s.h
+@@ -8,7 +8,6 @@
+ #define _CRYPTO_INTERNAL_BLAKE2S_H
+ #include <crypto/blake2s.h>
+-#include <crypto/internal/hash.h>
+ #include <linux/string.h>
+ void blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
+@@ -19,111 +18,4 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block,
+ bool blake2s_selftest(void);
+-static inline void blake2s_set_lastblock(struct blake2s_state *state)
+-{
+-      state->f[0] = -1;
+-}
+-
+-/* Helper functions for BLAKE2s shared by the library and shash APIs */
+-
+-static __always_inline void
+-__blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen,
+-               bool force_generic)
+-{
+-      const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
+-
+-      if (unlikely(!inlen))
+-              return;
+-      if (inlen > fill) {
+-              memcpy(state->buf + state->buflen, in, fill);
+-              if (force_generic)
+-                      blake2s_compress_generic(state, state->buf, 1,
+-                                               BLAKE2S_BLOCK_SIZE);
+-              else
+-                      blake2s_compress(state, state->buf, 1,
+-                                       BLAKE2S_BLOCK_SIZE);
+-              state->buflen = 0;
+-              in += fill;
+-              inlen -= fill;
+-      }
+-      if (inlen > BLAKE2S_BLOCK_SIZE) {
+-              const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
+-              /* Hash one less (full) block than strictly possible */
+-              if (force_generic)
+-                      blake2s_compress_generic(state, in, nblocks - 1,
+-                                               BLAKE2S_BLOCK_SIZE);
+-              else
+-                      blake2s_compress(state, in, nblocks - 1,
+-                                       BLAKE2S_BLOCK_SIZE);
+-              in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+-              inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+-      }
+-      memcpy(state->buf + state->buflen, in, inlen);
+-      state->buflen += inlen;
+-}
+-
+-static __always_inline void
+-__blake2s_final(struct blake2s_state *state, u8 *out, bool force_generic)
+-{
+-      blake2s_set_lastblock(state);
+-      memset(state->buf + state->buflen, 0,
+-             BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
+-      if (force_generic)
+-              blake2s_compress_generic(state, state->buf, 1, state->buflen);
+-      else
+-              blake2s_compress(state, state->buf, 1, state->buflen);
+-      cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
+-      memcpy(out, state->h, state->outlen);
+-}
+-
+-/* Helper functions for shash implementations of BLAKE2s */
+-
+-struct blake2s_tfm_ctx {
+-      u8 key[BLAKE2S_KEY_SIZE];
+-      unsigned int keylen;
+-};
+-
+-static inline int crypto_blake2s_setkey(struct crypto_shash *tfm,
+-                                      const u8 *key, unsigned int keylen)
+-{
+-      struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+-
+-      if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE)
+-              return -EINVAL;
+-
+-      memcpy(tctx->key, key, keylen);
+-      tctx->keylen = keylen;
+-
+-      return 0;
+-}
+-
+-static inline int crypto_blake2s_init(struct shash_desc *desc)
+-{
+-      const struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+-      struct blake2s_state *state = shash_desc_ctx(desc);
+-      unsigned int outlen = crypto_shash_digestsize(desc->tfm);
+-
+-      __blake2s_init(state, outlen, tctx->key, tctx->keylen);
+-      return 0;
+-}
+-
+-static inline int crypto_blake2s_update(struct shash_desc *desc,
+-                                      const u8 *in, unsigned int inlen,
+-                                      bool force_generic)
+-{
+-      struct blake2s_state *state = shash_desc_ctx(desc);
+-
+-      __blake2s_update(state, in, inlen, force_generic);
+-      return 0;
+-}
+-
+-static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out,
+-                                     bool force_generic)
+-{
+-      struct blake2s_state *state = shash_desc_ctx(desc);
+-
+-      __blake2s_final(state, out, force_generic);
+-      return 0;
+-}
+-
+ #endif /* _CRYPTO_INTERNAL_BLAKE2S_H */
+diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c
+index 409e4b728770..66f505220f43 100644
+--- a/lib/crypto/blake2s-selftest.c
++++ b/lib/crypto/blake2s-selftest.c
+@@ -4,6 +4,8 @@
+  */
+ #include <crypto/internal/blake2s.h>
++#include <linux/kernel.h>
++#include <linux/random.h>
+ #include <linux/string.h>
+ /*
+@@ -587,5 +589,44 @@ bool __init blake2s_selftest(void)
+               }
+       }
++      for (i = 0; i < 32; ++i) {
++              enum { TEST_ALIGNMENT = 16 };
++              u8 unaligned_block[BLAKE2S_BLOCK_SIZE + TEST_ALIGNMENT - 1]
++                                      __aligned(TEST_ALIGNMENT);
++              u8 blocks[BLAKE2S_BLOCK_SIZE * 3];
++              struct blake2s_state state1, state2;
++
++              get_random_bytes(blocks, sizeof(blocks));
++              get_random_bytes(&state, sizeof(state));
++
++#if defined(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) && \
++    defined(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)
++              memcpy(&state1, &state, sizeof(state1));
++              memcpy(&state2, &state, sizeof(state2));
++              blake2s_compress(&state1, blocks, 3, BLAKE2S_BLOCK_SIZE);
++              blake2s_compress_generic(&state2, blocks, 3, BLAKE2S_BLOCK_SIZE);
++              if (memcmp(&state1, &state2, sizeof(state1))) {
++                      pr_err("blake2s random compress self-test %d: FAIL\n",
++                             i + 1);
++                      success = false;
++              }
++#endif
++
++              memcpy(&state1, &state, sizeof(state1));
++              blake2s_compress(&state1, blocks, 1, BLAKE2S_BLOCK_SIZE);
++              for (l = 1; l < TEST_ALIGNMENT; ++l) {
++                      memcpy(unaligned_block + l, blocks,
++                             BLAKE2S_BLOCK_SIZE);
++                      memcpy(&state2, &state, sizeof(state2));
++                      blake2s_compress(&state2, unaligned_block + l, 1,
++                                       BLAKE2S_BLOCK_SIZE);
++                      if (memcmp(&state1, &state2, sizeof(state1))) {
++                              pr_err("blake2s random compress align %d self-test %d: FAIL\n",
++                                     l, i + 1);
++                              success = false;
++                      }
++              }
++      }
++
+       return success;
+ }
+diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c
+index c71c09621c09..98e688c6d891 100644
+--- a/lib/crypto/blake2s.c
++++ b/lib/crypto/blake2s.c
+@@ -16,16 +16,44 @@
+ #include <linux/init.h>
+ #include <linux/bug.h>
++static inline void blake2s_set_lastblock(struct blake2s_state *state)
++{
++      state->f[0] = -1;
++}
++
+ void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
+ {
+-      __blake2s_update(state, in, inlen, false);
++      const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
++
++      if (unlikely(!inlen))
++              return;
++      if (inlen > fill) {
++              memcpy(state->buf + state->buflen, in, fill);
++              blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
++              state->buflen = 0;
++              in += fill;
++              inlen -= fill;
++      }
++      if (inlen > BLAKE2S_BLOCK_SIZE) {
++              const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
++              blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
++              in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
++              inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
++      }
++      memcpy(state->buf + state->buflen, in, inlen);
++      state->buflen += inlen;
+ }
+ EXPORT_SYMBOL(blake2s_update);
+ void blake2s_final(struct blake2s_state *state, u8 *out)
+ {
+       WARN_ON(IS_ENABLED(DEBUG) && !out);
+-      __blake2s_final(state, out, false);
++      blake2s_set_lastblock(state);
++      memset(state->buf + state->buflen, 0,
++             BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
++      blake2s_compress(state, state->buf, 1, state->buflen);
++      cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
++      memcpy(out, state->h, state->outlen);
+       memzero_explicit(state, sizeof(*state));
+ }
+ EXPORT_SYMBOL(blake2s_final);
+@@ -38,12 +66,7 @@ static int __init blake2s_mod_init(void)
+       return 0;
+ }
+-static void __exit blake2s_mod_exit(void)
+-{
+-}
+-
+ module_init(blake2s_mod_init);
+-module_exit(blake2s_mod_exit);
+ MODULE_LICENSE("GPL v2");
+ MODULE_DESCRIPTION("BLAKE2s hash function");
+ MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
+-- 
+2.35.1
+
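
The patch above deletes the shash glue and the testmgr vectors and folds the
streaming helpers into lib/crypto/blake2s.c, leaving only the BLAKE2s library
interface. As a rough sketch of a caller of that remaining interface (the
wrapper function below is hypothetical; the blake2s_* calls are the
<crypto/blake2s.h> library API):

#include <crypto/blake2s.h>

/* Hypothetical helper: hash a message, optionally keyed, via the
 * library API that survives the shash removal above. */
static void example_blake2s(const u8 *msg, size_t msg_len,
			    const u8 *key, size_t key_len,
			    u8 out[BLAKE2S_HASH_SIZE])
{
	struct blake2s_state state;

	if (key_len)
		blake2s_init_key(&state, BLAKE2S_HASH_SIZE, key, key_len);
	else
		blake2s_init(&state, BLAKE2S_HASH_SIZE);

	blake2s_update(&state, msg, msg_len);
	blake2s_final(&state, out);	/* also wipes the state, per the patch */
}
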
diff --git a/queue-5.18/dm-raid-fix-address-sanitizer-warning-in-raid_resume.patch b/queue-5.18/dm-raid-fix-address-sanitizer-warning-in-raid_resume.patch
new file mode 100644 (file)
index 0000000..1482b50
--- /dev/null
@@ -0,0 +1,38 @@
+From 737fa72cb41c624004f0ab3e93689a3a9cfa1b17 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 14:33:52 -0400
+Subject: dm raid: fix address sanitizer warning in raid_resume
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+[ Upstream commit 7dad24db59d2d2803576f2e3645728866a056dab ]
+
+There is a KASAN warning in raid_resume when running the lvm test
+lvconvert-raid.sh. The reason for the warning is that mddev->raid_disks
+is greater than rs->raid_disks, so the loop touches one entry beyond
+the allocated length.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/dm-raid.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
+index 92e6b731f9d6..a55d6f6f294b 100644
+--- a/drivers/md/dm-raid.c
++++ b/drivers/md/dm-raid.c
+@@ -3824,7 +3824,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
+       memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
+-      for (i = 0; i < mddev->raid_disks; i++) {
++      for (i = 0; i < rs->raid_disks; i++) {
+               r = &rs->dev[i].rdev;
+               /* HM FIXME: enhance journal device recovery processing */
+               if (test_bit(Journal, &r->flags))
+-- 
+2.35.1
+
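
The bug fixed above is a loop bounded by one object's disk count while
indexing an array allocated from another. A minimal userspace sketch of that
pattern (invented names, not the dm-raid code) that KASAN would flag as
slab-out-of-bounds:

#include <stdio.h>
#include <stdlib.h>

struct dev { int faulty; };

int main(void)
{
	int rs_raid_disks = 4;     /* size the array was allocated with */
	int mddev_raid_disks = 5;  /* larger, stale counter */
	struct dev *devs = calloc(rs_raid_disks, sizeof(*devs));

	/* Buggy: using the wrong counter reads one entry past the
	 * allocation; the fix is to bound the loop by rs_raid_disks. */
	for (int i = 0; i < mddev_raid_disks; i++)
		printf("dev %d faulty=%d\n", i, devs[i].faulty);

	free(devs);
	return 0;
}
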
diff --git a/queue-5.18/dm-raid-fix-address-sanitizer-warning-in-raid_status.patch b/queue-5.18/dm-raid-fix-address-sanitizer-warning-in-raid_status.patch
new file mode 100644 (file)
index 0000000..a6a9ac2
--- /dev/null
@@ -0,0 +1,68 @@
+From 78121652de18db6e02f29395b05f1f73c1cf3fd2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 14:31:35 -0400
+Subject: dm raid: fix address sanitizer warning in raid_status
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+[ Upstream commit 1fbeea217d8f297fe0e0956a1516d14ba97d0396 ]
+
+There is this warning when using a kernel with the address sanitizer
+and running this testsuite:
+https://gitlab.com/cki-project/kernel-tests/-/tree/main/storage/swraid/scsi_raid
+
+==================================================================
+BUG: KASAN: slab-out-of-bounds in raid_status+0x1747/0x2820 [dm_raid]
+Read of size 4 at addr ffff888079d2c7e8 by task lvcreate/13319
+CPU: 0 PID: 13319 Comm: lvcreate Not tainted 5.18.0-0.rc3.<snip> #1
+Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x6a/0x9c
+ print_address_description.constprop.0+0x1f/0x1e0
+ print_report.cold+0x55/0x244
+ kasan_report+0xc9/0x100
+ raid_status+0x1747/0x2820 [dm_raid]
+ dm_ima_measure_on_table_load+0x4b8/0xca0 [dm_mod]
+ table_load+0x35c/0x630 [dm_mod]
+ ctl_ioctl+0x411/0x630 [dm_mod]
+ dm_ctl_ioctl+0xa/0x10 [dm_mod]
+ __x64_sys_ioctl+0x12a/0x1a0
+ do_syscall_64+0x5b/0x80
+
+The warning is caused by reading conf->max_nr_stripes in raid_status. The
+code in raid_status reads mddev->private, casts it to struct r5conf and
+reads the entry max_nr_stripes.
+
+However, if we have a raid type other than 4/5/6, mddev->private
+doesn't point to struct r5conf; it may point to struct r0conf, struct
+r1conf, struct r10conf or struct mpconf. If we cast a pointer to one
+of these structs to struct r5conf, we will be reading invalid memory
+and KASAN warns about it.
+
+Fix this bug by reading struct r5conf only if raid type is 4, 5 or 6.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/dm-raid.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
+index e362a7471512..92e6b731f9d6 100644
+--- a/drivers/md/dm-raid.c
++++ b/drivers/md/dm-raid.c
+@@ -3514,7 +3514,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
+ {
+       struct raid_set *rs = ti->private;
+       struct mddev *mddev = &rs->md;
+-      struct r5conf *conf = mddev->private;
++      struct r5conf *conf = rs_is_raid456(rs) ? mddev->private : NULL;
+       int i, max_nr_stripes = conf ? conf->max_nr_stripes : 0;
+       unsigned long recovery;
+       unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */
+-- 
+2.35.1
+
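
The one-line fix guards the cast of the type-erased mddev->private pointer
behind a check of the raid level, substituting NULL (and a 0 default) for the
other types. A sketch of that guard pattern with invented types:

#include <stdio.h>

enum raid_level { RAID0, RAID5 };

struct r5conf { int max_nr_stripes; };

struct raid_set {
	enum raid_level level;
	void *private;	/* points to a level-specific conf structure */
};

static int stripe_cache_size(const struct raid_set *rs)
{
	/* Interpret ->private as r5conf only when the level says so;
	 * otherwise fall back to 0 instead of reading foreign memory. */
	const struct r5conf *conf =
		rs->level == RAID5 ? rs->private : NULL;

	return conf ? conf->max_nr_stripes : 0;
}

int main(void)
{
	struct r5conf conf = { .max_nr_stripes = 256 };
	struct raid_set r5 = { RAID5, &conf }, r0 = { RAID0, &conf };

	printf("%d %d\n", stripe_cache_size(&r5), stripe_cache_size(&r0));
	return 0;
}
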
diff --git a/queue-5.18/dm-thin-fix-use-after-free-crash-in-dm_sm_register_t.patch b/queue-5.18/dm-thin-fix-use-after-free-crash-in-dm_sm_register_t.patch
new file mode 100644 (file)
index 0000000..79fc929
--- /dev/null
@@ -0,0 +1,96 @@
+From 185d3911adcea01b2082c14635a2a8071134384b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Jul 2022 19:28:25 +0800
+Subject: dm thin: fix use-after-free crash in
+ dm_sm_register_threshold_callback
+
+From: Luo Meng <luomeng12@huawei.com>
+
+[ Upstream commit 3534e5a5ed2997ca1b00f44a0378a075bd05e8a3 ]
+
+Fault inject on pool metadata device reports:
+  BUG: KASAN: use-after-free in dm_pool_register_metadata_threshold+0x40/0x80
+  Read of size 8 at addr ffff8881b9d50068 by task dmsetup/950
+
+  CPU: 7 PID: 950 Comm: dmsetup Tainted: G        W         5.19.0-rc6 #1
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014
+  Call Trace:
+   <TASK>
+   dump_stack_lvl+0x34/0x44
+   print_address_description.constprop.0.cold+0xeb/0x3f4
+   kasan_report.cold+0xe6/0x147
+   dm_pool_register_metadata_threshold+0x40/0x80
+   pool_ctr+0xa0a/0x1150
+   dm_table_add_target+0x2c8/0x640
+   table_load+0x1fd/0x430
+   ctl_ioctl+0x2c4/0x5a0
+   dm_ctl_ioctl+0xa/0x10
+   __x64_sys_ioctl+0xb3/0xd0
+   do_syscall_64+0x35/0x80
+   entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+This can be easily reproduced using:
+  echo offline > /sys/block/sda/device/state
+  dd if=/dev/zero of=/dev/mapper/thin bs=4k count=10
+  dmsetup load pool --table "0 20971520 thin-pool /dev/sda /dev/sdb 128 0 0"
+
+If a metadata commit fails, the transaction will be aborted and the
+metadata space maps will be destroyed. If a DM table reload then
+happens for this failed thin-pool, a use-after-free will occur in
+dm_sm_register_threshold_callback (called from
+dm_pool_register_metadata_threshold).
+
+Fix this in dm_pool_register_metadata_threshold() by returning the
+-EINVAL error if the thin-pool is in fail mode. Also fail pool_ctr()
+with a new error message: "Error registering metadata threshold".
+
+Fixes: ac8c3f3df65e4 ("dm thin: generate event when metadata threshold passed")
+Cc: stable@vger.kernel.org
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Luo Meng <luomeng12@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/dm-thin-metadata.c | 7 +++++--
+ drivers/md/dm-thin.c          | 4 +++-
+ 2 files changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
+index 2db7030aba00..a27395c8621f 100644
+--- a/drivers/md/dm-thin-metadata.c
++++ b/drivers/md/dm-thin-metadata.c
+@@ -2045,10 +2045,13 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
+                                       dm_sm_threshold_fn fn,
+                                       void *context)
+ {
+-      int r;
++      int r = -EINVAL;
+       pmd_write_lock_in_core(pmd);
+-      r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context);
++      if (!pmd->fail_io) {
++              r = dm_sm_register_threshold_callback(pmd->metadata_sm,
++                                                    threshold, fn, context);
++      }
+       pmd_write_unlock(pmd);
+       return r;
+diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
+index 4d25d0e27031..53ac6ae870ac 100644
+--- a/drivers/md/dm-thin.c
++++ b/drivers/md/dm-thin.c
+@@ -3382,8 +3382,10 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
+                                               calc_metadata_threshold(pt),
+                                               metadata_low_callback,
+                                               pool);
+-      if (r)
++      if (r) {
++              ti->error = "Error registering metadata threshold";
+               goto out_flags_changed;
++      }
+       dm_pool_register_pre_commit_callback(pool->pmd,
+                                            metadata_pre_commit_callback, pool);
+-- 
+2.35.1
+
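
The fix checks pmd->fail_io under the same write lock that teardown takes, so
the callback is never registered against destroyed space maps. A userspace
sketch of checking a failure flag under the protecting lock (a pthread mutex
standing in for pmd_write_lock_in_core(); all names invented):

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct pool_md {
	pthread_mutex_t lock;	/* stands in for the pmd write lock */
	bool fail_io;		/* set when the transaction was aborted */
};

static int register_threshold(struct pool_md *pmd)
{
	int r = -EINVAL;

	pthread_mutex_lock(&pmd->lock);
	/* Only touch the space maps while the flag says they are alive;
	 * skipping this check is the use-after-free fixed above. */
	if (!pmd->fail_io)
		r = 0;	/* ...register the callback here... */
	pthread_mutex_unlock(&pmd->lock);
	return r;
}

int main(void)
{
	struct pool_md pmd = { PTHREAD_MUTEX_INITIALIZER, .fail_io = true };

	printf("register -> %d\n", register_threshold(&pmd));	/* -22 */
	return 0;
}
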
diff --git a/queue-5.18/dm-writecache-set-a-default-max_writeback_jobs.patch b/queue-5.18/dm-writecache-set-a-default-max_writeback_jobs.patch
new file mode 100644 (file)
index 0000000..99eadb5
--- /dev/null
@@ -0,0 +1,41 @@
+From 36bc440738ef1c764c40ac40f0a1d699ee8d1d3f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jul 2022 07:09:04 -0400
+Subject: dm writecache: set a default MAX_WRITEBACK_JOBS
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+[ Upstream commit ca7dc242e358e46d963b32f9d9dd829785a9e957 ]
+
+dm-writecache has the capability to limit the number of writeback jobs
+in progress. However, this feature was off by default. As a result,
+out-of-memory crashes were observed when lowering the low watermark
+while the cache was full.
+
+This commit enables writeback limit by default. It is set to 256MiB or
+1/16 of total system memory, whichever is smaller.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/dm-writecache.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
+index e5acb393f70b..27557b852c94 100644
+--- a/drivers/md/dm-writecache.c
++++ b/drivers/md/dm-writecache.c
+@@ -22,7 +22,7 @@
+ #define HIGH_WATERMARK                        50
+ #define LOW_WATERMARK                 45
+-#define MAX_WRITEBACK_JOBS            0
++#define MAX_WRITEBACK_JOBS            min(0x10000000 / PAGE_SIZE, totalram_pages() / 16)
+ #define ENDIO_LATENCY                 16
+ #define WRITEBACK_LATENCY             64
+ #define AUTOCOMMIT_BLOCKS_SSD         65536
+-- 
+2.35.1
+
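
The new default is min(256 MiB worth of pages, 1/16 of system RAM). A small
compile-ready check of that arithmetic with PAGE_SIZE assumed to be 4096
(illustrative values, not kernel code):

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Mirrors the MAX_WRITEBACK_JOBS expression from the patch. */
static unsigned long max_writeback_jobs(unsigned long totalram_pages)
{
	unsigned long cap = 0x10000000 / PAGE_SIZE;  /* 256 MiB = 65536 pages */
	unsigned long part = totalram_pages / 16;

	return cap < part ? cap : part;
}

int main(void)
{
	/* 2 GiB box (524288 pages): 1/16 is 32768 pages (128 MiB) and wins */
	printf("%lu\n", max_writeback_jobs(524288));
	/* 16 GiB box (4194304 pages): the 256 MiB cap (65536 pages) wins */
	printf("%lu\n", max_writeback_jobs(4194304));
	return 0;
}
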
diff --git a/queue-5.18/drivers-base-fix-userspace-break-from-using-bin_attr.patch b/queue-5.18/drivers-base-fix-userspace-break-from-using-bin_attr.patch
new file mode 100644 (file)
index 0000000..8278cc9
--- /dev/null
@@ -0,0 +1,188 @@
+From cb546eb4a3db40281eb2d2b70ec132d20dba301e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 09:49:24 -0400
+Subject: drivers/base: fix userspace break from using bin_attributes for
+ cpumap and cpulist
+
+From: Phil Auld <pauld@redhat.com>
+
+[ Upstream commit 7ee951acd31a88f941fd6535fbdee3a1567f1d63 ]
+
+Using bin_attributes with a 0 size causes fstat and friends to return that
+0 size. This breaks userspace code that retrieves the size before reading
+the file. Rather than reverting 75bd50fa841 ("drivers/base/node.c: use
+bin_attribute to break the size limitation of cpumap ABI") let's put in a
+size value at compile time.
+
+For cpulist the maximum size is on the order of
+       NR_CPUS * (ceil(log10(NR_CPUS)) + 1)/2
+
+which for 8192 is 20480 (8192 * 5)/2. In order to get near that you'd need
+a system with every other CPU on one node. For example: (0,2,4,8, ... ).
+To simplify the math and support larger NR_CPUS in the future we are using
+(NR_CPUS * 7)/2. We also set it to a min of PAGE_SIZE to retain the older
+behavior for smaller NR_CPUS.
+
+For the cpumap file the size works out to be NR_CPUS/4 + NR_CPUS/32 - 1
+(or NR_CPUS * 9/32 - 1) including the ","s.
+
+Add a set of macros for these values to cpumask.h so they can be used in
+multiple places. Apply these to the handful of such files in
+drivers/base/topology.c as well as node.c.
+
+As an example, on an 80 cpu 4-node system (NR_CPUS == 8192):
+
+before:
+
+-r--r--r--. 1 root root 0 Jul 12 14:08 system/node/node0/cpulist
+-r--r--r--. 1 root root 0 Jul 11 17:25 system/node/node0/cpumap
+
+after:
+
+-r--r--r--. 1 root root 28672 Jul 13 11:32 system/node/node0/cpulist
+-r--r--r--. 1 root root  4096 Jul 13 11:31 system/node/node0/cpumap
+
+CONFIG_NR_CPUS = 16384
+-r--r--r--. 1 root root 57344 Jul 13 14:03 system/node/node0/cpulist
+-r--r--r--. 1 root root  4607 Jul 13 14:02 system/node/node0/cpumap
+
+The actual number of cpus doesn't matter for the reported size since they
+are based on NR_CPUS.
+
+Fixes: 75bd50fa841d ("drivers/base/node.c: use bin_attribute to break the size limitation of cpumap ABI")
+Fixes: bb9ec13d156e ("topology: use bin_attribute to break the size limitation of cpumap ABI")
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: "Rafael J. Wysocki" <rafael@kernel.org>
+Cc: Yury Norov <yury.norov@gmail.com>
+Cc: stable@vger.kernel.org
+Acked-by: Yury Norov <yury.norov@gmail.com> (for include/linux/cpumask.h)
+Signed-off-by: Phil Auld <pauld@redhat.com>
+Link: https://lore.kernel.org/r/20220715134924.3466194-1-pauld@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/node.c     |  4 ++--
+ drivers/base/topology.c | 32 ++++++++++++++++----------------
+ include/linux/cpumask.h | 18 ++++++++++++++++++
+ 3 files changed, 36 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/base/node.c b/drivers/base/node.c
+index 0ac6376ef7a1..eb0f43784c2b 100644
+--- a/drivers/base/node.c
++++ b/drivers/base/node.c
+@@ -45,7 +45,7 @@ static inline ssize_t cpumap_read(struct file *file, struct kobject *kobj,
+       return n;
+ }
+-static BIN_ATTR_RO(cpumap, 0);
++static BIN_ATTR_RO(cpumap, CPUMAP_FILE_MAX_BYTES);
+ static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj,
+                                  struct bin_attribute *attr, char *buf,
+@@ -66,7 +66,7 @@ static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj,
+       return n;
+ }
+-static BIN_ATTR_RO(cpulist, 0);
++static BIN_ATTR_RO(cpulist, CPULIST_FILE_MAX_BYTES);
+ /**
+  * struct node_access_nodes - Access class device to hold user visible
+diff --git a/drivers/base/topology.c b/drivers/base/topology.c
+index ac6ad9ab67f9..89f98be5c5b9 100644
+--- a/drivers/base/topology.c
++++ b/drivers/base/topology.c
+@@ -62,47 +62,47 @@ define_id_show_func(ppin, "0x%llx");
+ static DEVICE_ATTR_ADMIN_RO(ppin);
+ define_siblings_read_func(thread_siblings, sibling_cpumask);
+-static BIN_ATTR_RO(thread_siblings, 0);
+-static BIN_ATTR_RO(thread_siblings_list, 0);
++static BIN_ATTR_RO(thread_siblings, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(thread_siblings_list, CPULIST_FILE_MAX_BYTES);
+ define_siblings_read_func(core_cpus, sibling_cpumask);
+-static BIN_ATTR_RO(core_cpus, 0);
+-static BIN_ATTR_RO(core_cpus_list, 0);
++static BIN_ATTR_RO(core_cpus, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(core_cpus_list, CPULIST_FILE_MAX_BYTES);
+ define_siblings_read_func(core_siblings, core_cpumask);
+-static BIN_ATTR_RO(core_siblings, 0);
+-static BIN_ATTR_RO(core_siblings_list, 0);
++static BIN_ATTR_RO(core_siblings, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(core_siblings_list, CPULIST_FILE_MAX_BYTES);
+ #ifdef TOPOLOGY_CLUSTER_SYSFS
+ define_siblings_read_func(cluster_cpus, cluster_cpumask);
+-static BIN_ATTR_RO(cluster_cpus, 0);
+-static BIN_ATTR_RO(cluster_cpus_list, 0);
++static BIN_ATTR_RO(cluster_cpus, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(cluster_cpus_list, CPULIST_FILE_MAX_BYTES);
+ #endif
+ #ifdef TOPOLOGY_DIE_SYSFS
+ define_siblings_read_func(die_cpus, die_cpumask);
+-static BIN_ATTR_RO(die_cpus, 0);
+-static BIN_ATTR_RO(die_cpus_list, 0);
++static BIN_ATTR_RO(die_cpus, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(die_cpus_list, CPULIST_FILE_MAX_BYTES);
+ #endif
+ define_siblings_read_func(package_cpus, core_cpumask);
+-static BIN_ATTR_RO(package_cpus, 0);
+-static BIN_ATTR_RO(package_cpus_list, 0);
++static BIN_ATTR_RO(package_cpus, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(package_cpus_list, CPULIST_FILE_MAX_BYTES);
+ #ifdef TOPOLOGY_BOOK_SYSFS
+ define_id_show_func(book_id, "%d");
+ static DEVICE_ATTR_RO(book_id);
+ define_siblings_read_func(book_siblings, book_cpumask);
+-static BIN_ATTR_RO(book_siblings, 0);
+-static BIN_ATTR_RO(book_siblings_list, 0);
++static BIN_ATTR_RO(book_siblings, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(book_siblings_list, CPULIST_FILE_MAX_BYTES);
+ #endif
+ #ifdef TOPOLOGY_DRAWER_SYSFS
+ define_id_show_func(drawer_id, "%d");
+ static DEVICE_ATTR_RO(drawer_id);
+ define_siblings_read_func(drawer_siblings, drawer_cpumask);
+-static BIN_ATTR_RO(drawer_siblings, 0);
+-static BIN_ATTR_RO(drawer_siblings_list, 0);
++static BIN_ATTR_RO(drawer_siblings, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(drawer_siblings_list, CPULIST_FILE_MAX_BYTES);
+ #endif
+ static struct bin_attribute *bin_attrs[] = {
+diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
+index fe29ac7cc469..4592d0845941 100644
+--- a/include/linux/cpumask.h
++++ b/include/linux/cpumask.h
+@@ -1071,4 +1071,22 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask,
+       [0] =  1UL                                                      \
+ } }
++/*
++ * Provide a valid theoretical max size for cpumap and cpulist sysfs files
++ * to avoid breaking userspace which may allocate a buffer based on the size
++ * reported by e.g. fstat.
++ *
++ * for cpumap NR_CPUS * 9/32 - 1 should be an exact length.
++ *
++ * For cpulist 7 is (ceil(log10(NR_CPUS)) + 1) allowing for NR_CPUS to be up
++ * to 2 orders of magnitude larger than 8192. And then we divide by 2 to
++ * cover a worst-case of every other cpu being on one of two nodes for a
++ * very large NR_CPUS.
++ *
++ *  Use PAGE_SIZE as a minimum for smaller configurations.
++ */
++#define CPUMAP_FILE_MAX_BYTES  ((((NR_CPUS * 9)/32 - 1) > PAGE_SIZE) \
++                                      ? (NR_CPUS * 9)/32 - 1 : PAGE_SIZE)
++#define CPULIST_FILE_MAX_BYTES  (((NR_CPUS * 7)/2 > PAGE_SIZE) ? (NR_CPUS * 7)/2 : PAGE_SIZE)
++
+ #endif /* __LINUX_CPUMASK_H */
+-- 
+2.35.1
+
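
The sizes quoted in the commit message follow directly from the two new
macros. A compile-ready check, with NR_CPUS and PAGE_SIZE fixed to the values
discussed above (assumptions for illustration, not kernel configuration):

#include <stdio.h>

#define PAGE_SIZE 4096UL

static unsigned long cpumap_max(unsigned long nr_cpus)
{
	unsigned long n = nr_cpus * 9 / 32 - 1;

	return n > PAGE_SIZE ? n : PAGE_SIZE;
}

static unsigned long cpulist_max(unsigned long nr_cpus)
{
	unsigned long n = nr_cpus * 7 / 2;

	return n > PAGE_SIZE ? n : PAGE_SIZE;
}

int main(void)
{
	/* NR_CPUS=8192: cpumap 9*8192/32-1 = 2303, clamped up to 4096;
	 * cpulist 7*8192/2 = 28672 -- matching the ls output above. */
	printf("%lu %lu\n", cpumap_max(8192), cpulist_max(8192));
	/* NR_CPUS=16384: 4607 and 57344, as in the commit message. */
	printf("%lu %lu\n", cpumap_max(16384), cpulist_max(16384));
	return 0;
}
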
diff --git a/queue-5.18/drm-dp-mst-read-the-extended-dpcd-capabilities-durin.patch b/queue-5.18/drm-dp-mst-read-the-extended-dpcd-capabilities-durin.patch
new file mode 100644 (file)
index 0000000..59fc59a
--- /dev/null
@@ -0,0 +1,57 @@
+From c73a45d0946910af4186cf5c6152e0792eaaabd2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Jun 2022 12:45:37 +0300
+Subject: drm/dp/mst: Read the extended DPCD capabilities during system resume
+
+From: Imre Deak <imre.deak@intel.com>
+
+[ Upstream commit 7a710a8bc909313951eb9252d8419924c771d7c2 ]
+
+The WD22TB4 Thunderbolt dock, at least, will revert its DP_MAX_LINK_RATE
+from HBR3 to HBR2 after system suspend/resume if the DP_DP13_DPCD_REV
+registers are not also read afterwards, as required.
+
+Fix this by reading DP_DP13_DPCD_REV registers as well, matching what is
+done during connector detection. While at it also fix up the same call
+in drm_dp_mst_dump_topology().
+
+Cc: Lyude Paul <lyude@redhat.com>
+Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5292
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Reviewed-by: Jani Nikula <jani.nikula@intel.com>
+Cc: <stable@vger.kernel.org> # v5.14+
+Reviewed-by: Lyude Paul <lyude@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20220614094537.885472-1-imre.deak@intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/dp/drm_dp_mst_topology.c | 7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpu/drm/dp/drm_dp_mst_topology.c b/drivers/gpu/drm/dp/drm_dp_mst_topology.c
+index 7a7cc44686f9..96869875390f 100644
+--- a/drivers/gpu/drm/dp/drm_dp_mst_topology.c
++++ b/drivers/gpu/drm/dp/drm_dp_mst_topology.c
+@@ -3861,9 +3861,7 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr,
+       if (!mgr->mst_primary)
+               goto out_fail;
+-      ret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, mgr->dpcd,
+-                             DP_RECEIVER_CAP_SIZE);
+-      if (ret != DP_RECEIVER_CAP_SIZE) {
++      if (drm_dp_read_dpcd_caps(mgr->aux, mgr->dpcd) < 0) {
+               drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n");
+               goto out_fail;
+       }
+@@ -4912,8 +4910,7 @@ void drm_dp_mst_dump_topology(struct seq_file *m,
+               u8 buf[DP_PAYLOAD_TABLE_SIZE];
+               int ret;
+-              ret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, buf, DP_RECEIVER_CAP_SIZE);
+-              if (ret) {
++              if (drm_dp_read_dpcd_caps(mgr->aux, buf) < 0) {
+                       seq_printf(m, "dpcd read failed\n");
+                       goto out;
+               }
+-- 
+2.35.1
+
diff --git a/queue-5.18/drm-mediatek-keep-dsi-as-lp00-before-dcs-cmds-transf.patch b/queue-5.18/drm-mediatek-keep-dsi-as-lp00-before-dcs-cmds-transf.patch
new file mode 100644 (file)
index 0000000..1adb3c8
--- /dev/null
@@ -0,0 +1,116 @@
+From 72b19d4c277cf659673dc5b4b02f03f6ea4a746e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 May 2022 10:00:06 +0800
+Subject: drm/mediatek: Keep dsi as LP00 before dcs cmds transfer
+
+From: Jitao Shi <jitao.shi@mediatek.com>
+
+[ Upstream commit 39e8d062b03c3dc257d880d82bd55cdd9e185a3b ]
+
+To comply with the panel sequence, hold the mipi signal at LP00 before
+the dcs cmds transmission, and do not pull the mipi signal high from
+LP00 to LP11 until the dcs cmds transmission is about to start.
+
+The normal panel timing is :
+(1) pp1800 DC pull up
+(2) avdd & avee AC pull high
+(3) lcm_reset pull high -> pull low -> pull high
+(4) Pull MIPI signal high (LP11) -> initial code -> send video data
+    (HS mode)
+
+The power-off sequence is reversed.
+If dsi is not in cmd mode, then dsi will pull the mipi signal high in
+the mtk_output_dsi_enable function. The delay in mtk_dsi_lane_ready()
+allows for the reaction time of dsi_rx after pulling the mipi signal high.
+
+Fixes: 2dd8075d2185 ("drm/mediatek: mtk_dsi: Use the drm_panel_bridge API")
+
+Link: https://patchwork.kernel.org/project/linux-mediatek/patch/1653012007-11854-4-git-send-email-xinlei.lee@mediatek.com/
+Cc: <stable@vger.kernel.org> # 5.10.x: 7f6335c6a258: drm/mediatek: Modify dsi funcs to atomic operations
+Cc: <stable@vger.kernel.org> # 5.10.x: cde7e2e35c28: drm/mediatek: Separate poweron/poweroff from enable/disable and define new funcs
+Cc: <stable@vger.kernel.org> # 5.10.x
+Signed-off-by: Jitao Shi <jitao.shi@mediatek.com>
+Signed-off-by: Xinlei Lee <xinlei.lee@mediatek.com>
+Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+Reviewed-by: Rex-BC Chen <rex-bc.chen@mediatek.com>
+Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/mediatek/mtk_dsi.c | 28 +++++++++++++++++++++-------
+ 1 file changed, 21 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
+index f0f523bdafb8..e0a2d5ea40af 100644
+--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
++++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
+@@ -203,6 +203,7 @@ struct mtk_dsi {
+       struct mtk_phy_timing phy_timing;
+       int refcount;
+       bool enabled;
++      bool lanes_ready;
+       u32 irq_data;
+       wait_queue_head_t irq_wait_queue;
+       const struct mtk_dsi_driver_data *driver_data;
+@@ -649,18 +650,11 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi)
+       mtk_dsi_reset_engine(dsi);
+       mtk_dsi_phy_timconfig(dsi);
+-      mtk_dsi_rxtx_control(dsi);
+-      usleep_range(30, 100);
+-      mtk_dsi_reset_dphy(dsi);
+       mtk_dsi_ps_control_vact(dsi);
+       mtk_dsi_set_vm_cmd(dsi);
+       mtk_dsi_config_vdo_timing(dsi);
+       mtk_dsi_set_interrupt_enable(dsi);
+-      mtk_dsi_clk_ulp_mode_leave(dsi);
+-      mtk_dsi_lane0_ulp_mode_leave(dsi);
+-      mtk_dsi_clk_hs_mode(dsi, 0);
+-
+       return 0;
+ err_disable_engine_clk:
+       clk_disable_unprepare(dsi->engine_clk);
+@@ -691,6 +685,23 @@ static void mtk_dsi_poweroff(struct mtk_dsi *dsi)
+       clk_disable_unprepare(dsi->digital_clk);
+       phy_power_off(dsi->phy);
++
++      dsi->lanes_ready = false;
++}
++
++static void mtk_dsi_lane_ready(struct mtk_dsi *dsi)
++{
++      if (!dsi->lanes_ready) {
++              dsi->lanes_ready = true;
++              mtk_dsi_rxtx_control(dsi);
++              usleep_range(30, 100);
++              mtk_dsi_reset_dphy(dsi);
++              mtk_dsi_clk_ulp_mode_leave(dsi);
++              mtk_dsi_lane0_ulp_mode_leave(dsi);
++              mtk_dsi_clk_hs_mode(dsi, 0);
++              msleep(20);
++              /* The reaction time after pulling up the mipi signal for dsi_rx */
++      }
+ }
+ static void mtk_output_dsi_enable(struct mtk_dsi *dsi)
+@@ -698,6 +709,7 @@ static void mtk_output_dsi_enable(struct mtk_dsi *dsi)
+       if (dsi->enabled)
+               return;
++      mtk_dsi_lane_ready(dsi);
+       mtk_dsi_set_mode(dsi);
+       mtk_dsi_clk_hs_mode(dsi, 1);
+@@ -1007,6 +1019,8 @@ static ssize_t mtk_dsi_host_transfer(struct mipi_dsi_host *host,
+       if (MTK_DSI_HOST_IS_READ(msg->type))
+               irq_flag |= LPRX_RD_RDY_INT_FLAG;
++      mtk_dsi_lane_ready(dsi);
++
+       ret = mtk_dsi_host_send_cmd(dsi, msg, irq_flag);
+       if (ret)
+               goto restore_dsi_mode;
+-- 
+2.35.1
+
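
The patch moves the lane bring-up into mtk_dsi_lane_ready(), guarded by a
lanes_ready flag that power-off clears, so the enable path and the
command-transfer path can both call it and the LP00-to-LP11 sequence runs
exactly once per power cycle. A sketch of that run-once pattern (invented
names):

#include <stdbool.h>
#include <stdio.h>

struct dsi { bool lanes_ready; };

/* Bring the lanes from LP00 to LP11 at most once per power cycle;
 * callable from both the enable and the cmd-transfer paths. */
static void dsi_lane_ready(struct dsi *dsi)
{
	if (dsi->lanes_ready)
		return;
	dsi->lanes_ready = true;
	puts("LP00 -> LP11 bring-up");
}

static void dsi_poweroff(struct dsi *dsi)
{
	dsi->lanes_ready = false;	/* re-arm for the next power cycle */
}

int main(void)
{
	struct dsi dsi = { .lanes_ready = false };

	dsi_lane_ready(&dsi);	/* cmd transfer: performs the bring-up */
	dsi_lane_ready(&dsi);	/* enable path: already done, no-op */
	dsi_poweroff(&dsi);
	dsi_lane_ready(&dsi);	/* next power cycle: runs again */
	return 0;
}
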
diff --git a/queue-5.18/drm-vc4-drv-adopt-the-dma-configuration-from-the-hvs.patch b/queue-5.18/drm-vc4-drv-adopt-the-dma-configuration-from-the-hvs.patch
new file mode 100644 (file)
index 0000000..186edb8
--- /dev/null
@@ -0,0 +1,68 @@
+From eb33b7125fa369b35e05a9779b1ee772cfe928cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Jun 2022 16:47:28 +0200
+Subject: drm/vc4: drv: Adopt the dma configuration from the HVS or V3D
+ component
+
+From: Dave Stevenson <dave.stevenson@raspberrypi.com>
+
+[ Upstream commit da8e393e23efb60eba8959856c7df88f9859f6eb ]
+
+vc4_drv isn't necessarily under the /soc node in DT as it is a
+virtual device, but it is the one that does the allocations.
+The DMA addresses are consumed primarily by the HVS or V3D, and
+those require VideoCore cache alias address mapping, and so will be
+under /soc.
+
+During probe, find a suitable device node for the HVS or V3D,
+and adopt the DMA configuration of that node.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
+Link: https://lore.kernel.org/r/20220613144800.326124-2-maxime@cerno.tech
+Signed-off-by: Maxime Ripard <maxime@cerno.tech>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/vc4/vc4_drv.c | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
+index 162bc18e7497..14a7d529144d 100644
+--- a/drivers/gpu/drm/vc4/vc4_drv.c
++++ b/drivers/gpu/drm/vc4/vc4_drv.c
+@@ -209,6 +209,15 @@ static void vc4_match_add_drivers(struct device *dev,
+       }
+ }
++const struct of_device_id vc4_dma_range_matches[] = {
++      { .compatible = "brcm,bcm2711-hvs" },
++      { .compatible = "brcm,bcm2835-hvs" },
++      { .compatible = "brcm,bcm2835-v3d" },
++      { .compatible = "brcm,cygnus-v3d" },
++      { .compatible = "brcm,vc4-v3d" },
++      {}
++};
++
+ static int vc4_drm_bind(struct device *dev)
+ {
+       struct platform_device *pdev = to_platform_device(dev);
+@@ -227,6 +236,16 @@ static int vc4_drm_bind(struct device *dev)
+               vc4_drm_driver.driver_features &= ~DRIVER_RENDER;
+       of_node_put(node);
++      node = of_find_matching_node_and_match(NULL, vc4_dma_range_matches,
++                                             NULL);
++      if (node) {
++              ret = of_dma_configure(dev, node, true);
++              of_node_put(node);
++
++              if (ret)
++                      return ret;
++      }
++
+       vc4 = devm_drm_dev_alloc(dev, &vc4_drm_driver, struct vc4_dev, base);
+       if (IS_ERR(vc4))
+               return PTR_ERR(vc4);
+-- 
+2.35.1
+
diff --git a/queue-5.18/ext4-add-ext4_inode_has_xattr_space-macro-in-xattr.h.patch b/queue-5.18/ext4-add-ext4_inode_has_xattr_space-macro-in-xattr.h.patch
new file mode 100644 (file)
index 0000000..e6f9e36
--- /dev/null
@@ -0,0 +1,50 @@
+From cc80213fb3767c2c9e67a78719c06cb0b2f32c15 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Jun 2022 10:13:55 +0800
+Subject: ext4: add EXT4_INODE_HAS_XATTR_SPACE macro in xattr.h
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit 179b14152dcb6a24c3415200603aebca70ff13af ]
+
+When adding an xattr to an inode, we must ensure that the inode_size is
+not less than EXT4_GOOD_OLD_INODE_SIZE + extra_isize + pad. Otherwise,
+the end position may be greater than the start position, resulting in UAF.
+
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Link: https://lore.kernel.org/r/20220616021358.2504451-2-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.h | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
+index 77efb9a627ad..f885f362add4 100644
+--- a/fs/ext4/xattr.h
++++ b/fs/ext4/xattr.h
+@@ -95,6 +95,19 @@ struct ext4_xattr_entry {
+ #define EXT4_ZERO_XATTR_VALUE ((void *)-1)
++/*
++ * If we want to add an xattr to the inode, we should make sure that
++ * i_extra_isize is not 0 and that the inode size is not less than
++ * EXT4_GOOD_OLD_INODE_SIZE + extra_isize + pad.
++ *   EXT4_GOOD_OLD_INODE_SIZE   extra_isize header   entry   pad  data
++ * |--------------------------|------------|------|---------|---|-------|
++ */
++#define EXT4_INODE_HAS_XATTR_SPACE(inode)                             \
++      ((EXT4_I(inode)->i_extra_isize != 0) &&                         \
++       (EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize +     \
++        sizeof(struct ext4_xattr_ibody_header) + EXT4_XATTR_PAD <=    \
++        EXT4_INODE_SIZE((inode)->i_sb)))
++
+ struct ext4_xattr_info {
+       const char *name;
+       const void *value;
+-- 
+2.35.1
+
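
The macro's inequality is easy to work through with the ext4 constants
assumed below (EXT4_GOOD_OLD_INODE_SIZE = 128, a 4-byte ibody header,
EXT4_XATTR_PAD = 4 -- assumptions for illustration, not kernel definitions):

#include <stdbool.h>
#include <stdio.h>

#define GOOD_OLD_INODE_SIZE	128	/* EXT4_GOOD_OLD_INODE_SIZE */
#define IBODY_HEADER_SIZE	4	/* assumed ibody header size (h_magic) */
#define XATTR_PAD		4	/* EXT4_XATTR_PAD */

static bool has_xattr_space(unsigned int inode_size, unsigned int extra_isize)
{
	return extra_isize != 0 &&
	       GOOD_OLD_INODE_SIZE + extra_isize +
	       IBODY_HEADER_SIZE + XATTR_PAD <= inode_size;
}

int main(void)
{
	/* 256-byte inode, extra_isize 32: 128+32+4+4 = 168 <= 256 -> ok */
	printf("%d\n", has_xattr_space(256, 32));
	/* 160-byte inode, extra_isize 32: 168 > 160 -> no room; writing
	 * anyway is the end-before-start overflow described above */
	printf("%d\n", has_xattr_space(160, 32));
	return 0;
}
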
diff --git a/queue-5.18/ext4-check-if-directory-block-is-within-i_size.patch b/queue-5.18/ext4-check-if-directory-block-is-within-i_size.patch
new file mode 100644 (file)
index 0000000..429ff0e
--- /dev/null
@@ -0,0 +1,56 @@
+From 6770bf434d6a397d0ac1762555133b5cf6e7a3e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Jul 2022 16:27:20 +0200
+Subject: ext4: check if directory block is within i_size
+
+From: Lukas Czerner <lczerner@redhat.com>
+
+[ Upstream commit 65f8ea4cd57dbd46ea13b41dc8bac03176b04233 ]
+
+Currently ext4 directory handling code implicitly assumes that the
+directory blocks are always within the i_size. In fact ext4_append()
+will attempt to allocate the next directory block based solely on i_size and
+the i_size is then appropriately increased after a successful
+allocation.
+
+However, for this to work it requires i_size to be correct. If, for any
+reason, the directory inode i_size is corrupted in a way that the
+directory tree refers to a valid directory block past i_size, we could
+end up corrupting parts of the directory tree structure by overwriting
+already used directory blocks when modifying the directory.
+
+Fix it by catching the corruption early in __ext4_read_dirblock().
+
+Addresses Red-Hat-Bugzilla: #2070205
+CVE: CVE-2022-1184
+Signed-off-by: Lukas Czerner <lczerner@redhat.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Link: https://lore.kernel.org/r/20220704142721.157985-1-lczerner@redhat.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/namei.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
+index 4f0420b1ff3e..2bc3e4b27204 100644
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -110,6 +110,13 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
+       struct ext4_dir_entry *dirent;
+       int is_dx_block = 0;
++      if (block >= inode->i_size) {
++              ext4_error_inode(inode, func, line, block,
++                     "Attempting to read directory block (%u) that is past i_size (%llu)",
++                     block, inode->i_size);
++              return ERR_PTR(-EFSCORRUPTED);
++      }
++
+       if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO))
+               bh = ERR_PTR(-EIO);
+       else
+-- 
+2.35.1
+
diff --git a/queue-5.18/ext4-correct-max_inline_xattr_value_size-computing.patch b/queue-5.18/ext4-correct-max_inline_xattr_value_size-computing.patch
new file mode 100644 (file)
index 0000000..67a7e46
--- /dev/null
@@ -0,0 +1,41 @@
+From e4206366d1ea91bfb593c5f1c297f97d95cc09b7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Jun 2022 10:13:57 +0800
+Subject: ext4: correct max_inline_xattr_value_size computing
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit c9fd167d57133c5b748d16913c4eabc55e531c73 ]
+
+If the ext4 inode does not have xattr space, 0 is returned in the
+get_max_inline_xattr_value_size function. Otherwise, the function returns
+a negative value when the inode does not contain EXT4_STATE_XATTR.
+
+Cc: stable@kernel.org
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220616021358.2504451-4-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inline.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
+index e9ef5cf30969..84fcd06a8e8a 100644
+--- a/fs/ext4/inline.c
++++ b/fs/ext4/inline.c
+@@ -35,6 +35,9 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
+       struct ext4_inode *raw_inode;
+       int free, min_offs;
++      if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
++              return 0;
++
+       min_offs = EXT4_SB(inode->i_sb)->s_inode_size -
+                       EXT4_GOOD_OLD_INODE_SIZE -
+                       EXT4_I(inode)->i_extra_isize -
+-- 
+2.35.1
+
diff --git a/queue-5.18/ext4-correct-the-misjudgment-in-ext4_iget_extra_inod.patch b/queue-5.18/ext4-correct-the-misjudgment-in-ext4_iget_extra_inod.patch
new file mode 100644 (file)
index 0000000..45ae7da
--- /dev/null
@@ -0,0 +1,40 @@
+From bb87c08d8958bcedb747138758192ad8471a7f14 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Jun 2022 10:13:58 +0800
+Subject: ext4: correct the misjudgment in ext4_iget_extra_inode
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit fd7e672ea98b95b9d4c9dae316639f03c16a749d ]
+
+Use the EXT4_INODE_HAS_XATTR_SPACE macro to more accurately
+determine whether the inode has xattr space.
+
+Cc: stable@kernel.org
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220616021358.2504451-5-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 826e2deb10f8..e478cac3b8f2 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4681,8 +4681,7 @@ static inline int ext4_iget_extra_inode(struct inode *inode,
+       __le32 *magic = (void *)raw_inode +
+                       EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
+-      if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <=
+-          EXT4_INODE_SIZE(inode->i_sb) &&
++      if (EXT4_INODE_HAS_XATTR_SPACE(inode)  &&
+           *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
+               ext4_set_inode_state(inode, EXT4_STATE_XATTR);
+               return ext4_find_inline_data_nolock(inode);
+-- 
+2.35.1
+
diff --git a/queue-5.18/ext4-fix-extent-status-tree-race-in-writeback-error-.patch b/queue-5.18/ext4-fix-extent-status-tree-race-in-writeback-error-.patch
new file mode 100644 (file)
index 0000000..b190977
--- /dev/null
@@ -0,0 +1,57 @@
+From dc4a02f5902dad0bd2ac7063ad9cc82a29f579f2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 12:05:30 -0400
+Subject: ext4: fix extent status tree race in writeback error recovery path
+
+From: Eric Whitney <enwlinux@gmail.com>
+
+[ Upstream commit 7f0d8e1d607c1a4fa9a27362a108921d82230874 ]
+
+A race can occur in the unlikely event ext4 is unable to allocate a
+physical cluster for a delayed allocation in a bigalloc file system
+during writeback.  Failure to allocate a cluster forces error recovery
+that includes a call to mpage_release_unused_pages().  That function
+removes any corresponding delayed allocated blocks from the extent
+status tree.  If a new delayed write is in progress on the same cluster
+simultaneously, resulting in the addition of an new extent containing
+one or more blocks in that cluster to the extent status tree, delayed
+block accounting can be thrown off if that delayed write then encounters
+a similar cluster allocation failure during future writeback.
+
+Write lock the i_data_sem in mpage_release_unused_pages() to fix this
+problem.  Ext4's block/cluster accounting code for bigalloc relies on
+i_data_sem for mutual exclusion, as is found in the delayed write path,
+and the locking in mpage_release_unused_pages() is missing.
+
+Cc: stable@kernel.org
+Reported-by: Ye Bin <yebin10@huawei.com>
+Signed-off-by: Eric Whitney <enwlinux@gmail.com>
+Link: https://lore.kernel.org/r/20220615160530.1928801-1-enwlinux@gmail.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index beed9e32571c..826e2deb10f8 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1559,7 +1559,14 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
+               ext4_lblk_t start, last;
+               start = index << (PAGE_SHIFT - inode->i_blkbits);
+               last = end << (PAGE_SHIFT - inode->i_blkbits);
++
++              /*
++               * avoid racing with extent status tree scans made by
++               * ext4_insert_delayed_block()
++               */
++              down_write(&EXT4_I(inode)->i_data_sem);
+               ext4_es_remove_extent(inode, start, last - start + 1);
++              up_write(&EXT4_I(inode)->i_data_sem);
+       }
+       pagevec_init(&pvec);
+-- 
+2.35.1
+
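
The fix brings mpage_release_unused_pages() under the same i_data_sem write
lock the delayed-write path already holds, so purge and insert serialize. A
userspace analogy with a rwlock (a pthread stand-in for the kernel
rw_semaphore; names invented):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t i_data_sem = PTHREAD_RWLOCK_INITIALIZER;
static int delayed_extents;	/* stands in for the extent status tree */

/* Delayed-write path: inserts under the write lock, as ext4 does. */
static void insert_delayed_block(void)
{
	pthread_rwlock_wrlock(&i_data_sem);
	delayed_extents++;
	pthread_rwlock_unlock(&i_data_sem);
}

/* Error-recovery path: after the fix, the purge takes the same lock,
 * so it can no longer interleave with an in-flight insert. */
static void release_unused(void)
{
	pthread_rwlock_wrlock(&i_data_sem);	/* down_write() in the patch */
	delayed_extents = 0;
	pthread_rwlock_unlock(&i_data_sem);	/* up_write() */
}

int main(void)
{
	insert_delayed_block();
	release_unused();
	printf("%d\n", delayed_extents);
	return 0;
}
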
diff --git a/queue-5.18/ext4-fix-race-when-reusing-xattr-blocks.patch b/queue-5.18/ext4-fix-race-when-reusing-xattr-blocks.patch
new file mode 100644 (file)
index 0000000..e1120bd
--- /dev/null
@@ -0,0 +1,179 @@
+From 7e96f9358fc2312891b448cce6dd758e684e2b80 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:24 +0200
+Subject: ext4: fix race when reusing xattr blocks
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 65f8b80053a1b2fd602daa6814e62d6fa90e5e9b ]
+
+When ext4_xattr_block_set() decides to remove xattr block the following
+race can happen:
+
+CPU1                                    CPU2
+ext4_xattr_block_set()                  ext4_xattr_release_block()
+  new_bh = ext4_xattr_block_cache_find()
+
+                                          lock_buffer(bh);
+                                          ref = le32_to_cpu(BHDR(bh)->h_refcount);
+                                          if (ref == 1) {
+                                            ...
+                                            mb_cache_entry_delete();
+                                            unlock_buffer(bh);
+                                            ext4_free_blocks();
+                                              ...
+                                              ext4_forget(..., bh, ...);
+                                                jbd2_journal_revoke(..., bh);
+
+  ext4_journal_get_write_access(..., new_bh, ...)
+    do_get_write_access()
+      jbd2_journal_cancel_revoke(..., new_bh);
+
+Later the code in ext4_xattr_block_set() finds out the block got freed
+and cancels reuse of the block, but the revoke stays canceled, and so in
+case of block reuse and journal replay the filesystem can get corrupted.
+If the race works out slightly differently, we can also hit assertions
+in the jbd2 code.
+
+Fix the problem by making sure that once matching mbcache entry is
+found, code dropping the last xattr block reference (or trying to modify
+xattr block in place) waits until the mbcache entry reference is
+dropped. This way code trying to reuse xattr block is protected from
+someone trying to drop the last reference to xattr block.
+
+Reported-and-tested-by: Ritesh Harjani <ritesh.list@gmail.com>
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-5-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 67 +++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 45 insertions(+), 22 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index a25942a74929..533216e80fa2 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -439,9 +439,16 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
+ /* Remove entry from mbcache when EA inode is getting evicted */
+ void ext4_evict_ea_inode(struct inode *inode)
+ {
+-      if (EA_INODE_CACHE(inode))
+-              mb_cache_entry_delete(EA_INODE_CACHE(inode),
+-                      ext4_xattr_inode_get_hash(inode), inode->i_ino);
++      struct mb_cache_entry *oe;
++
++      if (!EA_INODE_CACHE(inode))
++              return;
++      /* Wait for entry to get unused so that we can remove it */
++      while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
++                      ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
++              mb_cache_entry_wait_unused(oe);
++              mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
++      }
+ }
+ static int
+@@ -1229,6 +1236,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+       if (error)
+               goto out;
++retry_ref:
+       lock_buffer(bh);
+       hash = le32_to_cpu(BHDR(bh)->h_hash);
+       ref = le32_to_cpu(BHDR(bh)->h_refcount);
+@@ -1238,9 +1246,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+                * This must happen under buffer lock for
+                * ext4_xattr_block_set() to reliably detect freed block
+                */
+-              if (ea_block_cache)
+-                      mb_cache_entry_delete(ea_block_cache, hash,
+-                                            bh->b_blocknr);
++              if (ea_block_cache) {
++                      struct mb_cache_entry *oe;
++
++                      oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
++                                                        bh->b_blocknr);
++                      if (oe) {
++                              unlock_buffer(bh);
++                              mb_cache_entry_wait_unused(oe);
++                              mb_cache_entry_put(ea_block_cache, oe);
++                              goto retry_ref;
++                      }
++              }
+               get_bh(bh);
+               unlock_buffer(bh);
+@@ -1867,9 +1884,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                        * ext4_xattr_block_set() to reliably detect modified
+                        * block
+                        */
+-                      if (ea_block_cache)
+-                              mb_cache_entry_delete(ea_block_cache, hash,
+-                                                    bs->bh->b_blocknr);
++                      if (ea_block_cache) {
++                              struct mb_cache_entry *oe;
++
++                              oe = mb_cache_entry_delete_or_get(ea_block_cache,
++                                      hash, bs->bh->b_blocknr);
++                              if (oe) {
++                                      /*
++                                       * Xattr block is getting reused. Leave
++                                       * it alone.
++                                       */
++                                      mb_cache_entry_put(ea_block_cache, oe);
++                                      goto clone_block;
++                              }
++                      }
+                       ea_bdebug(bs->bh, "modifying in-place");
+                       error = ext4_xattr_set_entry(i, s, handle, inode,
+                                                    true /* is_block */);
+@@ -1885,6 +1913,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                               goto cleanup;
+                       goto inserted;
+               }
++clone_block:
+               unlock_buffer(bs->bh);
+               ea_bdebug(bs->bh, "cloning");
+               s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
+@@ -1990,18 +2019,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                               lock_buffer(new_bh);
+                               /*
+                                * We have to be careful about races with
+-                               * freeing, rehashing or adding references to
+-                               * xattr block. Once we hold buffer lock xattr
+-                               * block's state is stable so we can check
+-                               * whether the block got freed / rehashed or
+-                               * not.  Since we unhash mbcache entry under
+-                               * buffer lock when freeing / rehashing xattr
+-                               * block, checking whether entry is still
+-                               * hashed is reliable. Same rules hold for
+-                               * e_reusable handling.
++                               * adding references to xattr block. Once we
++                               * hold buffer lock xattr block's state is
++                               * stable so we can check the additional
++                               * reference fits.
+                                */
+-                              if (hlist_bl_unhashed(&ce->e_hash_list) ||
+-                                  !ce->e_reusable) {
++                              ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
++                              if (ref > EXT4_XATTR_REFCOUNT_MAX) {
+                                       /*
+                                        * Undo everything and check mbcache
+                                        * again.
+@@ -2016,9 +2040,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                                       new_bh = NULL;
+                                       goto inserted;
+                               }
+-                              ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+                               BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
+-                              if (ref >= EXT4_XATTR_REFCOUNT_MAX)
++                              if (ref == EXT4_XATTR_REFCOUNT_MAX)
+                                       ce->e_reusable = 0;
+                               ea_bdebug(new_bh, "reusing; refcount now=%d",
+                                         ref);
+-- 
+2.35.1
+
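+The mb_cache_entry_delete_or_get() / mb_cache_entry_wait_unused() calls
+used throughout the patch follow a single retry pattern; a minimal
+sketch of it (the function and parameter names here are illustrative,
+not the verbatim ext4 call sites):
+
+    /*
+     * Retry deleting a cache entry until no other user holds a
+     * reference: delete_or_get() either removes an unused entry
+     * (returning NULL) or returns the still-referenced entry, which
+     * we wait on and release before trying again.
+     */
+    static void drop_entry_when_unused(struct mb_cache *cache,
+                                       u32 hash, u64 value)
+    {
+            struct mb_cache_entry *oe;
+
+            while ((oe = mb_cache_entry_delete_or_get(cache, hash,
+                                                      value))) {
+                    mb_cache_entry_wait_unused(oe);
+                    mb_cache_entry_put(cache, oe);
+            }
+    }
+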
diff --git a/queue-5.18/ext4-fix-use-after-free-in-ext4_xattr_set_entry.patch b/queue-5.18/ext4-fix-use-after-free-in-ext4_xattr_set_entry.patch
new file mode 100644 (file)
index 0000000..77b18e6
--- /dev/null
@@ -0,0 +1,128 @@
+From a366d09886e9e7ed9fa6ffb4207af76a69c861ce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Jun 2022 10:13:56 +0800
+Subject: ext4: fix use-after-free in ext4_xattr_set_entry
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit 67d7d8ad99beccd9fe92d585b87f1760dc9018e3 ]
+
+Hulk Robot reported an issue:
+==================================================================
+BUG: KASAN: use-after-free in ext4_xattr_set_entry+0x18ab/0x3500
+Write of size 4105 at addr ffff8881675ef5f4 by task syz-executor.0/7092
+
+CPU: 1 PID: 7092 Comm: syz-executor.0 Not tainted 4.19.90-dirty #17
+Call Trace:
+[...]
+ memcpy+0x34/0x50 mm/kasan/kasan.c:303
+ ext4_xattr_set_entry+0x18ab/0x3500 fs/ext4/xattr.c:1747
+ ext4_xattr_ibody_inline_set+0x86/0x2a0 fs/ext4/xattr.c:2205
+ ext4_xattr_set_handle+0x940/0x1300 fs/ext4/xattr.c:2386
+ ext4_xattr_set+0x1da/0x300 fs/ext4/xattr.c:2498
+ __vfs_setxattr+0x112/0x170 fs/xattr.c:149
+ __vfs_setxattr_noperm+0x11b/0x2a0 fs/xattr.c:180
+ __vfs_setxattr_locked+0x17b/0x250 fs/xattr.c:238
+ vfs_setxattr+0xed/0x270 fs/xattr.c:255
+ setxattr+0x235/0x330 fs/xattr.c:520
+ path_setxattr+0x176/0x190 fs/xattr.c:539
+ __do_sys_lsetxattr fs/xattr.c:561 [inline]
+ __se_sys_lsetxattr fs/xattr.c:557 [inline]
+ __x64_sys_lsetxattr+0xc2/0x160 fs/xattr.c:557
+ do_syscall_64+0xdf/0x530 arch/x86/entry/common.c:298
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+RIP: 0033:0x459fe9
+RSP: 002b:00007fa5e54b4c08 EFLAGS: 00000246 ORIG_RAX: 00000000000000bd
+RAX: ffffffffffffffda RBX: 000000000051bf60 RCX: 0000000000459fe9
+RDX: 00000000200003c0 RSI: 0000000020000180 RDI: 0000000020000140
+RBP: 000000000051bf60 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000001009 R11: 0000000000000246 R12: 0000000000000000
+R13: 00007ffc73c93fc0 R14: 000000000051bf60 R15: 00007fa5e54b4d80
+[...]
+==================================================================
+
+The above issue may happen as follows:
+-------------------------------------
+ext4_xattr_set
+  ext4_xattr_set_handle
+    ext4_xattr_ibody_find
+      >> s->end < s->base
+      >> no EXT4_STATE_XATTR
+      >> xattr_check_inode is not executed
+    ext4_xattr_ibody_set
+      ext4_xattr_set_entry
+       >> size_t min_offs = s->end - s->base
+       >> UAF in memcpy
+
+We can easily reproduce this problem with the following commands:
+    mkfs.ext4 -F /dev/sda
+    mount -o debug_want_extra_isize=128 /dev/sda /mnt
+    touch /mnt/file
+    setfattr -n user.cat -v `seq -s z 4096|tr -d '[:digit:]'` /mnt/file
+
+In ext4_xattr_ibody_find, we have the following assignment logic:
+  header = IHDR(inode, raw_inode)
+         = raw_inode + EXT4_GOOD_OLD_INODE_SIZE + i_extra_isize
+  is->s.base = IFIRST(header)
+             = header + sizeof(struct ext4_xattr_ibody_header)
+  is->s.end = raw_inode + s_inode_size
+
+In ext4_xattr_set_entry
+  min_offs = s->end - s->base
+           = s_inode_size - EXT4_GOOD_OLD_INODE_SIZE - i_extra_isize -
+            sizeof(struct ext4_xattr_ibody_header)
+  last = s->first
+  free = min_offs - ((void *)last - s->base) - sizeof(__u32)
+       = s_inode_size - EXT4_GOOD_OLD_INODE_SIZE - i_extra_isize -
+         sizeof(struct ext4_xattr_ibody_header) - sizeof(__u32)
+
+In the calculation formula, all values except s_inode_size and
+i_extra_isize are fixed. When i_extra_isize takes its maximum value,
+s_inode_size - EXT4_GOOD_OLD_INODE_SIZE, min_offs is -4 and free is -8.
+These are unsigned (min_offs is a size_t), so the values wrap around to
+huge numbers. As a result, the preceding issue is triggered when memcpy
+is executed.
+
+Therefore, when finding xattr or setting xattr, check whether
+there is space for storing xattr in the inode to resolve this issue.
+
+Cc: stable@kernel.org
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220616021358.2504451-3-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 042325349098..c3c3194f3ee1 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -2176,8 +2176,9 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
+       struct ext4_inode *raw_inode;
+       int error;
+-      if (EXT4_I(inode)->i_extra_isize == 0)
++      if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
+               return 0;
++
+       raw_inode = ext4_raw_inode(&is->iloc);
+       header = IHDR(inode, raw_inode);
+       is->s.base = is->s.first = IFIRST(header);
+@@ -2205,8 +2206,9 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
+       struct ext4_xattr_search *s = &is->s;
+       int error;
+-      if (EXT4_I(inode)->i_extra_isize == 0)
++      if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
+               return -ENOSPC;
++
+       error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
+       if (error)
+               return error;
+-- 
+2.35.1
+
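+The size_t wrap-around described above is easy to demonstrate outside
+the kernel; a self-contained sketch using the sizes from the analysis
+(the 128-byte EXT4_GOOD_OLD_INODE_SIZE and 4-byte ibody header are
+assumptions mirroring the commit message, not pulled from ext4 headers):
+
+    #include <stdio.h>
+    #include <stddef.h>
+
+    int main(void)
+    {
+            size_t s_inode_size = 256;   /* example inode size */
+            size_t good_old = 128;       /* EXT4_GOOD_OLD_INODE_SIZE */
+            size_t ibody_hdr = 4;        /* ibody header size */
+            size_t i_extra_isize = s_inode_size - good_old; /* bad max */
+
+            /* min_offs = s->end - s->base, as in the analysis above */
+            size_t min_offs = s_inode_size - good_old
+                              - i_extra_isize - ibody_hdr;
+
+            /* "-4" as size_t: a huge value that later feeds memcpy */
+            printf("min_offs = %zu\n", min_offs);
+            return 0;
+    }
+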
diff --git a/queue-5.18/ext4-fix-warning-in-ext4_iomap_begin-as-race-between.patch b/queue-5.18/ext4-fix-warning-in-ext4_iomap_begin-as-race-between.patch
new file mode 100644 (file)
index 0000000..da45295
--- /dev/null
@@ -0,0 +1,103 @@
+From 122a6fffdeebf14f28ad593406efaa4e52613ea6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 Jun 2022 09:39:35 +0800
+Subject: ext4: fix warning in ext4_iomap_begin as race between bmap and write
+
+From: Ye Bin <yebin10@huawei.com>
+
+[ Upstream commit 51ae846cff568c8c29921b1b28eb2dfbcd4ac12d ]
+
+We got the following issue:
+------------[ cut here ]------------
+WARNING: CPU: 3 PID: 9310 at fs/ext4/inode.c:3441 ext4_iomap_begin+0x182/0x5d0
+RIP: 0010:ext4_iomap_begin+0x182/0x5d0
+RSP: 0018:ffff88812460fa08 EFLAGS: 00010293
+RAX: ffff88811f168000 RBX: 0000000000000000 RCX: ffffffff97793c12
+RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003
+RBP: ffff88812c669160 R08: ffff88811f168000 R09: ffffed10258cd20f
+R10: ffff88812c669077 R11: ffffed10258cd20e R12: 0000000000000001
+R13: 00000000000000a4 R14: 000000000000000c R15: ffff88812c6691ee
+FS:  00007fd0d6ff3740(0000) GS:ffff8883af180000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007fd0d6dda290 CR3: 0000000104a62000 CR4: 00000000000006e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ iomap_apply+0x119/0x570
+ iomap_bmap+0x124/0x150
+ ext4_bmap+0x14f/0x250
+ bmap+0x55/0x80
+ do_vfs_ioctl+0x952/0xbd0
+ __x64_sys_ioctl+0xc6/0x170
+ do_syscall_64+0x33/0x40
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+The above issue may happen as follows:
+          bmap                    write
+bmap
+  ext4_bmap
+    iomap_bmap
+      ext4_iomap_begin
+                            ext4_file_write_iter
+                             ext4_buffered_write_iter
+                               generic_perform_write
+                                 ext4_da_write_begin
+                                   ext4_da_write_inline_data_begin
+                                     ext4_prepare_inline_data
+                                       ext4_create_inline_data
+                                         ext4_set_inode_flag(inode,
+                                               EXT4_INODE_INLINE_DATA);
+      if (WARN_ON_ONCE(ext4_has_inline_data(inode))) ->trigger bug_on
+
+To solve the above issue, hold the inode lock in ext4_bmap().
+
+Signed-off-by: Ye Bin <yebin10@huawei.com>
+Link: https://lore.kernel.org/r/20220617013935.397596-1-yebin10@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index e478cac3b8f2..9ef6f41a5250 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -3137,13 +3137,15 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
+ {
+       struct inode *inode = mapping->host;
+       journal_t *journal;
++      sector_t ret = 0;
+       int err;
++      inode_lock_shared(inode);
+       /*
+        * We can get here for an inline file via the FIBMAP ioctl
+        */
+       if (ext4_has_inline_data(inode))
+-              return 0;
++              goto out;
+       if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
+                       test_opt(inode->i_sb, DELALLOC)) {
+@@ -3182,10 +3184,14 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
+               jbd2_journal_unlock_updates(journal);
+               if (err)
+-                      return 0;
++                      goto out;
+       }
+-      return iomap_bmap(mapping, block, &ext4_iomap_ops);
++      ret = iomap_bmap(mapping, block, &ext4_iomap_ops);
++
++out:
++      inode_unlock_shared(inode);
++      return ret;
+ }
+ static int ext4_readpage(struct file *file, struct page *page)
+-- 
+2.35.1
+
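+The fix is an instance of a common pattern: take the inode lock in
+shared mode around a read-side check so that a concurrent writer cannot
+invalidate the check midway. A minimal sketch (has_inline_data() and
+lookup_block() are hypothetical placeholders):
+
+    static sector_t locked_bmap(struct address_space *mapping,
+                                sector_t block)
+    {
+            struct inode *inode = mapping->host;
+            sector_t ret = 0;
+
+            inode_lock_shared(inode);    /* excludes racing writers */
+            if (!has_inline_data(inode)) /* hypothetical state check */
+                    ret = lookup_block(inode, block); /* hypothetical */
+            inode_unlock_shared(inode);
+
+            return ret;
+    }
+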
diff --git a/queue-5.18/ext4-make-sure-ext4_append-always-allocates-new-bloc.patch b/queue-5.18/ext4-make-sure-ext4_append-always-allocates-new-bloc.patch
new file mode 100644 (file)
index 0000000..0cd8a40
--- /dev/null
@@ -0,0 +1,63 @@
+From 103d5a38ce71f77d0a0ab8577c7e93d52421b67c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Jul 2022 16:27:21 +0200
+Subject: ext4: make sure ext4_append() always allocates new block
+
+From: Lukas Czerner <lczerner@redhat.com>
+
+[ Upstream commit b8a04fe77ef1360fbf73c80fddbdfeaa9407ed1b ]
+
+ext4_append() must always allocate a new block, otherwise we run the
+risk of overwriting an existing directory block, corrupting the
+directory tree in the process and causing all manner of problems later
+on.
+
+Add a sanity check to see if the logical block is already allocated and
+error out if it is.
+
+Cc: stable@kernel.org
+Signed-off-by: Lukas Czerner <lczerner@redhat.com>
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Link: https://lore.kernel.org/r/20220704142721.157985-2-lczerner@redhat.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/namei.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
+index 2bc3e4b27204..13b6265848c2 100644
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -54,6 +54,7 @@ static struct buffer_head *ext4_append(handle_t *handle,
+                                       struct inode *inode,
+                                       ext4_lblk_t *block)
+ {
++      struct ext4_map_blocks map;
+       struct buffer_head *bh;
+       int err;
+@@ -63,6 +64,21 @@ static struct buffer_head *ext4_append(handle_t *handle,
+               return ERR_PTR(-ENOSPC);
+       *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
++      map.m_lblk = *block;
++      map.m_len = 1;
++
++      /*
++       * We're appending new directory block. Make sure the block is not
++       * allocated yet, otherwise we will end up corrupting the
++       * directory.
++       */
++      err = ext4_map_blocks(NULL, inode, &map, 0);
++      if (err < 0)
++              return ERR_PTR(err);
++      if (err) {
++              EXT4_ERROR_INODE(inode, "Logical block already allocated");
++              return ERR_PTR(-EFSCORRUPTED);
++      }
+       bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
+       if (IS_ERR(bh))
+-- 
+2.35.1
+
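+The sanity check relies on the lookup-only calling convention of
+ext4_map_blocks(): with a NULL handle and no CREATE flag it never
+allocates. A sketch of the tri-state return value as the patch
+interprets it:
+
+    struct ext4_map_blocks map = { .m_lblk = lblk, .m_len = 1 };
+    int err = ext4_map_blocks(NULL, inode, &map, 0); /* lookup only */
+
+    if (err < 0)        /* I/O error or corruption */
+            return ERR_PTR(err);
+    if (err > 0)        /* blocks already mapped: must not append */
+            return ERR_PTR(-EFSCORRUPTED);
+    /* err == 0: the logical block is a hole, safe to allocate */
+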
diff --git a/queue-5.18/ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch b/queue-5.18/ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch
new file mode 100644 (file)
index 0000000..dfd57e5
--- /dev/null
@@ -0,0 +1,116 @@
+From 61a993fc39a5ec6129c04ea49d65c2173d1071a4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:22 +0200
+Subject: ext4: remove EA inode entry from mbcache on inode eviction
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 6bc0d63dad7f9f54d381925ee855b402f652fa39 ]
+
+Currently we remove the EA inode from the mbcache as soon as its xattr
+refcount drops to zero. However, there can be pending attempts to reuse
+the inode, so the refcount handling code has to cope with the refcount
+increasing from zero anyway. Save some work and just keep the EA inode
+in the mbcache until it is evicted. At that moment we are sure that any
+following iget() of the EA inode will fail anyway (or wait for the
+eviction to finish and load things from the disk again), so removing the
+mbcache entry at that point is fine and simplifies the code a bit.
+
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-3-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c |  2 ++
+ fs/ext4/xattr.c | 24 ++++++++----------------
+ fs/ext4/xattr.h |  1 +
+ 3 files changed, 11 insertions(+), 16 deletions(-)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 9ef6f41a5250..e94ec798dce1 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -178,6 +178,8 @@ void ext4_evict_inode(struct inode *inode)
+       trace_ext4_evict_inode(inode);
++      if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
++              ext4_evict_ea_inode(inode);
+       if (inode->i_nlink) {
+               /*
+                * When journalling data dirty buffers are tracked only in the
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index c3c3194f3ee1..b57fd07fbdba 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -436,6 +436,14 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
+       return err;
+ }
++/* Remove entry from mbcache when EA inode is getting evicted */
++void ext4_evict_ea_inode(struct inode *inode)
++{
++      if (EA_INODE_CACHE(inode))
++              mb_cache_entry_delete(EA_INODE_CACHE(inode),
++                      ext4_xattr_inode_get_hash(inode), inode->i_ino);
++}
++
+ static int
+ ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
+                              struct ext4_xattr_entry *entry, void *buffer,
+@@ -976,10 +984,8 @@ int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
+ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+                                      int ref_change)
+ {
+-      struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
+       struct ext4_iloc iloc;
+       s64 ref_count;
+-      u32 hash;
+       int ret;
+       inode_lock(ea_inode);
+@@ -1002,14 +1008,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+                       set_nlink(ea_inode, 1);
+                       ext4_orphan_del(handle, ea_inode);
+-
+-                      if (ea_inode_cache) {
+-                              hash = ext4_xattr_inode_get_hash(ea_inode);
+-                              mb_cache_entry_create(ea_inode_cache,
+-                                                    GFP_NOFS, hash,
+-                                                    ea_inode->i_ino,
+-                                                    true /* reusable */);
+-                      }
+               }
+       } else {
+               WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
+@@ -1022,12 +1020,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+                       clear_nlink(ea_inode);
+                       ext4_orphan_add(handle, ea_inode);
+-
+-                      if (ea_inode_cache) {
+-                              hash = ext4_xattr_inode_get_hash(ea_inode);
+-                              mb_cache_entry_delete(ea_inode_cache, hash,
+-                                                    ea_inode->i_ino);
+-                      }
+               }
+       }
+diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
+index f885f362add4..e5e36bd11f05 100644
+--- a/fs/ext4/xattr.h
++++ b/fs/ext4/xattr.h
+@@ -191,6 +191,7 @@ extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
+ extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+                           struct ext4_inode *raw_inode, handle_t *handle);
++extern void ext4_evict_ea_inode(struct inode *inode);
+ extern const struct xattr_handler *ext4_xattr_handlers[];
+-- 
+2.35.1
+
diff --git a/queue-5.18/ext4-unindent-codeblock-in-ext4_xattr_block_set.patch b/queue-5.18/ext4-unindent-codeblock-in-ext4_xattr_block_set.patch
new file mode 100644 (file)
index 0000000..f1293e3
--- /dev/null
@@ -0,0 +1,125 @@
+From 08077db477ddbef197b91281d9078043da9adb08 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:23 +0200
+Subject: ext4: unindent codeblock in ext4_xattr_block_set()
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit fd48e9acdf26d0cbd80051de07d4a735d05d29b2 ]
+
+Remove an unnecessary else (and thus one indentation level) from a code
+block in ext4_xattr_block_set(). It will also make the following code
+changes easier. No functional changes.
+
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-4-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 77 ++++++++++++++++++++++++-------------------------
+ 1 file changed, 38 insertions(+), 39 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index d92d50de5a01..a25942a74929 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1850,6 +1850,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ #define header(x) ((struct ext4_xattr_header *)(x))
+       if (s->base) {
++              int offset = (char *)s->here - bs->bh->b_data;
++
+               BUFFER_TRACE(bs->bh, "get_write_access");
+               error = ext4_journal_get_write_access(handle, sb, bs->bh,
+                                                     EXT4_JTR_NONE);
+@@ -1882,49 +1884,46 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                       if (error)
+                               goto cleanup;
+                       goto inserted;
+-              } else {
+-                      int offset = (char *)s->here - bs->bh->b_data;
++              }
++              unlock_buffer(bs->bh);
++              ea_bdebug(bs->bh, "cloning");
++              s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
++              error = -ENOMEM;
++              if (s->base == NULL)
++                      goto cleanup;
++              s->first = ENTRY(header(s->base)+1);
++              header(s->base)->h_refcount = cpu_to_le32(1);
++              s->here = ENTRY(s->base + offset);
++              s->end = s->base + bs->bh->b_size;
+-                      unlock_buffer(bs->bh);
+-                      ea_bdebug(bs->bh, "cloning");
+-                      s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
+-                      error = -ENOMEM;
+-                      if (s->base == NULL)
++              /*
++               * If existing entry points to an xattr inode, we need
++               * to prevent ext4_xattr_set_entry() from decrementing
++               * ref count on it because the reference belongs to the
++               * original block. In this case, make the entry look
++               * like it has an empty value.
++               */
++              if (!s->not_found && s->here->e_value_inum) {
++                      ea_ino = le32_to_cpu(s->here->e_value_inum);
++                      error = ext4_xattr_inode_iget(inode, ea_ino,
++                                    le32_to_cpu(s->here->e_hash),
++                                    &tmp_inode);
++                      if (error)
+                               goto cleanup;
+-                      s->first = ENTRY(header(s->base)+1);
+-                      header(s->base)->h_refcount = cpu_to_le32(1);
+-                      s->here = ENTRY(s->base + offset);
+-                      s->end = s->base + bs->bh->b_size;
+-                      /*
+-                       * If existing entry points to an xattr inode, we need
+-                       * to prevent ext4_xattr_set_entry() from decrementing
+-                       * ref count on it because the reference belongs to the
+-                       * original block. In this case, make the entry look
+-                       * like it has an empty value.
+-                       */
+-                      if (!s->not_found && s->here->e_value_inum) {
+-                              ea_ino = le32_to_cpu(s->here->e_value_inum);
+-                              error = ext4_xattr_inode_iget(inode, ea_ino,
+-                                            le32_to_cpu(s->here->e_hash),
+-                                            &tmp_inode);
+-                              if (error)
+-                                      goto cleanup;
+-
+-                              if (!ext4_test_inode_state(tmp_inode,
+-                                              EXT4_STATE_LUSTRE_EA_INODE)) {
+-                                      /*
+-                                       * Defer quota free call for previous
+-                                       * inode until success is guaranteed.
+-                                       */
+-                                      old_ea_inode_quota = le32_to_cpu(
+-                                                      s->here->e_value_size);
+-                              }
+-                              iput(tmp_inode);
+-
+-                              s->here->e_value_inum = 0;
+-                              s->here->e_value_size = 0;
++                      if (!ext4_test_inode_state(tmp_inode,
++                                      EXT4_STATE_LUSTRE_EA_INODE)) {
++                              /*
++                               * Defer quota free call for previous
++                               * inode until success is guaranteed.
++                               */
++                              old_ea_inode_quota = le32_to_cpu(
++                                              s->here->e_value_size);
+                       }
++                      iput(tmp_inode);
++
++                      s->here->e_value_inum = 0;
++                      s->here->e_value_size = 0;
+               }
+       } else {
+               /* Allocate a buffer where we construct the new block. */
+-- 
+2.35.1
+
diff --git a/queue-5.18/ext4-update-s_overhead_clusters-in-the-superblock-du.patch b/queue-5.18/ext4-update-s_overhead_clusters-in-the-superblock-du.patch
new file mode 100644 (file)
index 0000000..0493632
--- /dev/null
@@ -0,0 +1,51 @@
+From 17944868e9c621e876595423dbbf4c4660b651fd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Jun 2022 00:00:25 -0400
+Subject: ext4: update s_overhead_clusters in the superblock during an on-line
+ resize
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+[ Upstream commit de394a86658ffe4e89e5328fd4993abfe41b7435 ]
+
+When doing an online resize, the on-disk superblock wasn't updated.
+This means that when the file system is unmounted and remounted, and
+the on-disk overhead value is non-zero, the results of statfs(2) would
+be incorrect.
+
+This was partially fixed by Commits 10b01ee92df5 ("ext4: fix overhead
+calculation to account for the reserved gdt blocks"), 85d825dbf489
+("ext4: force overhead calculation if the s_overhead_cluster makes no
+sense"), and eb7054212eac ("ext4: update the cached overhead value in
+the superblock").
+
+However, since it was too expensive to forcibly recalculate the
+overhead for bigalloc file systems at every mount, this didn't fix the
+problem for bigalloc file systems.  This commit should address the
+problem when resizing file systems with the bigalloc feature enabled.
+
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Link: https://lore.kernel.org/r/20220629040026.112371-1-tytso@mit.edu
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/resize.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
+index 8b70a4701293..e5c2713aa11a 100644
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -1484,6 +1484,7 @@ static void ext4_update_super(struct super_block *sb,
+        * Update the fs overhead information
+        */
+       ext4_calculate_overhead(sb);
++      es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead);
+       if (test_opt(sb, DEBUG))
+               printk(KERN_DEBUG "EXT4-fs: added group %u:"
+-- 
+2.35.1
+
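+The one-line fix writes the freshly recalculated overhead back through
+the on-disk superblock field, which like other ext4 metadata is stored
+little-endian; a sketch of the conversion in both directions (the
+read-side line is the usual mount-time counterpart, shown here as an
+assumption rather than a quote from ext4):
+
+    /* in-memory, CPU-endian  ->  on-disk, little-endian */
+    es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead);
+
+    /* and the reverse when the superblock is read back in */
+    sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
+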
diff --git a/queue-5.18/ext4-use-kmemdup-to-replace-kmalloc-memcpy.patch b/queue-5.18/ext4-use-kmemdup-to-replace-kmalloc-memcpy.patch
new file mode 100644 (file)
index 0000000..3d0c9ac
--- /dev/null
@@ -0,0 +1,40 @@
+From 7f40ec1ab39f18dae16aba7df11a741b3bc968a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 May 2022 11:01:20 +0800
+Subject: ext4: use kmemdup() to replace kmalloc + memcpy
+
+From: Shuqi Zhang <zhangshuqi3@huawei.com>
+
+[ Upstream commit 4efd9f0d120c55b08852ee5605dbb02a77089a5d ]
+
+Replace kmalloc + memcpy with kmemdup()
+
+Signed-off-by: Shuqi Zhang <zhangshuqi3@huawei.com>
+Reviewed-by: Ritesh Harjani <ritesh.list@gmail.com>
+Link: https://lore.kernel.org/r/20220525030120.803330-1-zhangshuqi3@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index b57fd07fbdba..d92d50de5a01 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1887,11 +1887,10 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                       unlock_buffer(bs->bh);
+                       ea_bdebug(bs->bh, "cloning");
+-                      s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
++                      s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
+                       error = -ENOMEM;
+                       if (s->base == NULL)
+                               goto cleanup;
+-                      memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
+                       s->first = ENTRY(header(s->base)+1);
+                       header(s->base)->h_refcount = cpu_to_le32(1);
+                       s->here = ENTRY(s->base + offset);
+-- 
+2.35.1
+
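+For reference, the general shape of the conversion (buffer names are
+illustrative): kmemdup() folds the allocation and the copy into a
+single call with the same failure mode as kmalloc():
+
+    /* before: allocate, then copy */
+    dst = kmalloc(len, GFP_NOFS);
+    if (!dst)
+            return -ENOMEM;
+    memcpy(dst, src, len);
+
+    /* after: one call, same -ENOMEM handling */
+    dst = kmemdup(src, len, GFP_NOFS);
+    if (!dst)
+            return -ENOMEM;
+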
diff --git a/queue-5.18/firmware-arm_scpi-ensure-scpi_info-is-not-assigned-i.patch b/queue-5.18/firmware-arm_scpi-ensure-scpi_info-is-not-assigned-i.patch
new file mode 100644 (file)
index 0000000..fa70e82
--- /dev/null
@@ -0,0 +1,156 @@
+From 20eafffdcdc724c0cbbd8582b9bdedc47de0f785 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Jul 2022 17:03:10 +0100
+Subject: firmware: arm_scpi: Ensure scpi_info is not assigned if the probe
+ fails
+
+From: Sudeep Holla <sudeep.holla@arm.com>
+
+[ Upstream commit 689640efc0a2c4e07e6f88affe6d42cd40cc3f85 ]
+
+When the scpi probe fails, at any point, we need to ensure that
+scpi_info is not set and remains NULL until the probe succeeds. If this
+is not taken care of, it could result in a use-after-free, as the value
+is exported via get_scpi_ops() and could refer to memory allocated via
+devm_kzalloc() but freed when the probe fails.
+
+Link: https://lore.kernel.org/r/20220701160310.148344-1-sudeep.holla@arm.com
+Cc: stable@vger.kernel.org # 4.19+
+Reported-by: huhai <huhai@kylinos.cn>
+Reviewed-by: Jackie Liu <liuyun01@kylinos.cn>
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/arm_scpi.c | 61 +++++++++++++++++++++----------------
+ 1 file changed, 35 insertions(+), 26 deletions(-)
+
+diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c
+index ddf0b9ff9e15..435d0e2658a4 100644
+--- a/drivers/firmware/arm_scpi.c
++++ b/drivers/firmware/arm_scpi.c
+@@ -815,7 +815,7 @@ static int scpi_init_versions(struct scpi_drvinfo *info)
+               info->firmware_version = le32_to_cpu(caps.platform_version);
+       }
+       /* Ignore error if not implemented */
+-      if (scpi_info->is_legacy && ret == -EOPNOTSUPP)
++      if (info->is_legacy && ret == -EOPNOTSUPP)
+               return 0;
+       return ret;
+@@ -913,13 +913,14 @@ static int scpi_probe(struct platform_device *pdev)
+       struct resource res;
+       struct device *dev = &pdev->dev;
+       struct device_node *np = dev->of_node;
++      struct scpi_drvinfo *scpi_drvinfo;
+-      scpi_info = devm_kzalloc(dev, sizeof(*scpi_info), GFP_KERNEL);
+-      if (!scpi_info)
++      scpi_drvinfo = devm_kzalloc(dev, sizeof(*scpi_drvinfo), GFP_KERNEL);
++      if (!scpi_drvinfo)
+               return -ENOMEM;
+       if (of_match_device(legacy_scpi_of_match, &pdev->dev))
+-              scpi_info->is_legacy = true;
++              scpi_drvinfo->is_legacy = true;
+       count = of_count_phandle_with_args(np, "mboxes", "#mbox-cells");
+       if (count < 0) {
+@@ -927,19 +928,19 @@ static int scpi_probe(struct platform_device *pdev)
+               return -ENODEV;
+       }
+-      scpi_info->channels = devm_kcalloc(dev, count, sizeof(struct scpi_chan),
+-                                         GFP_KERNEL);
+-      if (!scpi_info->channels)
++      scpi_drvinfo->channels =
++              devm_kcalloc(dev, count, sizeof(struct scpi_chan), GFP_KERNEL);
++      if (!scpi_drvinfo->channels)
+               return -ENOMEM;
+-      ret = devm_add_action(dev, scpi_free_channels, scpi_info);
++      ret = devm_add_action(dev, scpi_free_channels, scpi_drvinfo);
+       if (ret)
+               return ret;
+-      for (; scpi_info->num_chans < count; scpi_info->num_chans++) {
++      for (; scpi_drvinfo->num_chans < count; scpi_drvinfo->num_chans++) {
+               resource_size_t size;
+-              int idx = scpi_info->num_chans;
+-              struct scpi_chan *pchan = scpi_info->channels + idx;
++              int idx = scpi_drvinfo->num_chans;
++              struct scpi_chan *pchan = scpi_drvinfo->channels + idx;
+               struct mbox_client *cl = &pchan->cl;
+               struct device_node *shmem = of_parse_phandle(np, "shmem", idx);
+@@ -986,45 +987,53 @@ static int scpi_probe(struct platform_device *pdev)
+               return ret;
+       }
+-      scpi_info->commands = scpi_std_commands;
++      scpi_drvinfo->commands = scpi_std_commands;
+-      platform_set_drvdata(pdev, scpi_info);
++      platform_set_drvdata(pdev, scpi_drvinfo);
+-      if (scpi_info->is_legacy) {
++      if (scpi_drvinfo->is_legacy) {
+               /* Replace with legacy variants */
+               scpi_ops.clk_set_val = legacy_scpi_clk_set_val;
+-              scpi_info->commands = scpi_legacy_commands;
++              scpi_drvinfo->commands = scpi_legacy_commands;
+               /* Fill priority bitmap */
+               for (idx = 0; idx < ARRAY_SIZE(legacy_hpriority_cmds); idx++)
+                       set_bit(legacy_hpriority_cmds[idx],
+-                              scpi_info->cmd_priority);
++                              scpi_drvinfo->cmd_priority);
+       }
+-      ret = scpi_init_versions(scpi_info);
++      scpi_info = scpi_drvinfo;
++
++      ret = scpi_init_versions(scpi_drvinfo);
+       if (ret) {
+               dev_err(dev, "incorrect or no SCP firmware found\n");
++              scpi_info = NULL;
+               return ret;
+       }
+-      if (scpi_info->is_legacy && !scpi_info->protocol_version &&
+-          !scpi_info->firmware_version)
++      if (scpi_drvinfo->is_legacy && !scpi_drvinfo->protocol_version &&
++          !scpi_drvinfo->firmware_version)
+               dev_info(dev, "SCP Protocol legacy pre-1.0 firmware\n");
+       else
+               dev_info(dev, "SCP Protocol %lu.%lu Firmware %lu.%lu.%lu version\n",
+                        FIELD_GET(PROTO_REV_MAJOR_MASK,
+-                                 scpi_info->protocol_version),
++                                 scpi_drvinfo->protocol_version),
+                        FIELD_GET(PROTO_REV_MINOR_MASK,
+-                                 scpi_info->protocol_version),
++                                 scpi_drvinfo->protocol_version),
+                        FIELD_GET(FW_REV_MAJOR_MASK,
+-                                 scpi_info->firmware_version),
++                                 scpi_drvinfo->firmware_version),
+                        FIELD_GET(FW_REV_MINOR_MASK,
+-                                 scpi_info->firmware_version),
++                                 scpi_drvinfo->firmware_version),
+                        FIELD_GET(FW_REV_PATCH_MASK,
+-                                 scpi_info->firmware_version));
+-      scpi_info->scpi_ops = &scpi_ops;
++                                 scpi_drvinfo->firmware_version));
++
++      scpi_drvinfo->scpi_ops = &scpi_ops;
+-      return devm_of_platform_populate(dev);
++      ret = devm_of_platform_populate(dev);
++      if (ret)
++              scpi_info = NULL;
++
++      return ret;
+ }
+ static const struct of_device_id scpi_of_match[] = {
+-- 
+2.35.1
+
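+The fix boils down to one rule: a globally visible pointer may only
+refer to fully initialized memory that is still owned. A condensed
+sketch of the pattern the patch adopts (init_channels() and
+init_versions() are hypothetical stand-ins for the real setup steps):
+
+    static struct scpi_drvinfo *scpi_info; /* read via get_scpi_ops() */
+
+    static int probe_sketch(struct platform_device *pdev)
+    {
+            struct scpi_drvinfo *info;
+            int ret;
+
+            info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
+            if (!info)
+                    return -ENOMEM;
+
+            ret = init_channels(info);     /* hypothetical setup */
+            if (ret)
+                    return ret; /* scpi_info never saw this buffer */
+
+            scpi_info = info;              /* publish */
+            ret = init_versions(info);     /* hypothetical late step */
+            if (ret)
+                    scpi_info = NULL; /* un-publish on late failure */
+            return ret;
+    }
+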
diff --git a/queue-5.18/ftrace-x86-add-back-ftrace_expected-assignment.patch-2936 b/queue-5.18/ftrace-x86-add-back-ftrace_expected-assignment.patch-2936
new file mode 100644 (file)
index 0000000..5be2f7e
--- /dev/null
@@ -0,0 +1,49 @@
+From e881657d8ff0377cfd861e664e7e198f7d0ca102 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 10:18:51 -0400
+Subject: ftrace/x86: Add back ftrace_expected assignment
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit ac6c1b2ca77e722a1e5d651f12f437f2f237e658 ]
+
+When a ftrace_bug happens (where ftrace fails to modify a location) it is
+helpful to have what was at that location as well as what was expected to
+be there.
+
+But with the conversion to text_poke(), the assignment of the variable
+that holds the expected contents for debugging was dropped.
+Unfortunately, I noticed this when I needed it. Add it back.
+
+Link: https://lkml.kernel.org/r/20220726101851.069d2e70@gandalf.local.home
+
+Cc: "x86@kernel.org" <x86@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Fixes: 768ae4406a5c ("x86/ftrace: Use text_poke()")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/ftrace.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
+index 6892ca67d9c6..b6d7ece7bf51 100644
+--- a/arch/x86/kernel/ftrace.c
++++ b/arch/x86/kernel/ftrace.c
+@@ -93,6 +93,7 @@ static int ftrace_verify_code(unsigned long ip, const char *old_code)
+       /* Make sure it is what we expect it to be */
+       if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) {
++              ftrace_expected = old_code;
+               WARN_ON(1);
+               return -EINVAL;
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.18/hugetlb_cgroup-fix-wrong-hugetlb-cgroup-numa-stat.patch b/queue-5.18/hugetlb_cgroup-fix-wrong-hugetlb-cgroup-numa-stat.patch
new file mode 100644 (file)
index 0000000..8acc9d4
--- /dev/null
@@ -0,0 +1,43 @@
+From 2cd3814758463e87405049bb89f7940da1dc7c1b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 23 Jul 2022 15:38:04 +0800
+Subject: hugetlb_cgroup: fix wrong hugetlb cgroup numa stat
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+[ Upstream commit 2727cfe4072a35ce813e3708f74c135de7da8897 ]
+
+We forgot to set cft->private for the numa stat file.  As a result, the
+numa stat of hstates[0] is always shown for all hstates.  Encode the
+hstate index into cft->private to fix this issue.
+
+Link: https://lkml.kernel.org/r/20220723073804.53035-1-linmiaohe@huawei.com
+Fixes: f47761999052 ("hugetlb: add hugetlb.*.numa_stat file")
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Acked-by: Muchun Song <songmuchun@bytedance.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Mina Almasry <almasrymina@google.com>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/hugetlb_cgroup.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
+index f9942841df18..c86691c431fd 100644
+--- a/mm/hugetlb_cgroup.c
++++ b/mm/hugetlb_cgroup.c
+@@ -772,6 +772,7 @@ static void __init __hugetlb_cgroup_file_dfl_init(int idx)
+       /* Add the numa stat file */
+       cft = &h->cgroup_files_dfl[6];
+       snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf);
++      cft->private = MEMFILE_PRIVATE(idx, 0);
+       cft->seq_show = hugetlb_cgroup_read_numa_stat;
+       cft->flags = CFTYPE_NOT_ON_ROOT;
+-- 
+2.35.1
+
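+MEMFILE_PRIVATE() packs two small integers into the single private word
+of a control file; a sketch of the encoding this relies on (the 16-bit
+split matches the helper's other users in mm, but treat the exact
+layout as an assumption):
+
+    #define MEMFILE_PRIVATE(idx, val)  (((idx) << 16) | (val))
+    #define MEMFILE_IDX(priv)          (((priv) >> 16) & 0xffff)
+    #define MEMFILE_ATTR(priv)         ((priv) & 0xffff)
+
+    /* the numa_stat file of hstate idx now carries its own index */
+    cft->private = MEMFILE_PRIVATE(idx, 0);
+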
diff --git a/queue-5.18/input-gscps2-check-return-value-of-ioremap-in-gscps2.patch b/queue-5.18/input-gscps2-check-return-value-of-ioremap-in-gscps2.patch
new file mode 100644 (file)
index 0000000..f07477d
--- /dev/null
@@ -0,0 +1,40 @@
+From 6674953f1a89b966ac5c94dac089f2b80d9e9932 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Aug 2022 15:20:33 +0800
+Subject: Input: gscps2 - check return value of ioremap() in gscps2_probe()
+
+From: Xie Shaowen <studentxswpy@163.com>
+
+[ Upstream commit e61b3125a4f036b3c6b87ffd656fc1ab00440ae9 ]
+
+The function ioremap() in gscps2_probe() can fail, so
+its return value should be checked.
+
+Fixes: 4bdc0d676a643 ("remove ioremap_nocache and devm_ioremap_nocache")
+Cc: <stable@vger.kernel.org> # v5.6+
+Reported-by: Hacash Robot <hacashRobot@santino.com>
+Signed-off-by: Xie Shaowen <studentxswpy@163.com>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/input/serio/gscps2.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/input/serio/gscps2.c b/drivers/input/serio/gscps2.c
+index a9065c6ab550..da2c67cb8642 100644
+--- a/drivers/input/serio/gscps2.c
++++ b/drivers/input/serio/gscps2.c
+@@ -350,6 +350,10 @@ static int __init gscps2_probe(struct parisc_device *dev)
+       ps2port->port = serio;
+       ps2port->padev = dev;
+       ps2port->addr = ioremap(hpa, GSC_STATUS + 4);
++      if (!ps2port->addr) {
++              ret = -ENOMEM;
++              goto fail_nomem;
++      }
+       spin_lock_init(&ps2port->lock);
+       gscps2_reset(ps2port);
+-- 
+2.35.1
+
diff --git a/queue-5.18/intel_idle-add-alderlake-support.patch b/queue-5.18/intel_idle-add-alderlake-support.patch
new file mode 100644 (file)
index 0000000..56730c3
--- /dev/null
@@ -0,0 +1,213 @@
+From c13505c36b2642150596f12cc6b74f630ad22b45 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Apr 2022 17:39:51 +0800
+Subject: intel_idle: Add AlderLake support
+
+From: Zhang Rui <rui.zhang@intel.com>
+
+[ Upstream commit d1cf8bbfed1edc5108220342ab39e4544d55fbc3 ]
+
+Similar to SPR, the C1 and C1E states on ADL are mutually exclusive.
+Only one of them can be enabled at a time.
+
+But in contrast to SPR, which as a Xeon processor usually has a strong
+latency requirement, C1E is preferred on ADL for better energy
+efficiency.
+
+Add custom C-state tables for ADL with both C1 and C1E, and
+
+ 1. Enable the "C1E promotion" bit in MSR_IA32_POWER_CTL and mark C1
+    with the CPUIDLE_FLAG_UNUSABLE flag, so C1 is not available by
+    default.
+
+ 2. Add support for the "preferred_cstates" module parameter, so that
+    users can choose to use C1 instead of C1E by booting with
+    "intel_idle.preferred_cstates=2".
+
+Separate custom C-state tables are introduced for the ADL mobile and
+desktop processors, because of the exit latency differences between
+these two variants, especially with respect to PC10.
+
+Signed-off-by: Zhang Rui <rui.zhang@intel.com>
+[ rjw: Changelog edits, code rearrangement ]
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/idle/intel_idle.c | 133 ++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 133 insertions(+)
+
+diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
+index 47b68c6071be..907700d1e78e 100644
+--- a/drivers/idle/intel_idle.c
++++ b/drivers/idle/intel_idle.c
+@@ -811,6 +811,106 @@ static struct cpuidle_state icx_cstates[] __initdata = {
+               .enter = NULL }
+ };
++/*
++ * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
++ * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
++ * But in this case there is effectively no C1, because C1 requests are
++ * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
++ * and C1E requests end up with C1, so there is effectively no C1E.
++ *
++ * By default we enable C1E and disable C1 by marking it with
++ * 'CPUIDLE_FLAG_UNUSABLE'.
++ */
++static struct cpuidle_state adl_cstates[] __initdata = {
++      {
++              .name = "C1",
++              .desc = "MWAIT 0x00",
++              .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
++              .exit_latency = 1,
++              .target_residency = 1,
++              .enter = &intel_idle,
++              .enter_s2idle = intel_idle_s2idle, },
++      {
++              .name = "C1E",
++              .desc = "MWAIT 0x01",
++              .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
++              .exit_latency = 2,
++              .target_residency = 4,
++              .enter = &intel_idle,
++              .enter_s2idle = intel_idle_s2idle, },
++      {
++              .name = "C6",
++              .desc = "MWAIT 0x20",
++              .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .exit_latency = 220,
++              .target_residency = 600,
++              .enter = &intel_idle,
++              .enter_s2idle = intel_idle_s2idle, },
++      {
++              .name = "C8",
++              .desc = "MWAIT 0x40",
++              .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .exit_latency = 280,
++              .target_residency = 800,
++              .enter = &intel_idle,
++              .enter_s2idle = intel_idle_s2idle, },
++      {
++              .name = "C10",
++              .desc = "MWAIT 0x60",
++              .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .exit_latency = 680,
++              .target_residency = 2000,
++              .enter = &intel_idle,
++              .enter_s2idle = intel_idle_s2idle, },
++      {
++              .enter = NULL }
++};
++
++static struct cpuidle_state adl_l_cstates[] __initdata = {
++      {
++              .name = "C1",
++              .desc = "MWAIT 0x00",
++              .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
++              .exit_latency = 1,
++              .target_residency = 1,
++              .enter = &intel_idle,
++              .enter_s2idle = intel_idle_s2idle, },
++      {
++              .name = "C1E",
++              .desc = "MWAIT 0x01",
++              .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
++              .exit_latency = 2,
++              .target_residency = 4,
++              .enter = &intel_idle,
++              .enter_s2idle = intel_idle_s2idle, },
++      {
++              .name = "C6",
++              .desc = "MWAIT 0x20",
++              .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .exit_latency = 170,
++              .target_residency = 500,
++              .enter = &intel_idle,
++              .enter_s2idle = intel_idle_s2idle, },
++      {
++              .name = "C8",
++              .desc = "MWAIT 0x40",
++              .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .exit_latency = 200,
++              .target_residency = 600,
++              .enter = &intel_idle,
++              .enter_s2idle = intel_idle_s2idle, },
++      {
++              .name = "C10",
++              .desc = "MWAIT 0x60",
++              .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
++              .exit_latency = 230,
++              .target_residency = 700,
++              .enter = &intel_idle,
++              .enter_s2idle = intel_idle_s2idle, },
++      {
++              .enter = NULL }
++};
++
+ /*
+  * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
+  * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
+@@ -1194,6 +1294,14 @@ static const struct idle_cpu idle_cpu_icx __initconst = {
+       .use_acpi = true,
+ };
++static const struct idle_cpu idle_cpu_adl __initconst = {
++      .state_table = adl_cstates,
++};
++
++static const struct idle_cpu idle_cpu_adl_l __initconst = {
++      .state_table = adl_l_cstates,
++};
++
+ static const struct idle_cpu idle_cpu_spr __initconst = {
+       .state_table = spr_cstates,
+       .disable_promotion_to_c1e = true,
+@@ -1262,6 +1370,8 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
+       X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &idle_cpu_skx),
+       X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &idle_cpu_icx),
+       X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &idle_cpu_icx),
++      X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &idle_cpu_adl),
++      X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &idle_cpu_adl_l),
+       X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &idle_cpu_spr),
+       X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &idle_cpu_knl),
+       X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &idle_cpu_knl),
+@@ -1620,6 +1730,25 @@ static void __init skx_idle_state_table_update(void)
+       }
+ }
++/**
++ * adl_idle_state_table_update - Adjust AlderLake idle states table.
++ */
++static void __init adl_idle_state_table_update(void)
++{
++      /* Check if user prefers C1 over C1E. */
++      if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
++              cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
++              cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
++
++              /* Disable C1E by clearing the "C1E promotion" bit. */
++              c1e_promotion = C1E_PROMOTION_DISABLE;
++              return;
++      }
++
++      /* Make sure C1E is enabled by default */
++      c1e_promotion = C1E_PROMOTION_ENABLE;
++}
++
+ /**
+  * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
+  */
+@@ -1689,6 +1818,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
+       case INTEL_FAM6_SAPPHIRERAPIDS_X:
+               spr_idle_state_table_update();
+               break;
++      case INTEL_FAM6_ALDERLAKE:
++      case INTEL_FAM6_ALDERLAKE_L:
++              adl_idle_state_table_update();
++              break;
+       }
+       for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
+-- 
+2.35.1
+
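+The new module parameter decodes as a bitmask of C-state indices; a
+sketch of the check at the heart of adl_idle_state_table_update(),
+assuming BIT(1) selects C1 and BIT(2) selects C1E, so that booting with
+intel_idle.preferred_cstates=2 sets only BIT(1) and picks C1:
+
+    /* user asked for C1 and not C1E: flip the defaults */
+    if ((preferred_states_mask & BIT(1)) &&
+        !(preferred_states_mask & BIT(2))) {
+            cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
+            cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
+            c1e_promotion = C1E_PROMOTION_DISABLE; /* no promotion */
+    }
+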
diff --git a/queue-5.18/intel_idle-make-spr-c1-and-c1e-be-independent.patch b/queue-5.18/intel_idle-make-spr-c1-and-c1e-be-independent.patch
new file mode 100644 (file)
index 0000000..e7d78bf
--- /dev/null
@@ -0,0 +1,90 @@
+From 72d2cfae1dd2c795060d056fcddb70dff941d0da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 16 Jul 2022 09:26:55 +0300
+Subject: intel_idle: make SPR C1 and C1E be independent
+
+From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
+
+[ Upstream commit 1548fac47a114b42063def551eb152a536ed9697 ]
+
+This patch partially reverts the changes made by the following commit:
+
+da0e58c038e6 intel_idle: add 'preferred_cstates' module argument
+
+As that commit describes, on early Sapphire Rapids Xeon platforms the C1 and
+C1E states were mutually exclusive, so that users could only have either C1 and
+C6, or C1E and C6.
+
+However, Intel firmware engineers managed to remove this limitation and
+make C1 and C1E completely independent, just like on previous Xeon
+platforms.
+
+Therefore, this patch:
+ * Removes the commentary describing the old, now non-existent SPR C1E
+   limitation.
+ * Marks SPR C1E as available by default.
+ * Removes the 'preferred_cstates' parameter handling for SPR. Both C1
+   and C1E will be available regardless of the 'preferred_cstates'
+   value.
+
+We expect that all SPR systems are shipping with new firmware, which includes
+the C1/C1E improvement.
+
+Cc: v5.18+ <stable@vger.kernel.org> # v5.18+
+Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/idle/intel_idle.c | 24 +-----------------------
+ 1 file changed, 1 insertion(+), 23 deletions(-)
+
+diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
+index 907700d1e78e..9515a3146dc9 100644
+--- a/drivers/idle/intel_idle.c
++++ b/drivers/idle/intel_idle.c
+@@ -911,16 +911,6 @@ static struct cpuidle_state adl_l_cstates[] __initdata = {
+               .enter = NULL }
+ };
+-/*
+- * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
+- * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
+- * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1
+- * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then
+- * both C1 and C1E requests end up with C1, so there is effectively no C1E.
+- *
+- * By default we enable C1 and disable C1E by marking it with
+- * 'CPUIDLE_FLAG_UNUSABLE'.
+- */
+ static struct cpuidle_state spr_cstates[] __initdata = {
+       {
+               .name = "C1",
+@@ -933,8 +923,7 @@ static struct cpuidle_state spr_cstates[] __initdata = {
+       {
+               .name = "C1E",
+               .desc = "MWAIT 0x01",
+-              .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE |
+-                                         CPUIDLE_FLAG_UNUSABLE,
++              .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+               .exit_latency = 2,
+               .target_residency = 4,
+               .enter = &intel_idle,
+@@ -1756,17 +1745,6 @@ static void __init spr_idle_state_table_update(void)
+ {
+       unsigned long long msr;
+-      /* Check if user prefers C1E over C1. */
+-      if ((preferred_states_mask & BIT(2)) &&
+-          !(preferred_states_mask & BIT(1))) {
+-              /* Disable C1 and enable C1E. */
+-              spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
+-              spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;
+-
+-              /* Enable C1E using the "C1E promotion" bit. */
+-              c1e_promotion = C1E_PROMOTION_ENABLE;
+-      }
+-
+       /*
+        * By default, the C6 state assumes the worst-case scenario of package
+        * C6. However, if PC6 is disabled, we update the numbers to match
+-- 
+2.35.1
+
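For context: the c1e_promotion flag handled in the hunks above ultimately drives a read-modify-write of a single bit in MSR_IA32_POWER_CTL. A minimal standalone sketch of that operation, assuming the bit-1 "C1E Enable" encoding intel_idle uses; fake_msr is a stand-in for rdmsrl()/wrmsrl(), which need kernel context:

#include <stdint.h>
#include <stdio.h>

#define MSR_IA32_POWER_CTL   0x1fc
#define POWER_CTL_C1E_BIT    (1ULL << 1)   /* "C1E Enable" bit */

static uint64_t fake_msr;                  /* stand-in for the hardware MSR */

static void set_c1e_promotion(int enable)
{
        uint64_t msr = fake_msr;           /* rdmsrl(MSR_IA32_POWER_CTL, msr) */

        if (enable)
                msr |= POWER_CTL_C1E_BIT;
        else
                msr &= ~POWER_CTL_C1E_BIT;

        fake_msr = msr;                    /* wrmsrl(MSR_IA32_POWER_CTL, msr) */
}

int main(void)
{
        set_c1e_promotion(1);
        printf("C1E promotion %s\n",
               (fake_msr & POWER_CTL_C1E_BIT) ? "enabled" : "disabled");
        return 0;
}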
diff --git a/queue-5.18/intel_th-pci-add-meteor-lake-p-support.patch b/queue-5.18/intel_th-pci-add-meteor-lake-p-support.patch
new file mode 100644 (file)
index 0000000..d5d460c
--- /dev/null
@@ -0,0 +1,40 @@
+From ca9b34b3b4760f6429dbc8d09097e704d75a7bed Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Jul 2022 11:26:35 +0300
+Subject: intel_th: pci: Add Meteor Lake-P support
+
+From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+
+[ Upstream commit 802a9a0b1d91274ef10d9fe429b4cc1e8c200aef ]
+
+Add support for the Trace Hub in Meteor Lake-P.
+
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Cc: stable <stable@kernel.org>
+Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Link: https://lore.kernel.org/r/20220705082637.59979-5-alexander.shishkin@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hwtracing/intel_th/pci.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
+index fcd0aca75007..41a31c7f505f 100644
+--- a/drivers/hwtracing/intel_th/pci.c
++++ b/drivers/hwtracing/intel_th/pci.c
+@@ -284,6 +284,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x54a6),
+               .driver_data = (kernel_ulong_t)&intel_th_2x,
+       },
++      {
++              /* Meteor Lake-P */
++              PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7e24),
++              .driver_data = (kernel_ulong_t)&intel_th_2x,
++      },
+       {
+               /* Alder Lake CPU */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f),
+-- 
+2.35.1
+
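This patch and the two Raptor Lake-S patches below are pure new-ID enablement: one entry appended to a zero-terminated pci_device_id table. A kernel-style sketch of the pattern; example_2x and example_ids are illustrative, the real table points .driver_data at intel_th_2x:

#include <linux/module.h>
#include <linux/pci.h>

/* Illustrative per-model data; the real driver uses intel_th_2x. */
static const struct example_drvdata {
        int nr_resources;
} example_2x = { .nr_resources = 2 };

static const struct pci_device_id example_ids[] = {
        {
                /* Meteor Lake-P */
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7e24),
                .driver_data = (kernel_ulong_t)&example_2x,
        },
        { },    /* zero entry terminates the table for the PCI core */
};
MODULE_DEVICE_TABLE(pci, example_ids);

Because matching is purely table-driven, no code change beyond the new entry is needed to support the new hardware.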
diff --git a/queue-5.18/intel_th-pci-add-raptor-lake-s-cpu-support.patch b/queue-5.18/intel_th-pci-add-raptor-lake-s-cpu-support.patch
new file mode 100644 (file)
index 0000000..f12a531
--- /dev/null
@@ -0,0 +1,40 @@
+From 72f0e594ee06a5ab7c5ba3b37bb0774444d0b18e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Jul 2022 11:26:37 +0300
+Subject: intel_th: pci: Add Raptor Lake-S CPU support
+
+From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+
+[ Upstream commit ff46a601afc5a66a81c3945b83d0a2caeb88e8bc ]
+
+Add support for the Trace Hub in Raptor Lake-S CPU.
+
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Cc: stable <stable@kernel.org>
+Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Link: https://lore.kernel.org/r/20220705082637.59979-7-alexander.shishkin@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hwtracing/intel_th/pci.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
+index 5b6da26f1b63..147d338c191e 100644
+--- a/drivers/hwtracing/intel_th/pci.c
++++ b/drivers/hwtracing/intel_th/pci.c
+@@ -294,6 +294,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7a26),
+               .driver_data = (kernel_ulong_t)&intel_th_2x,
+       },
++      {
++              /* Raptor Lake-S CPU */
++              PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa76f),
++              .driver_data = (kernel_ulong_t)&intel_th_2x,
++      },
+       {
+               /* Alder Lake CPU */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f),
+-- 
+2.35.1
+
diff --git a/queue-5.18/intel_th-pci-add-raptor-lake-s-pch-support.patch b/queue-5.18/intel_th-pci-add-raptor-lake-s-pch-support.patch
new file mode 100644 (file)
index 0000000..ef73b40
--- /dev/null
@@ -0,0 +1,40 @@
+From 2ea65adef5be4557edb506fdaa6b3ea53411d77f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Jul 2022 11:26:36 +0300
+Subject: intel_th: pci: Add Raptor Lake-S PCH support
+
+From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+
+[ Upstream commit 23e2de5826e2fc4dd43e08bab3a2ea1a5338b063 ]
+
+Add support for the Trace Hub in Raptor Lake-S PCH.
+
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Cc: stable <stable@kernel.org>
+Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Link: https://lore.kernel.org/r/20220705082637.59979-6-alexander.shishkin@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hwtracing/intel_th/pci.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
+index 41a31c7f505f..5b6da26f1b63 100644
+--- a/drivers/hwtracing/intel_th/pci.c
++++ b/drivers/hwtracing/intel_th/pci.c
+@@ -289,6 +289,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7e24),
+               .driver_data = (kernel_ulong_t)&intel_th_2x,
+       },
++      {
++              /* Raptor Lake-S */
++              PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7a26),
++              .driver_data = (kernel_ulong_t)&intel_th_2x,
++      },
+       {
+               /* Alder Lake CPU */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f),
+-- 
+2.35.1
+
diff --git a/queue-5.18/iommu-vt-d-avoid-invalid-memory-access-via-node_onli.patch b/queue-5.18/iommu-vt-d-avoid-invalid-memory-access-via-node_onli.patch
new file mode 100644 (file)
index 0000000..5ce8b7e
--- /dev/null
@@ -0,0 +1,66 @@
+From 48a418e1d496eadf562380a9947ee715f429e6ca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 17:38:36 +0200
+Subject: iommu/vt-d: avoid invalid memory access via node_online(NUMA_NO_NODE)
+
+From: Alexander Lobakin <alexandr.lobakin@intel.com>
+
+[ Upstream commit b0b0b77ea611e3088e9523e60860f4f41b62b235 ]
+
+KASAN reports:
+
+[ 4.668325][ T0] BUG: KASAN: wild-memory-access in dmar_parse_one_rhsa (arch/x86/include/asm/bitops.h:214 arch/x86/include/asm/bitops.h:226 include/asm-generic/bitops/instrumented-non-atomic.h:142 include/linux/nodemask.h:415 drivers/iommu/intel/dmar.c:497)
+[    4.676149][    T0] Read of size 8 at addr 1fffffff85115558 by task swapper/0/0
+[    4.683454][    T0]
+[    4.685638][    T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.19.0-rc3-00004-g0e862838f290 #1
+[    4.694331][    T0] Hardware name: Supermicro SYS-5018D-FN4T/X10SDV-8C-TLN4F, BIOS 1.1 03/02/2016
+[    4.703196][    T0] Call Trace:
+[    4.706334][    T0]  <TASK>
+[ 4.709133][ T0] ? dmar_parse_one_rhsa (arch/x86/include/asm/bitops.h:214 arch/x86/include/asm/bitops.h:226 include/asm-generic/bitops/instrumented-non-atomic.h:142 include/linux/nodemask.h:415 drivers/iommu/intel/dmar.c:497)
+
+after converting the type of the first argument (@nr, bit number)
+of arch_test_bit() from `long` to `unsigned long`[0].
+
+Under certain conditions (for example, when ACPI NUMA is disabled
+via the command line), pxm_to_node() can return %NUMA_NO_NODE (-1).
+It is a valid 'magic' NUMA node number, but not a valid bit number
+to use in bitops.
+node_online() eventually descends to test_bit() without validating
+the input, assuming validation is the caller's job (which may be
+good for perf-critical paths). There, -1 becomes %ULONG_MAX, which
+leads to an insane array index when calculating the bit position in
+memory.
+
+For now, add an explicit check that @node is not %NUMA_NO_NODE
+before calling test_bit(). The actual logic didn't change here
+at all.
+
+[0] https://github.com/norov/linux/commit/0e862838f290147ea9c16db852d8d494b552d38d
+
+Fixes: ee34b32d8c29 ("dmar: support for parsing Remapping Hardware Static Affinity structure")
+Cc: stable@vger.kernel.org # 2.6.33+
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Signed-off-by: Alexander Lobakin <alexandr.lobakin@intel.com>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
+Signed-off-by: Yury Norov <yury.norov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/intel/dmar.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
+index 497c5bd95caf..2a10c9b54064 100644
+--- a/drivers/iommu/intel/dmar.c
++++ b/drivers/iommu/intel/dmar.c
+@@ -495,7 +495,7 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
+               if (drhd->reg_base_addr == rhsa->base_address) {
+                       int node = pxm_to_node(rhsa->proximity_domain);
+-                      if (!node_online(node))
++                      if (node != NUMA_NO_NODE && !node_online(node))
+                               node = NUMA_NO_NODE;
+                       drhd->iommu->node = node;
+                       return 0;
+-- 
+2.35.1
+
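The hazard being closed is easy to reproduce in isolation: NUMA_NO_NODE is -1, and the bit helpers take an unsigned bit number. A self-contained sketch of the failure mode:

#include <limits.h>
#include <stdio.h>

#define NUMA_NO_NODE (-1)

int main(void)
{
        int node = NUMA_NO_NODE;
        unsigned long nr = (unsigned long)node;   /* wraps to ULONG_MAX */

        /* test_bit(nr, map) would dereference map[nr / BITS_PER_LONG],
         * a wild address far outside any node bitmap. */
        printf("word index: %lu\n", nr / (sizeof(unsigned long) * CHAR_BIT));
        return 0;
}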
diff --git a/queue-5.18/kexec-clean-up-arch_kexec_kernel_verify_sig.patch b/queue-5.18/kexec-clean-up-arch_kexec_kernel_verify_sig.patch
new file mode 100644 (file)
index 0000000..ba802ce
--- /dev/null
@@ -0,0 +1,107 @@
+From 0d01ede0c3b718afbf2b4bdb7b91840f25cc0d0e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Jul 2022 21:40:24 +0800
+Subject: kexec: clean up arch_kexec_kernel_verify_sig
+
+From: Coiby Xu <coxu@redhat.com>
+
+[ Upstream commit 689a71493bd2f31c024f8c0395f85a1fd4b2138e ]
+
+Before commit 105e10e2cf1c ("kexec_file: drop weak attribute from
+functions"), there was already no arch-specific implementation
+of arch_kexec_kernel_verify_sig. With the weak attribute dropped by that
+commit, arch_kexec_kernel_verify_sig is completely useless. So clean it
+up.
+
+Note that later patches depend on this patch, so it should be backported
+to the stable tree as well.
+
+Cc: stable@vger.kernel.org
+Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
+Reviewed-by: Michal Suchanek <msuchanek@suse.de>
+Acked-by: Baoquan He <bhe@redhat.com>
+Signed-off-by: Coiby Xu <coxu@redhat.com>
+[zohar@linux.ibm.com: reworded patch description "Note"]
+Link: https://lore.kernel.org/linux-integrity/20220714134027.394370-1-coxu@redhat.com/
+Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/kexec.h |  5 -----
+ kernel/kexec_file.c   | 33 +++++++++++++--------------------
+ 2 files changed, 13 insertions(+), 25 deletions(-)
+
+diff --git a/include/linux/kexec.h b/include/linux/kexec.h
+index 87c1795297b0..f3e7680befcc 100644
+--- a/include/linux/kexec.h
++++ b/include/linux/kexec.h
+@@ -212,11 +212,6 @@ static inline void *arch_kexec_kernel_image_load(struct kimage *image)
+ }
+ #endif
+-#ifdef CONFIG_KEXEC_SIG
+-int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+-                               unsigned long buf_len);
+-#endif
+-
+ extern int kexec_add_buffer(struct kexec_buf *kbuf);
+ int kexec_locate_mem_hole(struct kexec_buf *kbuf);
+diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
+index 925953dfef05..ad005cd184a4 100644
+--- a/kernel/kexec_file.c
++++ b/kernel/kexec_file.c
+@@ -81,24 +81,6 @@ int kexec_image_post_load_cleanup_default(struct kimage *image)
+       return image->fops->cleanup(image->image_loader_data);
+ }
+-#ifdef CONFIG_KEXEC_SIG
+-static int kexec_image_verify_sig_default(struct kimage *image, void *buf,
+-                                        unsigned long buf_len)
+-{
+-      if (!image->fops || !image->fops->verify_sig) {
+-              pr_debug("kernel loader does not support signature verification.\n");
+-              return -EKEYREJECTED;
+-      }
+-
+-      return image->fops->verify_sig(buf, buf_len);
+-}
+-
+-int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, unsigned long buf_len)
+-{
+-      return kexec_image_verify_sig_default(image, buf, buf_len);
+-}
+-#endif
+-
+ /*
+  * Free up memory used by kernel, initrd, and command line. This is temporary
+  * memory allocation which is not needed any more after these buffers have
+@@ -141,13 +123,24 @@ void kimage_file_post_load_cleanup(struct kimage *image)
+ }
+ #ifdef CONFIG_KEXEC_SIG
++static int kexec_image_verify_sig(struct kimage *image, void *buf,
++                                unsigned long buf_len)
++{
++      if (!image->fops || !image->fops->verify_sig) {
++              pr_debug("kernel loader does not support signature verification.\n");
++              return -EKEYREJECTED;
++      }
++
++      return image->fops->verify_sig(buf, buf_len);
++}
++
+ static int
+ kimage_validate_signature(struct kimage *image)
+ {
+       int ret;
+-      ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
+-                                         image->kernel_buf_len);
++      ret = kexec_image_verify_sig(image, image->kernel_buf,
++                                   image->kernel_buf_len);
+       if (ret) {
+               if (sig_enforce) {
+-- 
+2.35.1
+
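The helper this patch keeps follows a fail-closed dispatch pattern: a missing verify_sig op rejects the image rather than skipping verification. A standalone sketch under illustrative names; EKEYREJECTED is the Linux errno the real code returns:

#include <errno.h>
#include <stdio.h>

struct image_ops {
        int (*verify_sig)(const void *buf, unsigned long len);
};

struct image {
        const struct image_ops *fops;
};

static int image_verify_sig(struct image *img, const void *buf,
                            unsigned long len)
{
        /* No registered verifier: reject rather than silently pass. */
        if (!img->fops || !img->fops->verify_sig)
                return -EKEYREJECTED;
        return img->fops->verify_sig(buf, len);
}

int main(void)
{
        struct image img = { .fops = NULL };
        printf("%d\n", image_verify_sig(&img, "kernel", 6));  /* -129 on Linux */
        return 0;
}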
diff --git a/queue-5.18/kexec-keys-s390-make-use-of-built-in-and-secondary-k.patch b/queue-5.18/kexec-keys-s390-make-use-of-built-in-and-secondary-k.patch
new file mode 100644 (file)
index 0000000..0648467
--- /dev/null
@@ -0,0 +1,72 @@
+From 2f10017ddc47f38b293b183149fca92892bcfec0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Jul 2022 21:40:27 +0800
+Subject: kexec, KEYS, s390: Make use of built-in and secondary keyring for
+ signature verification
+
+From: Michal Suchanek <msuchanek@suse.de>
+
+[ Upstream commit 0828c4a39be57768b8788e8cbd0d84683ea757e5 ]
+
+commit e23a8020ce4e ("s390/kexec_file: Signature verification prototype")
+adds support for KEXEC_SIG verification with keys from platform keyring
+but the built-in keys and secondary keyring are not used.
+
+Add support for the built-in keys and secondary keyring as x86 does.
+
+Fixes: e23a8020ce4e ("s390/kexec_file: Signature verification prototype")
+Cc: stable@vger.kernel.org
+Cc: Philipp Rudo <prudo@linux.ibm.com>
+Cc: kexec@lists.infradead.org
+Cc: keyrings@vger.kernel.org
+Cc: linux-security-module@vger.kernel.org
+Signed-off-by: Michal Suchanek <msuchanek@suse.de>
+Reviewed-by: "Lee, Chun-Yi" <jlee@suse.com>
+Acked-by: Baoquan He <bhe@redhat.com>
+Signed-off-by: Coiby Xu <coxu@redhat.com>
+Acked-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kernel/machine_kexec_file.c | 18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
+index 8f43575a4dd3..fc6d5f58debe 100644
+--- a/arch/s390/kernel/machine_kexec_file.c
++++ b/arch/s390/kernel/machine_kexec_file.c
+@@ -31,6 +31,7 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len)
+       const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1;
+       struct module_signature *ms;
+       unsigned long sig_len;
++      int ret;
+       /* Skip signature verification when not secure IPLed. */
+       if (!ipl_secure_flag)
+@@ -65,11 +66,18 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len)
+               return -EBADMSG;
+       }
+-      return verify_pkcs7_signature(kernel, kernel_len,
+-                                    kernel + kernel_len, sig_len,
+-                                    VERIFY_USE_PLATFORM_KEYRING,
+-                                    VERIFYING_MODULE_SIGNATURE,
+-                                    NULL, NULL);
++      ret = verify_pkcs7_signature(kernel, kernel_len,
++                                   kernel + kernel_len, sig_len,
++                                   VERIFY_USE_SECONDARY_KEYRING,
++                                   VERIFYING_MODULE_SIGNATURE,
++                                   NULL, NULL);
++      if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING))
++              ret = verify_pkcs7_signature(kernel, kernel_len,
++                                           kernel + kernel_len, sig_len,
++                                           VERIFY_USE_PLATFORM_KEYRING,
++                                           VERIFYING_MODULE_SIGNATURE,
++                                           NULL, NULL);
++      return ret;
+ }
+ #endif /* CONFIG_KEXEC_SIG */
+-- 
+2.35.1
+
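The control flow added above is a trust-source fallback: try the built-in/secondary keyring first and retry against the platform keyring only on -ENOKEY (no matching key), never on hard verification failures. A hedged sketch of that shape; verify_step() is a hypothetical stand-in for verify_pkcs7_signature() with a given keyring:

#include <errno.h>

enum keyring { SECONDARY_KEYRING, PLATFORM_KEYRING };

/* Hypothetical stand-in for verify_pkcs7_signature() on one keyring. */
static int verify_step(enum keyring kr)
{
        return (kr == PLATFORM_KEYRING) ? 0 : -ENOKEY;
}

static int verify_with_fallback(int have_platform_keyring)
{
        int ret = verify_step(SECONDARY_KEYRING);

        /* Only "no matching key" falls through; -EBADMSG etc. do not. */
        if (ret == -ENOKEY && have_platform_keyring)
                ret = verify_step(PLATFORM_KEYRING);
        return ret;
}

int main(void)
{
        return verify_with_fallback(1) ? 1 : 0;
}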
diff --git a/queue-5.18/kexec_file-drop-weak-attribute-from-functions.patch b/queue-5.18/kexec_file-drop-weak-attribute-from-functions.patch
new file mode 100644 (file)
index 0000000..5c04724
--- /dev/null
@@ -0,0 +1,261 @@
+From a0103a12b495a7c1698fb02cfa8077d5018aedce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Jul 2022 13:04:04 +0530
+Subject: kexec_file: drop weak attribute from functions
+
+From: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+
+[ Upstream commit 65d9a9a60fd71be964effb2e94747a6acb6e7015 ]
+
+As requested
+(http://lkml.kernel.org/r/87ee0q7b92.fsf@email.froward.int.ebiederm.org),
+this series converts weak functions in kexec to use the #ifdef approach.
+
+Quoting the 3e35142ef99fe ("kexec_file: drop weak attribute from
+arch_kexec_apply_relocations[_add]") changelog:
+
+: Since commit d1bcae833b32f1 ("ELF: Don't generate unused section symbols")
+: [1], binutils (v2.36+) started dropping section symbols that it thought
+: were unused.  This isn't an issue in general, but with kexec_file.c, gcc
+: is placing kexec_arch_apply_relocations[_add] into a separate
+: .text.unlikely section and the section symbol ".text.unlikely" is being
+: dropped.  Due to this, recordmcount is unable to find a non-weak symbol in
+: .text.unlikely to generate a relocation record against.
+
+This patch (of 2):
+
+Drop __weak attribute from functions in kexec_file.c:
+- arch_kexec_kernel_image_probe()
+- arch_kimage_file_post_load_cleanup()
+- arch_kexec_kernel_image_load()
+- arch_kexec_locate_mem_hole()
+- arch_kexec_kernel_verify_sig()
+
+arch_kexec_kernel_image_load() calls into kexec_image_load_default(), so
+drop the static attribute for the latter.
+
+arch_kexec_kernel_verify_sig() is not overridden by any architecture, so
+drop the __weak attribute.
+
+Link: https://lkml.kernel.org/r/cover.1656659357.git.naveen.n.rao@linux.vnet.ibm.com
+Link: https://lkml.kernel.org/r/2cd7ca1fe4d6bb6ca38e3283c717878388ed6788.1656659357.git.naveen.n.rao@linux.vnet.ibm.com
+Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Suggested-by: Eric Biederman <ebiederm@xmission.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/include/asm/kexec.h   |  4 ++-
+ arch/powerpc/include/asm/kexec.h |  9 +++++++
+ arch/s390/include/asm/kexec.h    |  3 +++
+ arch/x86/include/asm/kexec.h     |  6 +++++
+ include/linux/kexec.h            | 44 +++++++++++++++++++++++++++-----
+ kernel/kexec_file.c              | 35 ++-----------------------
+ 6 files changed, 61 insertions(+), 40 deletions(-)
+
+diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
+index 9839bfc163d7..78d272b26ebd 100644
+--- a/arch/arm64/include/asm/kexec.h
++++ b/arch/arm64/include/asm/kexec.h
+@@ -115,7 +115,9 @@ extern const struct kexec_file_ops kexec_image_ops;
+ struct kimage;
+-extern int arch_kimage_file_post_load_cleanup(struct kimage *image);
++int arch_kimage_file_post_load_cleanup(struct kimage *image);
++#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
++
+ extern int load_other_segments(struct kimage *image,
+               unsigned long kernel_load_addr, unsigned long kernel_size,
+               char *initrd, unsigned long initrd_len,
+diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
+index 2aefe14e1442..1e5e9b6ec78d 100644
+--- a/arch/powerpc/include/asm/kexec.h
++++ b/arch/powerpc/include/asm/kexec.h
+@@ -120,6 +120,15 @@ int setup_purgatory(struct kimage *image, const void *slave_code,
+ #ifdef CONFIG_PPC64
+ struct kexec_buf;
++int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len);
++#define arch_kexec_kernel_image_probe arch_kexec_kernel_image_probe
++
++int arch_kimage_file_post_load_cleanup(struct kimage *image);
++#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
++
++int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf);
++#define arch_kexec_locate_mem_hole arch_kexec_locate_mem_hole
++
+ int load_crashdump_segments_ppc64(struct kimage *image,
+                                 struct kexec_buf *kbuf);
+ int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
+diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
+index 63098df81c9f..d13bd221cd37 100644
+--- a/arch/s390/include/asm/kexec.h
++++ b/arch/s390/include/asm/kexec.h
+@@ -92,5 +92,8 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+                                    const Elf_Shdr *relsec,
+                                    const Elf_Shdr *symtab);
+ #define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add
++
++int arch_kimage_file_post_load_cleanup(struct kimage *image);
++#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
+ #endif
+ #endif /*_S390_KEXEC_H */
+diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
+index 6ad8d946cd3e..5ec359c1b50c 100644
+--- a/arch/x86/include/asm/kexec.h
++++ b/arch/x86/include/asm/kexec.h
+@@ -193,6 +193,12 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+                                    const Elf_Shdr *relsec,
+                                    const Elf_Shdr *symtab);
+ #define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add
++
++void *arch_kexec_kernel_image_load(struct kimage *image);
++#define arch_kexec_kernel_image_load arch_kexec_kernel_image_load
++
++int arch_kimage_file_post_load_cleanup(struct kimage *image);
++#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
+ #endif
+ #endif
+diff --git a/include/linux/kexec.h b/include/linux/kexec.h
+index 8d573baaab29..87c1795297b0 100644
+--- a/include/linux/kexec.h
++++ b/include/linux/kexec.h
+@@ -188,21 +188,53 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
+                                  void *buf, unsigned int size,
+                                  bool get_value);
+ void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name);
++void *kexec_image_load_default(struct kimage *image);
++
++#ifndef arch_kexec_kernel_image_probe
++static inline int
++arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len)
++{
++      return kexec_image_probe_default(image, buf, buf_len);
++}
++#endif
++
++#ifndef arch_kimage_file_post_load_cleanup
++static inline int arch_kimage_file_post_load_cleanup(struct kimage *image)
++{
++      return kexec_image_post_load_cleanup_default(image);
++}
++#endif
++
++#ifndef arch_kexec_kernel_image_load
++static inline void *arch_kexec_kernel_image_load(struct kimage *image)
++{
++      return kexec_image_load_default(image);
++}
++#endif
+-/* Architectures may override the below functions */
+-int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+-                                unsigned long buf_len);
+-void *arch_kexec_kernel_image_load(struct kimage *image);
+-int arch_kimage_file_post_load_cleanup(struct kimage *image);
+ #ifdef CONFIG_KEXEC_SIG
+ int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+                                unsigned long buf_len);
+ #endif
+-int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf);
+ extern int kexec_add_buffer(struct kexec_buf *kbuf);
+ int kexec_locate_mem_hole(struct kexec_buf *kbuf);
++#ifndef arch_kexec_locate_mem_hole
++/**
++ * arch_kexec_locate_mem_hole - Find free memory to place the segments.
++ * @kbuf:                       Parameters for the memory search.
++ *
++ * On success, kbuf->mem will have the start address of the memory region found.
++ *
++ * Return: 0 on success, negative errno on error.
++ */
++static inline int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
++{
++      return kexec_locate_mem_hole(kbuf);
++}
++#endif
++
+ /* Alignment required for elf header segment */
+ #define ELF_CORE_HEADER_ALIGN   4096
+diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
+index bb0fb63f563c..925953dfef05 100644
+--- a/kernel/kexec_file.c
++++ b/kernel/kexec_file.c
+@@ -62,14 +62,7 @@ int kexec_image_probe_default(struct kimage *image, void *buf,
+       return ret;
+ }
+-/* Architectures can provide this probe function */
+-int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+-                                       unsigned long buf_len)
+-{
+-      return kexec_image_probe_default(image, buf, buf_len);
+-}
+-
+-static void *kexec_image_load_default(struct kimage *image)
++void *kexec_image_load_default(struct kimage *image)
+ {
+       if (!image->fops || !image->fops->load)
+               return ERR_PTR(-ENOEXEC);
+@@ -80,11 +73,6 @@ static void *kexec_image_load_default(struct kimage *image)
+                                image->cmdline_buf_len);
+ }
+-void * __weak arch_kexec_kernel_image_load(struct kimage *image)
+-{
+-      return kexec_image_load_default(image);
+-}
+-
+ int kexec_image_post_load_cleanup_default(struct kimage *image)
+ {
+       if (!image->fops || !image->fops->cleanup)
+@@ -93,11 +81,6 @@ int kexec_image_post_load_cleanup_default(struct kimage *image)
+       return image->fops->cleanup(image->image_loader_data);
+ }
+-int __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
+-{
+-      return kexec_image_post_load_cleanup_default(image);
+-}
+-
+ #ifdef CONFIG_KEXEC_SIG
+ static int kexec_image_verify_sig_default(struct kimage *image, void *buf,
+                                         unsigned long buf_len)
+@@ -110,8 +93,7 @@ static int kexec_image_verify_sig_default(struct kimage *image, void *buf,
+       return image->fops->verify_sig(buf, buf_len);
+ }
+-int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+-                                      unsigned long buf_len)
++int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, unsigned long buf_len)
+ {
+       return kexec_image_verify_sig_default(image, buf, buf_len);
+ }
+@@ -621,19 +603,6 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf)
+       return ret == 1 ? 0 : -EADDRNOTAVAIL;
+ }
+-/**
+- * arch_kexec_locate_mem_hole - Find free memory to place the segments.
+- * @kbuf:                       Parameters for the memory search.
+- *
+- * On success, kbuf->mem will have the start address of the memory region found.
+- *
+- * Return: 0 on success, negative errno on error.
+- */
+-int __weak arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
+-{
+-      return kexec_locate_mem_hole(kbuf);
+-}
+-
+ /**
+  * kexec_add_buffer - place a buffer in a kexec segment
+  * @kbuf:     Buffer contents and memory parameters.
+-- 
+2.35.1
+
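The #ifdef approach adopted here has a fixed shape: the generic header supplies a static inline default, and an arch that overrides it declares the function and defines a same-named macro so the default is compiled out. A self-contained demo with illustrative names:

/* --- what an arch header (asm/kexec.h) would add to override: ---
 *
 *   int arch_example_hook(int x);
 *   #define arch_example_hook arch_example_hook
 */

/* --- generic header (linux/kexec.h) --- */
#ifndef arch_example_hook
static inline int arch_example_hook(int x)
{
        return x;       /* generic default, used when no arch override */
}
#endif

#include <stdio.h>

int main(void)
{
        printf("%d\n", arch_example_hook(42));  /* 42: default in effect */
        return 0;
}

Unlike __weak, the override is resolved at preprocessing time, so section placement by the compiler and recordmcount's symbol search can no longer break it.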
diff --git a/queue-5.18/keys-asymmetric-enforce-sm2-signature-use-pkey-algo.patch b/queue-5.18/keys-asymmetric-enforce-sm2-signature-use-pkey-algo.patch
new file mode 100644 (file)
index 0000000..fc0adb2
--- /dev/null
@@ -0,0 +1,60 @@
+From 7537225de80672a829ff983a94b62557441b5bf3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Jun 2022 11:37:20 +0800
+Subject: KEYS: asymmetric: enforce SM2 signature use pkey algo
+
+From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+
+[ Upstream commit 0815291a8fd66cdcf7db1445d4d99b0d16065829 ]
+
+SM2 signature verification needs to add the Za value and recalculate
+sig->digest, which requires detecting the pkey_algo in
+public_key_verify_signature(). As Eric Biggers pointed out, the
+pkey_algo field in sig is attacker-controlled, so pkey->pkey_algo
+should be used instead of sig->pkey_algo; moreover, if sig->pkey_algo
+is NULL, signature verification will also fail.
+
+software_key_determine_akcipher() already forces the algorithms to
+match, so the SM3 hash is effectively enforced for SM2 signatures.
+Although this is already checked, we still avoid using any algorithm
+information taken from the signature itself as input.
+
+Fixes: 215525639631 ("X.509: support OSCCA SM2-with-SM3 certificate verification")
+Reported-by: Eric Biggers <ebiggers@google.com>
+Cc: stable@vger.kernel.org # v5.10+
+Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ crypto/asymmetric_keys/public_key.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
+index 7c9e6be35c30..2f8352e88860 100644
+--- a/crypto/asymmetric_keys/public_key.c
++++ b/crypto/asymmetric_keys/public_key.c
+@@ -304,6 +304,10 @@ static int cert_sig_digest_update(const struct public_key_signature *sig,
+       BUG_ON(!sig->data);
++      /* SM2 signatures always use the SM3 hash algorithm */
++      if (!sig->hash_algo || strcmp(sig->hash_algo, "sm3") != 0)
++              return -EINVAL;
++
+       ret = sm2_compute_z_digest(tfm_pkey, SM2_DEFAULT_USERID,
+                                       SM2_DEFAULT_USERID_LEN, dgst);
+       if (ret)
+@@ -414,8 +418,7 @@ int public_key_verify_signature(const struct public_key *pkey,
+       if (ret)
+               goto error_free_key;
+-      if (sig->pkey_algo && strcmp(sig->pkey_algo, "sm2") == 0 &&
+-          sig->data_size) {
++      if (strcmp(pkey->pkey_algo, "sm2") == 0 && sig->data_size) {
+               ret = cert_sig_digest_update(sig, tfm);
+               if (ret)
+                       goto error_free_key;
+-- 
+2.35.1
+
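The rule the fix enforces generalizes: select algorithms from the trusted public key, and treat algorithm fields arriving with the signature as claims to validate, not inputs to act on. A minimal sketch of that check:

#include <errno.h>
#include <stdio.h>
#include <string.h>

struct pkey { const char *pkey_algo; };   /* trusted: from the key      */
struct sig  { const char *hash_algo; };   /* untrusted: attacker-shaped */

static int check_sm2_pairing(const struct pkey *pkey, const struct sig *sig)
{
        if (strcmp(pkey->pkey_algo, "sm2") == 0) {
                /* SM2 signatures always use the SM3 hash. */
                if (!sig->hash_algo || strcmp(sig->hash_algo, "sm3") != 0)
                        return -EINVAL;
        }
        return 0;
}

int main(void)
{
        struct pkey k = { .pkey_algo = "sm2" };
        struct sig  s = { .hash_algo = "sha256" };   /* mismatched claim */

        printf("%d\n", check_sm2_pairing(&k, &s));   /* -22 (-EINVAL) */
        return 0;
}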
diff --git a/queue-5.18/ksmbd-add-smbd-max-io-size-parameter.patch b/queue-5.18/ksmbd-add-smbd-max-io-size-parameter.patch
new file mode 100644 (file)
index 0000000..0347958
--- /dev/null
@@ -0,0 +1,115 @@
+From e6b41f4cc7280b1aff9e2ef689f1e8a83a605357 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 May 2022 16:22:43 +0900
+Subject: ksmbd: add smbd max io size parameter
+
+From: Namjae Jeon <linkinjeon@kernel.org>
+
+[ Upstream commit 65bb45b97b578c8eed1ffa80caec84708df49729 ]
+
+Add an 'smbd max io size' parameter to adjust the SMB Direct max
+read/write size.
+
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Reviewed-by: Hyunchul Lee <hyc.lee@gmail.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ksmbd/ksmbd_netlink.h  | 3 ++-
+ fs/ksmbd/transport_ipc.c  | 3 +++
+ fs/ksmbd/transport_rdma.c | 8 +++++++-
+ fs/ksmbd/transport_rdma.h | 6 ++++++
+ 4 files changed, 18 insertions(+), 2 deletions(-)
+
+diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h
+index ebe6ca08467a..52aa0adeb951 100644
+--- a/fs/ksmbd/ksmbd_netlink.h
++++ b/fs/ksmbd/ksmbd_netlink.h
+@@ -104,7 +104,8 @@ struct ksmbd_startup_request {
+                                        */
+       __u32   sub_auth[3];            /* Subauth value for Security ID */
+       __u32   smb2_max_credits;       /* MAX credits */
+-      __u32   reserved[128];          /* Reserved room */
++      __u32   smbd_max_io_size;       /* smbd read write size */
++      __u32   reserved[127];          /* Reserved room */
+       __u32   ifc_list_sz;            /* interfaces list size */
+       __s8    ____payload[];
+ };
+diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c
+index 3ad6881e0f7e..7cb0eeb07c80 100644
+--- a/fs/ksmbd/transport_ipc.c
++++ b/fs/ksmbd/transport_ipc.c
+@@ -26,6 +26,7 @@
+ #include "mgmt/ksmbd_ida.h"
+ #include "connection.h"
+ #include "transport_tcp.h"
++#include "transport_rdma.h"
+ #define IPC_WAIT_TIMEOUT      (2 * HZ)
+@@ -303,6 +304,8 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
+               init_smb2_max_trans_size(req->smb2_max_trans);
+       if (req->smb2_max_credits)
+               init_smb2_max_credits(req->smb2_max_credits);
++      if (req->smbd_max_io_size)
++              init_smbd_max_io_size(req->smbd_max_io_size);
+       ret = ksmbd_set_netbios_name(req->netbios_name);
+       ret |= ksmbd_set_server_string(req->server_string);
+diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
+index b44a5e584bac..afc66b9765e7 100644
+--- a/fs/ksmbd/transport_rdma.c
++++ b/fs/ksmbd/transport_rdma.c
+@@ -80,7 +80,7 @@ static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
+ /*  The maximum single-message size which can be received */
+ static int smb_direct_max_receive_size = 8192;
+-static int smb_direct_max_read_write_size = 8 * 1024 * 1024;
++static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE;
+ static LIST_HEAD(smb_direct_device_list);
+ static DEFINE_RWLOCK(smb_direct_device_lock);
+@@ -214,6 +214,12 @@ struct smb_direct_rdma_rw_msg {
+       struct scatterlist      sg_list[];
+ };
++void init_smbd_max_io_size(unsigned int sz)
++{
++      sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE);
++      smb_direct_max_read_write_size = sz;
++}
++
+ static inline int get_buf_page_count(void *buf, int size)
+ {
+       return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
+diff --git a/fs/ksmbd/transport_rdma.h b/fs/ksmbd/transport_rdma.h
+index 5567d93a6f96..e7b4e6790fab 100644
+--- a/fs/ksmbd/transport_rdma.h
++++ b/fs/ksmbd/transport_rdma.h
+@@ -7,6 +7,10 @@
+ #ifndef __KSMBD_TRANSPORT_RDMA_H__
+ #define __KSMBD_TRANSPORT_RDMA_H__
++#define SMBD_DEFAULT_IOSIZE (8 * 1024 * 1024)
++#define SMBD_MIN_IOSIZE (512 * 1024)
++#define SMBD_MAX_IOSIZE (16 * 1024 * 1024)
++
+ /* SMB DIRECT negotiation request packet [MS-SMBD] 2.2.1 */
+ struct smb_direct_negotiate_req {
+       __le16 min_version;
+@@ -52,10 +56,12 @@ struct smb_direct_data_transfer {
+ int ksmbd_rdma_init(void);
+ void ksmbd_rdma_destroy(void);
+ bool ksmbd_rdma_capable_netdev(struct net_device *netdev);
++void init_smbd_max_io_size(unsigned int sz);
+ #else
+ static inline int ksmbd_rdma_init(void) { return 0; }
+ static inline int ksmbd_rdma_destroy(void) { return 0; }
+ static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; }
++static inline void init_smbd_max_io_size(unsigned int sz) { }
+ #endif
+ #endif /* __KSMBD_TRANSPORT_RDMA_H__ */
+-- 
+2.35.1
+
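init_smbd_max_io_size() above is the standard clamp-the-tunable idiom: a user-supplied value is forced into a sane [min, max] window before it replaces the default. A standalone sketch using the patch's own bounds; clamp_val() is reimplemented here, in-kernel it is a macro:

#include <stdio.h>

#define SMBD_MIN_IOSIZE (512 * 1024)
#define SMBD_MAX_IOSIZE (16 * 1024 * 1024)

/* Userspace stand-in for the kernel's clamp_val(). */
static unsigned int clamp_val(unsigned int v, unsigned int lo, unsigned int hi)
{
        return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
        /* A too-small request is raised to the floor: prints 524288. */
        printf("%u\n", clamp_val(64 * 1024, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE));
        return 0;
}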
diff --git a/queue-5.18/ksmbd-fix-wrong-smbd-max-read-write-size-check.patch b/queue-5.18/ksmbd-fix-wrong-smbd-max-read-write-size-check.patch
new file mode 100644 (file)
index 0000000..b6297e1
--- /dev/null
@@ -0,0 +1,172 @@
+From 1149a190b27509bf700591539f0cd164592d4ce0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 May 2022 16:23:28 +0900
+Subject: ksmbd: fix wrong smbd max read/write size check
+
+From: Namjae Jeon <linkinjeon@kernel.org>
+
+[ Upstream commit 7a84399e1ce3f5f2fbec3e7dd93459ba25badc2f ]
+
+The SMB Direct max read/write size can differ from the SMB2 max read/write
+size, so smb2_read() can return an error due to the wrong max read/write
+size check. This patch uses smb_direct_max_read_write_size for this check
+in SMB Direct read/write().
+
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Reviewed-by: Hyunchul Lee <hyc.lee@gmail.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ksmbd/smb2pdu.c        | 39 +++++++++++++++++++++++++--------------
+ fs/ksmbd/transport_rdma.c |  5 +++++
+ fs/ksmbd/transport_rdma.h |  2 ++
+ 3 files changed, 32 insertions(+), 14 deletions(-)
+
+diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
+index 8f86b8d6765f..6c8dd718b5db 100644
+--- a/fs/ksmbd/smb2pdu.c
++++ b/fs/ksmbd/smb2pdu.c
+@@ -6194,6 +6194,8 @@ int smb2_read(struct ksmbd_work *work)
+       size_t length, mincount;
+       ssize_t nbytes = 0, remain_bytes = 0;
+       int err = 0;
++      bool is_rdma_channel = false;
++      unsigned int max_read_size = conn->vals->max_read_size;
+       WORK_BUFFERS(work, req, rsp);
+@@ -6205,6 +6207,11 @@ int smb2_read(struct ksmbd_work *work)
+       if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE ||
+           req->Channel == SMB2_CHANNEL_RDMA_V1) {
++              is_rdma_channel = true;
++              max_read_size = get_smbd_max_read_write_size();
++      }
++
++      if (is_rdma_channel == true) {
+               unsigned int ch_offset = le16_to_cpu(req->ReadChannelInfoOffset);
+               if (ch_offset < offsetof(struct smb2_read_req, Buffer)) {
+@@ -6236,9 +6243,9 @@ int smb2_read(struct ksmbd_work *work)
+       length = le32_to_cpu(req->Length);
+       mincount = le32_to_cpu(req->MinimumCount);
+-      if (length > conn->vals->max_read_size) {
++      if (length > max_read_size) {
+               ksmbd_debug(SMB, "limiting read size to max size(%u)\n",
+-                          conn->vals->max_read_size);
++                          max_read_size);
+               err = -EINVAL;
+               goto out;
+       }
+@@ -6270,8 +6277,7 @@ int smb2_read(struct ksmbd_work *work)
+       ksmbd_debug(SMB, "nbytes %zu, offset %lld mincount %zu\n",
+                   nbytes, offset, mincount);
+-      if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE ||
+-          req->Channel == SMB2_CHANNEL_RDMA_V1) {
++      if (is_rdma_channel == true) {
+               /* write data to the client using rdma channel */
+               remain_bytes = smb2_read_rdma_channel(work, req,
+                                                     work->aux_payload_buf,
+@@ -6432,8 +6438,9 @@ int smb2_write(struct ksmbd_work *work)
+       size_t length;
+       ssize_t nbytes;
+       char *data_buf;
+-      bool writethrough = false;
++      bool writethrough = false, is_rdma_channel = false;
+       int err = 0;
++      unsigned int max_write_size = work->conn->vals->max_write_size;
+       WORK_BUFFERS(work, req, rsp);
+@@ -6442,8 +6449,17 @@ int smb2_write(struct ksmbd_work *work)
+               return smb2_write_pipe(work);
+       }
++      offset = le64_to_cpu(req->Offset);
++      length = le32_to_cpu(req->Length);
++
+       if (req->Channel == SMB2_CHANNEL_RDMA_V1 ||
+           req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
++              is_rdma_channel = true;
++              max_write_size = get_smbd_max_read_write_size();
++              length = le32_to_cpu(req->RemainingBytes);
++      }
++
++      if (is_rdma_channel == true) {
+               unsigned int ch_offset = le16_to_cpu(req->WriteChannelInfoOffset);
+               if (req->Length != 0 || req->DataOffset != 0 ||
+@@ -6478,12 +6494,9 @@ int smb2_write(struct ksmbd_work *work)
+               goto out;
+       }
+-      offset = le64_to_cpu(req->Offset);
+-      length = le32_to_cpu(req->Length);
+-
+-      if (length > work->conn->vals->max_write_size) {
++      if (length > max_write_size) {
+               ksmbd_debug(SMB, "limiting write size to max size(%u)\n",
+-                          work->conn->vals->max_write_size);
++                          max_write_size);
+               err = -EINVAL;
+               goto out;
+       }
+@@ -6491,8 +6504,7 @@ int smb2_write(struct ksmbd_work *work)
+       if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH)
+               writethrough = true;
+-      if (req->Channel != SMB2_CHANNEL_RDMA_V1 &&
+-          req->Channel != SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
++      if (is_rdma_channel == false) {
+               if ((u64)le16_to_cpu(req->DataOffset) + length >
+                   get_rfc1002_len(work->request_buf)) {
+                       pr_err("invalid write data offset %u, smb_len %u\n",
+@@ -6518,8 +6530,7 @@ int smb2_write(struct ksmbd_work *work)
+               /* read data from the client using rdma channel, and
+                * write the data.
+                */
+-              nbytes = smb2_write_rdma_channel(work, req, fp, offset,
+-                                               le32_to_cpu(req->RemainingBytes),
++              nbytes = smb2_write_rdma_channel(work, req, fp, offset, length,
+                                                writethrough);
+               if (nbytes < 0) {
+                       err = (int)nbytes;
+diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
+index afc66b9765e7..c6af8d89b7f7 100644
+--- a/fs/ksmbd/transport_rdma.c
++++ b/fs/ksmbd/transport_rdma.c
+@@ -220,6 +220,11 @@ void init_smbd_max_io_size(unsigned int sz)
+       smb_direct_max_read_write_size = sz;
+ }
++unsigned int get_smbd_max_read_write_size(void)
++{
++      return smb_direct_max_read_write_size;
++}
++
+ static inline int get_buf_page_count(void *buf, int size)
+ {
+       return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
+diff --git a/fs/ksmbd/transport_rdma.h b/fs/ksmbd/transport_rdma.h
+index e7b4e6790fab..77aee4e5c9dc 100644
+--- a/fs/ksmbd/transport_rdma.h
++++ b/fs/ksmbd/transport_rdma.h
+@@ -57,11 +57,13 @@ int ksmbd_rdma_init(void);
+ void ksmbd_rdma_destroy(void);
+ bool ksmbd_rdma_capable_netdev(struct net_device *netdev);
+ void init_smbd_max_io_size(unsigned int sz);
++unsigned int get_smbd_max_read_write_size(void);
+ #else
+ static inline int ksmbd_rdma_init(void) { return 0; }
+ static inline int ksmbd_rdma_destroy(void) { return 0; }
+ static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; }
+ static inline void init_smbd_max_io_size(unsigned int sz) { }
++static inline unsigned int get_smbd_max_read_write_size(void) { return 0; }
+ #endif
+ #endif /* __KSMBD_TRANSPORT_RDMA_H__ */
+-- 
+2.35.1
+
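The shape of the fix: the enforced limit is chosen per transport channel up front, and every later check uses that one value. A trivial sketch:

#include <stdbool.h>
#include <stdio.h>

static unsigned int effective_limit(bool is_rdma_channel,
                                    unsigned int smb2_max,
                                    unsigned int smbd_max)
{
        /* RDMA channels are bounded by the SMB Direct limit,
         * everything else by the ordinary SMB2 limit. */
        return is_rdma_channel ? smbd_max : smb2_max;
}

int main(void)
{
        /* Prints 8388608: RDMA requests get the SMB Direct bound. */
        printf("%u\n", effective_limit(true, 1u << 20, 8u << 20));
        return 0;
}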
diff --git a/queue-5.18/ksmbd-prevent-out-of-bound-read-for-smb2_write.patch b/queue-5.18/ksmbd-prevent-out-of-bound-read-for-smb2_write.patch
new file mode 100644 (file)
index 0000000..e872b66
--- /dev/null
@@ -0,0 +1,128 @@
+From 79288fb9f5ec9fb25c6f827a60c7233dece972c4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jul 2022 23:41:51 +0900
+Subject: ksmbd: prevent out of bound read for SMB2_WRITE
+
+From: Hyunchul Lee <hyc.lee@gmail.com>
+
+[ Upstream commit ac60778b87e45576d7bfdbd6f53df902654e6f09 ]
+
+Out-of-bounds memory can be read and written to a file
+if DataOffset is 0 and Length is too large
+in an SMB2_WRITE request within a compound request.
+
+To prevent this, when checking the length of
+the data area of SMB2_WRITE in smb2_get_data_area_len(),
+make the minimum DataOffset the size of the
+SMB2 header plus the size of the SMB2_WRITE header.
+
+This bug can lead to an oops looking something like:
+
+[  798.008715] BUG: KASAN: slab-out-of-bounds in copy_page_from_iter_atomic+0xd3d/0x14b0
+[  798.008724] Read of size 252 at addr ffff88800f863e90 by task kworker/0:2/2859
+...
+[  798.008754] Call Trace:
+[  798.008756]  <TASK>
+[  798.008759]  dump_stack_lvl+0x49/0x5f
+[  798.008764]  print_report.cold+0x5e/0x5cf
+[  798.008768]  ? __filemap_get_folio+0x285/0x6d0
+[  798.008774]  ? copy_page_from_iter_atomic+0xd3d/0x14b0
+[  798.008777]  kasan_report+0xaa/0x120
+[  798.008781]  ? copy_page_from_iter_atomic+0xd3d/0x14b0
+[  798.008784]  kasan_check_range+0x100/0x1e0
+[  798.008788]  memcpy+0x24/0x60
+[  798.008792]  copy_page_from_iter_atomic+0xd3d/0x14b0
+[  798.008795]  ? pagecache_get_page+0x53/0x160
+[  798.008799]  ? iov_iter_get_pages_alloc+0x1590/0x1590
+[  798.008803]  ? ext4_write_begin+0xfc0/0xfc0
+[  798.008807]  ? current_time+0x72/0x210
+[  798.008811]  generic_perform_write+0x2c8/0x530
+[  798.008816]  ? filemap_fdatawrite_wbc+0x180/0x180
+[  798.008820]  ? down_write+0xb4/0x120
+[  798.008824]  ? down_write_killable+0x130/0x130
+[  798.008829]  ext4_buffered_write_iter+0x137/0x2c0
+[  798.008833]  ext4_file_write_iter+0x40b/0x1490
+[  798.008837]  ? __fsnotify_parent+0x275/0xb20
+[  798.008842]  ? __fsnotify_update_child_dentry_flags+0x2c0/0x2c0
+[  798.008846]  ? ext4_buffered_write_iter+0x2c0/0x2c0
+[  798.008851]  __kernel_write+0x3a1/0xa70
+[  798.008855]  ? __x64_sys_preadv2+0x160/0x160
+[  798.008860]  ? security_file_permission+0x4a/0xa0
+[  798.008865]  kernel_write+0xbb/0x360
+[  798.008869]  ksmbd_vfs_write+0x27e/0xb90 [ksmbd]
+[  798.008881]  ? ksmbd_vfs_read+0x830/0x830 [ksmbd]
+[  798.008892]  ? _raw_read_unlock+0x2a/0x50
+[  798.008896]  smb2_write+0xb45/0x14e0 [ksmbd]
+[  798.008909]  ? __kasan_check_write+0x14/0x20
+[  798.008912]  ? _raw_spin_lock_bh+0xd0/0xe0
+[  798.008916]  ? smb2_read+0x15e0/0x15e0 [ksmbd]
+[  798.008927]  ? memcpy+0x4e/0x60
+[  798.008931]  ? _raw_spin_unlock+0x19/0x30
+[  798.008934]  ? ksmbd_smb2_check_message+0x16af/0x2350 [ksmbd]
+[  798.008946]  ? _raw_spin_lock_bh+0xe0/0xe0
+[  798.008950]  handle_ksmbd_work+0x30e/0x1020 [ksmbd]
+[  798.008962]  process_one_work+0x778/0x11c0
+[  798.008966]  ? _raw_spin_lock_irq+0x8e/0xe0
+[  798.008970]  worker_thread+0x544/0x1180
+[  798.008973]  ? __cpuidle_text_end+0x4/0x4
+[  798.008977]  kthread+0x282/0x320
+[  798.008982]  ? process_one_work+0x11c0/0x11c0
+[  798.008985]  ? kthread_complete_and_exit+0x30/0x30
+[  798.008989]  ret_from_fork+0x1f/0x30
+[  798.008995]  </TASK>
+
+Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3")
+Cc: stable@vger.kernel.org
+Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-17817
+Signed-off-by: Hyunchul Lee <hyc.lee@gmail.com>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ksmbd/smb2misc.c | 7 +++++--
+ fs/ksmbd/smb2pdu.c  | 8 +++-----
+ 2 files changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c
+index 03bcd7ce0c75..6e25ace36568 100644
+--- a/fs/ksmbd/smb2misc.c
++++ b/fs/ksmbd/smb2misc.c
+@@ -131,8 +131,11 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len,
+               *len = le16_to_cpu(((struct smb2_read_req *)hdr)->ReadChannelInfoLength);
+               break;
+       case SMB2_WRITE:
+-              if (((struct smb2_write_req *)hdr)->DataOffset) {
+-                      *off = le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset);
++              if (((struct smb2_write_req *)hdr)->DataOffset ||
++                  ((struct smb2_write_req *)hdr)->Length) {
++                      *off = max_t(unsigned int,
++                                   le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset),
++                                   offsetof(struct smb2_write_req, Buffer));
+                       *len = le32_to_cpu(((struct smb2_write_req *)hdr)->Length);
+                       break;
+               }
+diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
+index 6c8dd718b5db..85a9ed7156ea 100644
+--- a/fs/ksmbd/smb2pdu.c
++++ b/fs/ksmbd/smb2pdu.c
+@@ -6505,14 +6505,12 @@ int smb2_write(struct ksmbd_work *work)
+               writethrough = true;
+       if (is_rdma_channel == false) {
+-              if ((u64)le16_to_cpu(req->DataOffset) + length >
+-                  get_rfc1002_len(work->request_buf)) {
+-                      pr_err("invalid write data offset %u, smb_len %u\n",
+-                             le16_to_cpu(req->DataOffset),
+-                             get_rfc1002_len(work->request_buf));
++              if (le16_to_cpu(req->DataOffset) <
++                  offsetof(struct smb2_write_req, Buffer)) {
+                       err = -EINVAL;
+                       goto out;
+               }
++
+               data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
+                                   le16_to_cpu(req->DataOffset));
+-- 
+2.35.1
+
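The bound introduced above is a classic offset/length validation: a client-controlled data offset is floored at the end of the fixed header so it can never point into (or before) the header itself. A self-contained sketch; the struct is illustrative, the real code floors at offsetof(struct smb2_write_req, Buffer):

#include <stddef.h>
#include <stdio.h>

struct write_req {
        unsigned char  hdr[48];        /* fixed protocol header      */
        unsigned short data_offset;    /* client-controlled          */
        unsigned int   length;         /* client-controlled          */
        unsigned char  Buffer[];       /* payload legitimately here  */
};

int main(void)
{
        unsigned int off = 0;          /* malicious: offset of zero  */
        unsigned int floor = offsetof(struct write_req, Buffer);

        /* max_t(unsigned int, off, floor) in kernel terms. */
        if (off < floor)
                off = floor;

        printf("effective data offset: %u\n", off);
        return 0;
}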
diff --git a/queue-5.18/ksmbd-smbd-change-prototypes-of-rdma-read-write-rela.patch b/queue-5.18/ksmbd-smbd-change-prototypes-of-rdma-read-write-rela.patch
new file mode 100644 (file)
index 0000000..5871335
--- /dev/null
@@ -0,0 +1,258 @@
+From a2389c2ae23605f7b53ab49541ee17285fdf563e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Apr 2022 08:30:25 +0900
+Subject: ksmbd: smbd: change prototypes of RDMA read/write related functions
+
+From: Hyunchul Lee <hyc.lee@gmail.com>
+
+[ Upstream commit 1807abcf8778bcbbf584fe54da9ccbe9029c49bb ]
+
+Change the prototypes of the RDMA read/write
+operations to accept a pointer to, and the
+length of, an array of buffer descriptors.
+
+Signed-off-by: Hyunchul Lee <hyc.lee@gmail.com>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ksmbd/connection.c     | 20 ++++++++++----------
+ fs/ksmbd/connection.h     | 27 ++++++++++++++++-----------
+ fs/ksmbd/smb2pdu.c        | 23 ++++++++---------------
+ fs/ksmbd/transport_rdma.c | 30 +++++++++++++++++-------------
+ 4 files changed, 51 insertions(+), 49 deletions(-)
+
+diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c
+index bc6050b67256..e8f476c5f189 100644
+--- a/fs/ksmbd/connection.c
++++ b/fs/ksmbd/connection.c
+@@ -205,31 +205,31 @@ int ksmbd_conn_write(struct ksmbd_work *work)
+       return 0;
+ }
+-int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, void *buf,
+-                       unsigned int buflen, u32 remote_key, u64 remote_offset,
+-                       u32 remote_len)
++int ksmbd_conn_rdma_read(struct ksmbd_conn *conn,
++                       void *buf, unsigned int buflen,
++                       struct smb2_buffer_desc_v1 *desc,
++                       unsigned int desc_len)
+ {
+       int ret = -EINVAL;
+       if (conn->transport->ops->rdma_read)
+               ret = conn->transport->ops->rdma_read(conn->transport,
+                                                     buf, buflen,
+-                                                    remote_key, remote_offset,
+-                                                    remote_len);
++                                                    desc, desc_len);
+       return ret;
+ }
+-int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, void *buf,
+-                        unsigned int buflen, u32 remote_key,
+-                        u64 remote_offset, u32 remote_len)
++int ksmbd_conn_rdma_write(struct ksmbd_conn *conn,
++                        void *buf, unsigned int buflen,
++                        struct smb2_buffer_desc_v1 *desc,
++                        unsigned int desc_len)
+ {
+       int ret = -EINVAL;
+       if (conn->transport->ops->rdma_write)
+               ret = conn->transport->ops->rdma_write(conn->transport,
+                                                      buf, buflen,
+-                                                     remote_key, remote_offset,
+-                                                     remote_len);
++                                                     desc, desc_len);
+       return ret;
+ }
+diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h
+index 7a59aacb5daa..98c1cbe45ec9 100644
+--- a/fs/ksmbd/connection.h
++++ b/fs/ksmbd/connection.h
+@@ -122,11 +122,14 @@ struct ksmbd_transport_ops {
+       int (*writev)(struct ksmbd_transport *t, struct kvec *iovs, int niov,
+                     int size, bool need_invalidate_rkey,
+                     unsigned int remote_key);
+-      int (*rdma_read)(struct ksmbd_transport *t, void *buf, unsigned int len,
+-                       u32 remote_key, u64 remote_offset, u32 remote_len);
+-      int (*rdma_write)(struct ksmbd_transport *t, void *buf,
+-                        unsigned int len, u32 remote_key, u64 remote_offset,
+-                        u32 remote_len);
++      int (*rdma_read)(struct ksmbd_transport *t,
++                       void *buf, unsigned int len,
++                       struct smb2_buffer_desc_v1 *desc,
++                       unsigned int desc_len);
++      int (*rdma_write)(struct ksmbd_transport *t,
++                        void *buf, unsigned int len,
++                        struct smb2_buffer_desc_v1 *desc,
++                        unsigned int desc_len);
+ };
+ struct ksmbd_transport {
+@@ -148,12 +151,14 @@ struct ksmbd_conn *ksmbd_conn_alloc(void);
+ void ksmbd_conn_free(struct ksmbd_conn *conn);
+ bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c);
+ int ksmbd_conn_write(struct ksmbd_work *work);
+-int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, void *buf,
+-                       unsigned int buflen, u32 remote_key, u64 remote_offset,
+-                       u32 remote_len);
+-int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, void *buf,
+-                        unsigned int buflen, u32 remote_key, u64 remote_offset,
+-                        u32 remote_len);
++int ksmbd_conn_rdma_read(struct ksmbd_conn *conn,
++                       void *buf, unsigned int buflen,
++                       struct smb2_buffer_desc_v1 *desc,
++                       unsigned int desc_len);
++int ksmbd_conn_rdma_write(struct ksmbd_conn *conn,
++                        void *buf, unsigned int buflen,
++                        struct smb2_buffer_desc_v1 *desc,
++                        unsigned int desc_len);
+ void ksmbd_conn_enqueue_request(struct ksmbd_work *work);
+ int ksmbd_conn_try_dequeue_request(struct ksmbd_work *work);
+ void ksmbd_conn_init_server_callbacks(struct ksmbd_conn_ops *ops);
+diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
+index 5df87fe18905..8f86b8d6765f 100644
+--- a/fs/ksmbd/smb2pdu.c
++++ b/fs/ksmbd/smb2pdu.c
+@@ -6132,7 +6132,6 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work)
+ static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work,
+                                       struct smb2_buffer_desc_v1 *desc,
+                                       __le32 Channel,
+-                                      __le16 ChannelInfoOffset,
+                                       __le16 ChannelInfoLength)
+ {
+       unsigned int i, ch_count;
+@@ -6158,7 +6157,8 @@ static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work,
+       work->need_invalidate_rkey =
+               (Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
+-      work->remote_key = le32_to_cpu(desc->token);
++      if (Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE)
++              work->remote_key = le32_to_cpu(desc->token);
+       return 0;
+ }
+@@ -6166,14 +6166,12 @@ static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work,
+                                     struct smb2_read_req *req, void *data_buf,
+                                     size_t length)
+ {
+-      struct smb2_buffer_desc_v1 *desc =
+-              (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
+       int err;
+       err = ksmbd_conn_rdma_write(work->conn, data_buf, length,
+-                                  le32_to_cpu(desc->token),
+-                                  le64_to_cpu(desc->offset),
+-                                  le32_to_cpu(desc->length));
++                                  (struct smb2_buffer_desc_v1 *)
++                                  ((char *)req + le16_to_cpu(req->ReadChannelInfoOffset)),
++                                  le16_to_cpu(req->ReadChannelInfoLength));
+       if (err)
+               return err;
+@@ -6217,7 +6215,6 @@ int smb2_read(struct ksmbd_work *work)
+                                                  (struct smb2_buffer_desc_v1 *)
+                                                  ((char *)req + ch_offset),
+                                                  req->Channel,
+-                                                 req->ReadChannelInfoOffset,
+                                                  req->ReadChannelInfoLength);
+               if (err)
+                       goto out;
+@@ -6395,21 +6392,18 @@ static ssize_t smb2_write_rdma_channel(struct ksmbd_work *work,
+                                      struct ksmbd_file *fp,
+                                      loff_t offset, size_t length, bool sync)
+ {
+-      struct smb2_buffer_desc_v1 *desc;
+       char *data_buf;
+       int ret;
+       ssize_t nbytes;
+-      desc = (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
+-
+       data_buf = kvmalloc(length, GFP_KERNEL | __GFP_ZERO);
+       if (!data_buf)
+               return -ENOMEM;
+       ret = ksmbd_conn_rdma_read(work->conn, data_buf, length,
+-                                 le32_to_cpu(desc->token),
+-                                 le64_to_cpu(desc->offset),
+-                                 le32_to_cpu(desc->length));
++                                 (struct smb2_buffer_desc_v1 *)
++                                 ((char *)req + le16_to_cpu(req->WriteChannelInfoOffset)),
++                                 le16_to_cpu(req->WriteChannelInfoLength));
+       if (ret < 0) {
+               kvfree(data_buf);
+               return ret;
+@@ -6461,7 +6455,6 @@ int smb2_write(struct ksmbd_work *work)
+                                                  (struct smb2_buffer_desc_v1 *)
+                                                  ((char *)req + ch_offset),
+                                                  req->Channel,
+-                                                 req->WriteChannelInfoOffset,
+                                                  req->WriteChannelInfoLength);
+               if (err)
+                       goto out;
+diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
+index 3f5d13571694..479d279ee146 100644
+--- a/fs/ksmbd/transport_rdma.c
++++ b/fs/ksmbd/transport_rdma.c
+@@ -1352,14 +1352,18 @@ static void write_done(struct ib_cq *cq, struct ib_wc *wc)
+       read_write_done(cq, wc, DMA_TO_DEVICE);
+ }
+-static int smb_direct_rdma_xmit(struct smb_direct_transport *t, void *buf,
+-                              int buf_len, u32 remote_key, u64 remote_offset,
+-                              u32 remote_len, bool is_read)
++static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
++                              void *buf, int buf_len,
++                              struct smb2_buffer_desc_v1 *desc,
++                              unsigned int desc_len,
++                              bool is_read)
+ {
+       struct smb_direct_rdma_rw_msg *msg;
+       int ret;
+       DECLARE_COMPLETION_ONSTACK(completion);
+       struct ib_send_wr *first_wr = NULL;
++      u32 remote_key = le32_to_cpu(desc[0].token);
++      u64 remote_offset = le64_to_cpu(desc[0].offset);
+       ret = wait_for_credits(t, &t->wait_rw_avail_ops, &t->rw_avail_ops);
+       if (ret < 0)
+@@ -1424,22 +1428,22 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, void *buf,
+       return ret;
+ }
+-static int smb_direct_rdma_write(struct ksmbd_transport *t, void *buf,
+-                               unsigned int buflen, u32 remote_key,
+-                               u64 remote_offset, u32 remote_len)
++static int smb_direct_rdma_write(struct ksmbd_transport *t,
++                               void *buf, unsigned int buflen,
++                               struct smb2_buffer_desc_v1 *desc,
++                               unsigned int desc_len)
+ {
+       return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
+-                                  remote_key, remote_offset,
+-                                  remote_len, false);
++                                  desc, desc_len, false);
+ }
+-static int smb_direct_rdma_read(struct ksmbd_transport *t, void *buf,
+-                              unsigned int buflen, u32 remote_key,
+-                              u64 remote_offset, u32 remote_len)
++static int smb_direct_rdma_read(struct ksmbd_transport *t,
++                              void *buf, unsigned int buflen,
++                              struct smb2_buffer_desc_v1 *desc,
++                              unsigned int desc_len)
+ {
+       return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
+-                                  remote_key, remote_offset,
+-                                  remote_len, true);
++                                  desc, desc_len, true);
+ }
+ static void smb_direct_disconnect(struct ksmbd_transport *t)
+-- 
+2.35.1
+
diff --git a/queue-5.18/ksmbd-smbd-introduce-read-write-credits-for-rdma-rea.patch b/queue-5.18/ksmbd-smbd-introduce-read-write-credits-for-rdma-rea.patch
new file mode 100644 (file)
index 0000000..4c4ecc1
--- /dev/null
@@ -0,0 +1,294 @@
+From 874e8676953ec14919db995ac2610534d933d174 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Apr 2022 08:30:26 +0900
+Subject: ksmbd: smbd: introduce read/write credits for RDMA read/write
+
+From: Hyunchul Lee <hyc.lee@gmail.com>
+
+[ Upstream commit ddbdc861e37c168cf2fb8a7b7477f5d18b4daf76 ]
+
+An SMB2_READ/SMB2_WRITE request has to be granted a number
+of rw credits: the number of pages the request wants to
+transfer divided by the maximum number of pages which can
+be registered with one MR to read and write a file.
+Also allocate enough RDMA resources for the maximum
+number of rw credits allowed by ksmbd.
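+
+[ Sketch, not part of the upstream change: the credit cost of one
+  transfer amounts to the buffer's page count divided, rounding up, by
+  the number of pages one MR registration can cover, which is what the
+  calc_rw_credits() helper added below computes. ]
+
+    /* sketch of the credit math; DIV_ROUND_UP(a, b) == (a + b - 1) / b */
+    static int calc_rw_credits_sketch(unsigned int buf_pages,
+                                      unsigned int pages_per_rw_credit)
+    {
+            return (buf_pages + pages_per_rw_credit - 1) /
+                   pages_per_rw_credit;
+    }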
+
+Signed-off-by: Hyunchul Lee <hyc.lee@gmail.com>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ksmbd/transport_rdma.c | 120 ++++++++++++++++++++++----------------
+ 1 file changed, 71 insertions(+), 49 deletions(-)
+
+diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
+index 479d279ee146..b44a5e584bac 100644
+--- a/fs/ksmbd/transport_rdma.c
++++ b/fs/ksmbd/transport_rdma.c
+@@ -80,9 +80,7 @@ static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
+ /*  The maximum single-message size which can be received */
+ static int smb_direct_max_receive_size = 8192;
+-static int smb_direct_max_read_write_size = 524224;
+-
+-static int smb_direct_max_outstanding_rw_ops = 8;
++static int smb_direct_max_read_write_size = 8 * 1024 * 1024;
+ static LIST_HEAD(smb_direct_device_list);
+ static DEFINE_RWLOCK(smb_direct_device_lock);
+@@ -147,10 +145,12 @@ struct smb_direct_transport {
+       atomic_t                send_credits;
+       spinlock_t              lock_new_recv_credits;
+       int                     new_recv_credits;
+-      atomic_t                rw_avail_ops;
++      int                     max_rw_credits;
++      int                     pages_per_rw_credit;
++      atomic_t                rw_credits;
+       wait_queue_head_t       wait_send_credits;
+-      wait_queue_head_t       wait_rw_avail_ops;
++      wait_queue_head_t       wait_rw_credits;
+       mempool_t               *sendmsg_mempool;
+       struct kmem_cache       *sendmsg_cache;
+@@ -377,7 +377,7 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
+       t->reassembly_queue_length = 0;
+       init_waitqueue_head(&t->wait_reassembly_queue);
+       init_waitqueue_head(&t->wait_send_credits);
+-      init_waitqueue_head(&t->wait_rw_avail_ops);
++      init_waitqueue_head(&t->wait_rw_credits);
+       spin_lock_init(&t->receive_credit_lock);
+       spin_lock_init(&t->recvmsg_queue_lock);
+@@ -984,18 +984,19 @@ static int smb_direct_flush_send_list(struct smb_direct_transport *t,
+ }
+ static int wait_for_credits(struct smb_direct_transport *t,
+-                          wait_queue_head_t *waitq, atomic_t *credits)
++                          wait_queue_head_t *waitq, atomic_t *total_credits,
++                          int needed)
+ {
+       int ret;
+       do {
+-              if (atomic_dec_return(credits) >= 0)
++              if (atomic_sub_return(needed, total_credits) >= 0)
+                       return 0;
+-              atomic_inc(credits);
++              atomic_add(needed, total_credits);
+               ret = wait_event_interruptible(*waitq,
+-                                             atomic_read(credits) > 0 ||
+-                                              t->status != SMB_DIRECT_CS_CONNECTED);
++                                             atomic_read(total_credits) >= needed ||
++                                             t->status != SMB_DIRECT_CS_CONNECTED);
+               if (t->status != SMB_DIRECT_CS_CONNECTED)
+                       return -ENOTCONN;
+@@ -1016,7 +1017,19 @@ static int wait_for_send_credits(struct smb_direct_transport *t,
+                       return ret;
+       }
+-      return wait_for_credits(t, &t->wait_send_credits, &t->send_credits);
++      return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 1);
++}
++
++static int wait_for_rw_credits(struct smb_direct_transport *t, int credits)
++{
++      return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits);
++}
++
++static int calc_rw_credits(struct smb_direct_transport *t,
++                         char *buf, unsigned int len)
++{
++      return DIV_ROUND_UP(get_buf_page_count(buf, len),
++                          t->pages_per_rw_credit);
+ }
+ static int smb_direct_create_header(struct smb_direct_transport *t,
+@@ -1332,8 +1345,8 @@ static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
+               smb_direct_disconnect_rdma_connection(t);
+       }
+-      if (atomic_inc_return(&t->rw_avail_ops) > 0)
+-              wake_up(&t->wait_rw_avail_ops);
++      if (atomic_inc_return(&t->rw_credits) > 0)
++              wake_up(&t->wait_rw_credits);
+       rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
+                           msg->sg_list, msg->sgt.nents, dir);
+@@ -1364,8 +1377,10 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
+       struct ib_send_wr *first_wr = NULL;
+       u32 remote_key = le32_to_cpu(desc[0].token);
+       u64 remote_offset = le64_to_cpu(desc[0].offset);
++      int credits_needed;
+-      ret = wait_for_credits(t, &t->wait_rw_avail_ops, &t->rw_avail_ops);
++      credits_needed = calc_rw_credits(t, buf, buf_len);
++      ret = wait_for_rw_credits(t, credits_needed);
+       if (ret < 0)
+               return ret;
+@@ -1373,7 +1388,7 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
+       msg = kmalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) +
+                     sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL);
+       if (!msg) {
+-              atomic_inc(&t->rw_avail_ops);
++              atomic_add(credits_needed, &t->rw_credits);
+               return -ENOMEM;
+       }
+@@ -1382,7 +1397,7 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
+                                    get_buf_page_count(buf, buf_len),
+                                    msg->sg_list, SG_CHUNK_SIZE);
+       if (ret) {
+-              atomic_inc(&t->rw_avail_ops);
++              atomic_add(credits_needed, &t->rw_credits);
+               kfree(msg);
+               return -ENOMEM;
+       }
+@@ -1418,7 +1433,7 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
+       return 0;
+ err:
+-      atomic_inc(&t->rw_avail_ops);
++      atomic_add(credits_needed, &t->rw_credits);
+       if (first_wr)
+               rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
+                                   msg->sg_list, msg->sgt.nents,
+@@ -1643,11 +1658,19 @@ static int smb_direct_prepare_negotiation(struct smb_direct_transport *t)
+       return ret;
+ }
++static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t)
++{
++      return min_t(unsigned int,
++                   t->cm_id->device->attrs.max_fast_reg_page_list_len,
++                   256);
++}
++
+ static int smb_direct_init_params(struct smb_direct_transport *t,
+                                 struct ib_qp_cap *cap)
+ {
+       struct ib_device *device = t->cm_id->device;
+-      int max_send_sges, max_pages, max_rw_wrs, max_send_wrs;
++      int max_send_sges, max_rw_wrs, max_send_wrs;
++      unsigned int max_sge_per_wr, wrs_per_credit;
+       /* need 2 more sge. because a SMB_DIRECT header will be mapped,
+        * and maybe a send buffer could be not page aligned.
+@@ -1659,25 +1682,31 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
+               return -EINVAL;
+       }
+-      /*
+-       * allow smb_direct_max_outstanding_rw_ops of in-flight RDMA
+-       * read/writes. HCA guarantees at least max_send_sge of sges for
+-       * a RDMA read/write work request, and if memory registration is used,
+-       * we need reg_mr, local_inv wrs for each read/write.
++      /* Calculate the number of work requests for RDMA R/W.
++       * The maximum number of pages which can be registered
++       * with one Memory region can be transferred with one
++       * R/W credit. And at least 4 work requests for each credit
++       * are needed for MR registration, RDMA R/W, local & remote
++       * MR invalidation.
+        */
+       t->max_rdma_rw_size = smb_direct_max_read_write_size;
+-      max_pages = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
+-      max_rw_wrs = DIV_ROUND_UP(max_pages, SMB_DIRECT_MAX_SEND_SGES);
+-      max_rw_wrs += rdma_rw_mr_factor(device, t->cm_id->port_num,
+-                      max_pages) * 2;
+-      max_rw_wrs *= smb_direct_max_outstanding_rw_ops;
++      t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t);
++      t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size,
++                                       (t->pages_per_rw_credit - 1) *
++                                       PAGE_SIZE);
++
++      max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge,
++                             device->attrs.max_sge_rd);
++      wrs_per_credit = max_t(unsigned int, 4,
++                             DIV_ROUND_UP(t->pages_per_rw_credit,
++                                          max_sge_per_wr) + 1);
++      max_rw_wrs = t->max_rw_credits * wrs_per_credit;
+       max_send_wrs = smb_direct_send_credit_target + max_rw_wrs;
+       if (max_send_wrs > device->attrs.max_cqe ||
+           max_send_wrs > device->attrs.max_qp_wr) {
+-              pr_err("consider lowering send_credit_target = %d, or max_outstanding_rw_ops = %d\n",
+-                     smb_direct_send_credit_target,
+-                     smb_direct_max_outstanding_rw_ops);
++              pr_err("consider lowering send_credit_target = %d\n",
++                     smb_direct_send_credit_target);
+               pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
+                      device->attrs.max_cqe, device->attrs.max_qp_wr);
+               return -EINVAL;
+@@ -1712,7 +1741,7 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
+       t->send_credit_target = smb_direct_send_credit_target;
+       atomic_set(&t->send_credits, 0);
+-      atomic_set(&t->rw_avail_ops, smb_direct_max_outstanding_rw_ops);
++      atomic_set(&t->rw_credits, t->max_rw_credits);
+       t->max_send_size = smb_direct_max_send_size;
+       t->max_recv_size = smb_direct_max_receive_size;
+@@ -1720,12 +1749,10 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
+       cap->max_send_wr = max_send_wrs;
+       cap->max_recv_wr = t->recv_credit_max;
+-      cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES;
++      cap->max_send_sge = max_sge_per_wr;
+       cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
+       cap->max_inline_data = 0;
+-      cap->max_rdma_ctxs =
+-              rdma_rw_mr_factor(device, t->cm_id->port_num, max_pages) *
+-              smb_direct_max_outstanding_rw_ops;
++      cap->max_rdma_ctxs = t->max_rw_credits;
+       return 0;
+ }
+@@ -1818,7 +1845,8 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
+       }
+       t->send_cq = ib_alloc_cq(t->cm_id->device, t,
+-                               t->send_credit_target, 0, IB_POLL_WORKQUEUE);
++                               smb_direct_send_credit_target + cap->max_rdma_ctxs,
++                               0, IB_POLL_WORKQUEUE);
+       if (IS_ERR(t->send_cq)) {
+               pr_err("Can't create RDMA send CQ\n");
+               ret = PTR_ERR(t->send_cq);
+@@ -1827,8 +1855,7 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
+       }
+       t->recv_cq = ib_alloc_cq(t->cm_id->device, t,
+-                               cap->max_send_wr + cap->max_rdma_ctxs,
+-                               0, IB_POLL_WORKQUEUE);
++                               t->recv_credit_max, 0, IB_POLL_WORKQUEUE);
+       if (IS_ERR(t->recv_cq)) {
+               pr_err("Can't create RDMA recv CQ\n");
+               ret = PTR_ERR(t->recv_cq);
+@@ -1857,17 +1884,12 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
+       pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
+       if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) {
+-              int pages_per_mr, mr_count;
+-
+-              pages_per_mr = min_t(int, pages_per_rw,
+-                                   t->cm_id->device->attrs.max_fast_reg_page_list_len);
+-              mr_count = DIV_ROUND_UP(pages_per_rw, pages_per_mr) *
+-                      atomic_read(&t->rw_avail_ops);
+-              ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs, mr_count,
+-                                    IB_MR_TYPE_MEM_REG, pages_per_mr, 0);
++              ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs,
++                                    t->max_rw_credits, IB_MR_TYPE_MEM_REG,
++                                    t->pages_per_rw_credit, 0);
+               if (ret) {
+                       pr_err("failed to init mr pool count %d pages %d\n",
+-                             mr_count, pages_per_mr);
++                             t->max_rw_credits, t->pages_per_rw_credit);
+                       goto err;
+               }
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.18/ksmbd-validate-length-in-smb2_write.patch b/queue-5.18/ksmbd-validate-length-in-smb2_write.patch
new file mode 100644 (file)
index 0000000..d8404cc
--- /dev/null
@@ -0,0 +1,101 @@
+From e58f6941d18cf59076c318ceaa24694c385f008b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 May 2022 15:40:10 +0200
+Subject: ksmbd: validate length in smb2_write()
+
+From: Marios Makassikis <mmakassikis@freebox.fr>
+
+[ Upstream commit 158a66b245739e15858de42c0ba60fcf3de9b8e6 ]
+
+The SMB2 Write packet contains data that is to be written
+to a file or to a pipe. Depending on the client, there may
+be padding between the header and the data field.
+Currently, the length is validated only when padding is
+present.
+
+Since the DataOffset field always points to the beginning
+of the data, there is no need for a special case for
+padding. Removing it means the length is validated in both
+cases.
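+
+[ Sketch, not part of the upstream change: with the special case gone,
+  a single bounds check covers both the padded and unpadded layouts,
+  as in the hunks below. ]
+
+    /* sketch: reject a DataOffset/Length pair that runs past the packet */
+    if ((u64)le16_to_cpu(req->DataOffset) + length >
+        get_rfc1002_len(work->request_buf))
+            return -EINVAL;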
+
+Signed-off-by: Marios Makassikis <mmakassikis@freebox.fr>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ksmbd/smb2pdu.c | 49 ++++++++++++++++++----------------------------
+ 1 file changed, 19 insertions(+), 30 deletions(-)
+
+diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
+index 0a76aa7fe5f9..5df87fe18905 100644
+--- a/fs/ksmbd/smb2pdu.c
++++ b/fs/ksmbd/smb2pdu.c
+@@ -6344,23 +6344,18 @@ static noinline int smb2_write_pipe(struct ksmbd_work *work)
+       length = le32_to_cpu(req->Length);
+       id = req->VolatileFileId;
+-      if (le16_to_cpu(req->DataOffset) ==
+-          offsetof(struct smb2_write_req, Buffer)) {
+-              data_buf = (char *)&req->Buffer[0];
+-      } else {
+-              if ((u64)le16_to_cpu(req->DataOffset) + length >
+-                  get_rfc1002_len(work->request_buf)) {
+-                      pr_err("invalid write data offset %u, smb_len %u\n",
+-                             le16_to_cpu(req->DataOffset),
+-                             get_rfc1002_len(work->request_buf));
+-                      err = -EINVAL;
+-                      goto out;
+-              }
+-
+-              data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
+-                              le16_to_cpu(req->DataOffset));
++      if ((u64)le16_to_cpu(req->DataOffset) + length >
++          get_rfc1002_len(work->request_buf)) {
++              pr_err("invalid write data offset %u, smb_len %u\n",
++                     le16_to_cpu(req->DataOffset),
++                     get_rfc1002_len(work->request_buf));
++              err = -EINVAL;
++              goto out;
+       }
++      data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
++                         le16_to_cpu(req->DataOffset));
++
+       rpc_resp = ksmbd_rpc_write(work->sess, id, data_buf, length);
+       if (rpc_resp) {
+               if (rpc_resp->flags == KSMBD_RPC_ENOTIMPLEMENTED) {
+@@ -6505,22 +6500,16 @@ int smb2_write(struct ksmbd_work *work)
+       if (req->Channel != SMB2_CHANNEL_RDMA_V1 &&
+           req->Channel != SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
+-              if (le16_to_cpu(req->DataOffset) ==
+-                  offsetof(struct smb2_write_req, Buffer)) {
+-                      data_buf = (char *)&req->Buffer[0];
+-              } else {
+-                      if ((u64)le16_to_cpu(req->DataOffset) + length >
+-                          get_rfc1002_len(work->request_buf)) {
+-                              pr_err("invalid write data offset %u, smb_len %u\n",
+-                                     le16_to_cpu(req->DataOffset),
+-                                     get_rfc1002_len(work->request_buf));
+-                              err = -EINVAL;
+-                              goto out;
+-                      }
+-
+-                      data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
+-                                      le16_to_cpu(req->DataOffset));
++              if ((u64)le16_to_cpu(req->DataOffset) + length >
++                  get_rfc1002_len(work->request_buf)) {
++                      pr_err("invalid write data offset %u, smb_len %u\n",
++                             le16_to_cpu(req->DataOffset),
++                             get_rfc1002_len(work->request_buf));
++                      err = -EINVAL;
++                      goto out;
+               }
++              data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
++                                  le16_to_cpu(req->DataOffset));
+               ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags));
+               if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH)
+-- 
+2.35.1
+
diff --git a/queue-5.18/kvm-nvmx-attempt-to-load-perf_global_ctrl-on-nvmx-xf.patch b/queue-5.18/kvm-nvmx-attempt-to-load-perf_global_ctrl-on-nvmx-xf.patch
new file mode 100644 (file)
index 0000000..e437d59
--- /dev/null
@@ -0,0 +1,78 @@
+From 0c595ec21faf719ae7503cd05c3534eb55ebe586 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 22:44:08 +0000
+Subject: KVM: nVMX: Attempt to load PERF_GLOBAL_CTRL on nVMX xfer iff it
+ exists
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 4496a6f9b45e8cd83343ad86a3984d614e22cf54 ]
+
+Attempt to load PERF_GLOBAL_CTRL during nested VM-Enter/VM-Exit if and
+only if the MSR exists (according to the guest vCPU model).  KVM has very
+misguided handling of VM_{ENTRY,EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL and
+attempts to force the nVMX MSR settings to match the vPMU model, i.e. to
+hide/expose the control based on whether or not the MSR exists from the
+guest's perspective.
+
+KVM's modifications fail to handle the scenario where the vPMU is hidden
+from the guest _after_ being exposed to the guest, e.g. by userspace
+doing multiple KVM_SET_CPUID2 calls, which is allowed if done before any
+KVM_RUN.  nested_vmx_pmu_refresh() is called if and only if there's a
+recognized vPMU, i.e. KVM will leave the bits in the allowed state and then
+ultimately reject the MSR load and WARN.
+
+KVM should not force the VMX MSRs in the first place.  KVM taking control
+of the MSRs was a misguided attempt at mimicking what commit 5f76f6f5ff96
+("KVM: nVMX: Do not expose MPX VMX controls when guest MPX disabled",
+2018-10-01) did for MPX.  However, the MPX commit was a workaround for
+another KVM bug and not something that should be imitated (and it should
+never have been done in the first place).
+
+In other words, KVM's ABI _should_ be that userspace has full control
+over the MSRs, at which point triggering the WARN that loading the MSR
+must not fail is trivial.
+
+The intent of the WARN is still valid; KVM has consistency checks to
+ensure that vmcs12->{guest,host}_ia32_perf_global_ctrl is valid.  The
+problem is that '0' must be considered a valid value at all times, and so
+the simple/obvious solution is to just not actually load the MSR when it
+does not exist.  It is userspace's responsibility to provide a sane vCPU
+model, i.e. KVM is well within its ABI and Intel's VMX architecture to
+skip the loads if the MSR does not exist.
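+
+[ Sketch, not part of the upstream change: the resulting guard loads
+  the MSR only when the VMCS control is set *and* the vCPU model
+  actually has the MSR, as in the hunks below. ]
+
+    /* sketch: skip the PERF_GLOBAL_CTRL load if the guest PMU lacks it */
+    if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+        intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)))
+            kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+                        vmcs12->guest_ia32_perf_global_ctrl);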
+
+Fixes: 03a8871add95 ("KVM: nVMX: Expose load IA32_PERF_GLOBAL_CTRL VM-{Entry,Exit} control")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220722224409.1336532-5-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/nested.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index aa287302f991..5c62e552082a 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -2621,6 +2621,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+               vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
+       if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
++          intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) &&
+           WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+                                    vmcs12->guest_ia32_perf_global_ctrl))) {
+               *entry_failure_code = ENTRY_FAIL_DEFAULT;
+@@ -4346,7 +4347,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
+               vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
+               vcpu->arch.pat = vmcs12->host_ia32_pat;
+       }
+-      if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
++      if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
++          intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)))
+               WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+                                        vmcs12->host_ia32_perf_global_ctrl));
+-- 
+2.35.1
+
diff --git a/queue-5.18/kvm-set_msr_mce-permit-guests-to-ignore-single-bit-e.patch b/queue-5.18/kvm-set_msr_mce-permit-guests-to-ignore-single-bit-e.patch
new file mode 100644 (file)
index 0000000..055458a
--- /dev/null
@@ -0,0 +1,71 @@
+From afc6d9998f6e8d08603017905a5383988aa91bd0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 21 May 2022 08:15:11 +0000
+Subject: KVM: set_msr_mce: Permit guests to ignore single-bit ECC errors
+
+From: Lev Kujawski <lkujaw@member.fsf.org>
+
+[ Upstream commit 0471a7bd1bca2a47a5f378f2222c5cf39ce94152 ]
+
+Certain guest operating systems (e.g., UNIXWARE) clear bit 0 of
+MC1_CTL to ignore single-bit ECC data errors.  Single-bit ECC data
+errors are always correctable and thus are safe to ignore because they
+are informational in nature rather than signaling a loss of data
+integrity.
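+
+[ Sketch, not part of the upstream change: the validation below now
+  tolerates bit 10 (the pre-existing AMD K8 workaround) and bit 0 (the
+  single-bit ECC control) when checking a MCi_CTL write against the
+  architectural all-zeros/all-ones values. ]
+
+    /* sketch: OR in the tolerated bits before the all-ones comparison */
+    if ((offset & 0x3) == 0 &&
+        data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
+            return -1;      /* reject the write */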
+
+Prior to this patch, these guests would crash upon writing MC1_CTL,
+with resultant error messages like the following:
+
+error: kvm run failed Operation not permitted
+EAX=fffffffe EBX=fffffffe ECX=00000404 EDX=ffffffff
+ESI=ffffffff EDI=00000001 EBP=fffdaba4 ESP=fffdab20
+EIP=c01333a5 EFL=00000246 [---Z-P-] CPL=0 II=0 A20=1 SMM=0 HLT=0
+ES =0108 00000000 ffffffff 00c09300 DPL=0 DS   [-WA]
+CS =0100 00000000 ffffffff 00c09b00 DPL=0 CS32 [-RA]
+SS =0108 00000000 ffffffff 00c09300 DPL=0 DS   [-WA]
+DS =0108 00000000 ffffffff 00c09300 DPL=0 DS   [-WA]
+FS =0000 00000000 ffffffff 00c00000
+GS =0000 00000000 ffffffff 00c00000
+LDT=0118 c1026390 00000047 00008200 DPL=0 LDT
+TR =0110 ffff5af0 00000067 00008b00 DPL=0 TSS32-busy
+GDT=     ffff5020 000002cf
+IDT=     ffff52f0 000007ff
+CR0=8001003b CR2=00000000 CR3=0100a000 CR4=00000230
+DR0=00000000 DR1=00000000 DR2=00000000 DR3=00000000
+DR6=ffff0ff0 DR7=00000400
+EFER=0000000000000000
+Code=08 89 01 89 51 04 c3 8b 4c 24 08 8b 01 8b 51 04 8b 4c 24 04 <0f>
+30 c3 f7 05 a4 6d ff ff 10 00 00 00 74 03 0f 31 c3 33 c0 33 d2 c3 8d
+74 26 00 0f 31 c3
+
+Signed-off-by: Lev Kujawski <lkujaw@member.fsf.org>
+Message-Id: <20220521081511.187388-1-lkujaw@member.fsf.org>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/x86.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 767a61e29f51..2316c978b598 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3226,10 +3226,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+                       /* only 0 or all 1s can be written to IA32_MCi_CTL
+                        * some Linux kernels though clear bit 10 in bank 4 to
+                        * workaround a BIOS/GART TBL issue on AMD K8s, ignore
+-                       * this to avoid an uncatched #GP in the guest
++                       * this to avoid an uncatched #GP in the guest.
++                       *
++                       * UNIXWARE clears bit 0 of MC1_CTL to ignore
++                       * correctable, single-bit ECC data errors.
+                        */
+                       if ((offset & 0x3) == 0 &&
+-                          data != 0 && (data | (1 << 10)) != ~(u64)0)
++                          data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
+                               return -1;
+                       /* MCi_STATUS */
+-- 
+2.35.1
+
diff --git a/queue-5.18/kvm-vmx-add-helper-to-check-if-the-guest-pmu-has-per.patch b/queue-5.18/kvm-vmx-add-helper-to-check-if-the-guest-pmu-has-per.patch
new file mode 100644 (file)
index 0000000..3ba173f
--- /dev/null
@@ -0,0 +1,73 @@
+From d297e223ab3f71968097240e39d44fb5cc478e26 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 22:44:07 +0000
+Subject: KVM: VMX: Add helper to check if the guest PMU has PERF_GLOBAL_CTRL
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit b663f0b5f3d665c261256d1f76e98f077c6e56af ]
+
+Add a helper to check if the guest PMU has PERF_GLOBAL_CTRL, which is
+unintuitive _and_ diverges from Intel's architecturally defined behavior.
+Even worse, KVM currently implements the check using two different (but
+equivalent) checks, _and_ there has been at least one attempt to add a
+_third_ flavor.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220722224409.1336532-4-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/pmu_intel.c |  4 ++--
+ arch/x86/kvm/vmx/vmx.h       | 12 ++++++++++++
+ 2 files changed, 14 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
+index 2cbd5f183ab5..8bd154f8c966 100644
+--- a/arch/x86/kvm/vmx/pmu_intel.c
++++ b/arch/x86/kvm/vmx/pmu_intel.c
+@@ -98,7 +98,7 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
+ {
+       struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+-      if (pmu->version < 2)
++      if (!intel_pmu_has_perf_global_ctrl(pmu))
+               return true;
+       return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
+@@ -215,7 +215,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
+       case MSR_CORE_PERF_GLOBAL_STATUS:
+       case MSR_CORE_PERF_GLOBAL_CTRL:
+       case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+-              ret = pmu->version > 1;
++              return intel_pmu_has_perf_global_ctrl(pmu);
+               break;
+       default:
+               ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
+diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
+index 1e7f9453894b..93aa1f3ea01e 100644
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -92,6 +92,18 @@ union vmx_exit_reason {
+       u32 full;
+ };
++static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
++{
++      /*
++       * Architecturally, Intel's SDM states that IA32_PERF_GLOBAL_CTRL is
++       * supported if "CPUID.0AH: EAX[7:0] > 0", i.e. if the PMU version is
++       * greater than zero.  However, KVM only exposes and emulates the MSR
++       * to/for the guest if the guest PMU supports at least "Architectural
++       * Performance Monitoring Version 2".
++       */
++      return pmu->version > 1;
++}
++
+ #define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc)
+ #define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records)
+-- 
+2.35.1
+
diff --git a/queue-5.18/kvm-vmx-mark-all-perf_global_-ovf-_ctrl-bits-reserve.patch b/queue-5.18/kvm-vmx-mark-all-perf_global_-ovf-_ctrl-bits-reserve.patch
new file mode 100644 (file)
index 0000000..54ff0c6
--- /dev/null
@@ -0,0 +1,43 @@
+From cbb6518aa4bc3a15da904a5d81ff02604a6fcbe8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 22:44:06 +0000
+Subject: KVM: VMX: Mark all PERF_GLOBAL_(OVF)_CTRL bits reserved if there's no
+ vPMU
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 93255bf92939d948bc86d81c6bb70bb0fecc5db1 ]
+
+Mark all MSR_CORE_PERF_GLOBAL_CTRL and MSR_CORE_PERF_GLOBAL_OVF_CTRL bits
+as reserved if there is no guest vPMU.  The nVMX VM-Entry consistency
+checks do not check for a valid vPMU prior to consuming the masks via
+kvm_valid_perf_global_ctrl(), i.e. may incorrectly allow a non-zero mask
+to be loaded via VM-Enter or VM-Exit (well, attempted to be loaded, the
+actual MSR load will be rejected by intel_is_valid_msr()).
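+
+[ Sketch, not part of the upstream change, quoted from memory: the
+  consistency check consumes the mask directly, which is why the mask
+  must be all-ones when no vPMU exists. ]
+
+    /* sketch: a set bit in global_ctrl_mask marks a reserved bit */
+    static bool kvm_valid_perf_global_ctrl(struct kvm_pmu *pmu,
+                                           u64 perf_global_ctrl)
+    {
+            return !(pmu->global_ctrl_mask & perf_global_ctrl);
+    }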
+
+Fixes: f5132b01386b ("KVM: Expose a version 2 architectural PMU to a guests")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220722224409.1336532-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/pmu_intel.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
+index 040d598622e3..cd2d0454f8b0 100644
+--- a/arch/x86/kvm/vmx/pmu_intel.c
++++ b/arch/x86/kvm/vmx/pmu_intel.c
+@@ -488,6 +488,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
+       pmu->version = 0;
+       pmu->reserved_bits = 0xffffffff00200000ull;
+       pmu->raw_event_mask = X86_RAW_EVENT_MASK;
++      pmu->global_ctrl_mask = ~0ull;
++      pmu->global_ovf_ctrl_mask = ~0ull;
+       pmu->fixed_ctr_ctrl_mask = ~0ull;
+       entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
+-- 
+2.35.1
+
diff --git a/queue-5.18/kvm-x86-pmu-ignore-pmu-global_ctrl-check-if-vpmu-doe.patch b/queue-5.18/kvm-x86-pmu-ignore-pmu-global_ctrl-check-if-vpmu-doe.patch
new file mode 100644 (file)
index 0000000..802275c
--- /dev/null
@@ -0,0 +1,40 @@
+From 6174683901bf4385f235ccf8923ece845be35a32 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 May 2022 18:22:02 +0800
+Subject: KVM: x86/pmu: Ignore pmu->global_ctrl check if vPMU doesn't support
+ global_ctrl
+
+From: Like Xu <likexu@tencent.com>
+
+[ Upstream commit 98defd2e17803263f49548fea930cfc974d505aa ]
+
+MSR_CORE_PERF_GLOBAL_CTRL is introduced as part of Architecture PMU V2,
+as indicated by Intel SDM 19.2.2 and the intel_is_valid_msr() function.
+
+So in the absence of global_ctrl support, all PMCs are enabled as AMD does.
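+
+[ Sketch, not part of the upstream change: without global_ctrl (PMU
+  version < 2) every PMC is treated as globally enabled, as the hunk
+  below does. ]
+
+    /* sketch: fall back to "always enabled" when global_ctrl is absent */
+    if (pmu->version < 2)
+            return true;
+    return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);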
+
+Signed-off-by: Like Xu <likexu@tencent.com>
+Message-Id: <20220509102204.62389-1-likexu@tencent.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/pmu_intel.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
+index cd2d0454f8b0..2cbd5f183ab5 100644
+--- a/arch/x86/kvm/vmx/pmu_intel.c
++++ b/arch/x86/kvm/vmx/pmu_intel.c
+@@ -98,6 +98,9 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
+ {
+       struct kvm_pmu *pmu = pmc_to_pmu(pmc);
++      if (pmu->version < 2)
++              return true;
++
+       return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.18/kvm-x86-pmu-introduce-the-ctrl_mask-value-for-fixed-.patch b/queue-5.18/kvm-x86-pmu-introduce-the-ctrl_mask-value-for-fixed-.patch
new file mode 100644 (file)
index 0000000..4b5bde5
--- /dev/null
@@ -0,0 +1,79 @@
+From 1687eb7807a86765c388c89e08f961c30ccc30a4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Apr 2022 18:19:34 +0800
+Subject: KVM: x86/pmu: Introduce the ctrl_mask value for fixed counter
+
+From: Like Xu <like.xu@linux.intel.com>
+
+[ Upstream commit 2c985527dd8d283e786ad7a67e532ef7f6f00fac ]
+
+The mask value of the fixed counter control register should be
+dynamically adjusted based on the number of fixed counters. This patch
+introduces a variable that holds the reserved bits of the fixed counter
+control register. This is a generic code refactoring.
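+
+[ Sketch, not part of the upstream change: each fixed counter owns a
+  4-bit field in MSR_CORE_PERF_FIXED_CTR_CTRL; clearing 0xb (bits 0, 1
+  and 3) per available counter leaves only the reserved bits set, as
+  the loop added below does. ]
+
+    /* sketch: bit 2 (AnyThread) stays reserved, hence 0xb rather than 0xf */
+    u64 mask = ~0ull;
+    int i;
+
+    for (i = 0; i < nr_arch_fixed_counters; i++)
+            mask &= ~(0xbull << (i * 4));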
+
+Co-developed-by: Luwei Kang <luwei.kang@intel.com>
+Signed-off-by: Luwei Kang <luwei.kang@intel.com>
+Signed-off-by: Like Xu <like.xu@linux.intel.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Message-Id: <20220411101946.20262-6-likexu@tencent.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/kvm_host.h | 1 +
+ arch/x86/kvm/vmx/pmu_intel.c    | 6 +++++-
+ 2 files changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index 57550a427789..35c7a1fce8ea 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -508,6 +508,7 @@ struct kvm_pmu {
+       unsigned nr_arch_fixed_counters;
+       unsigned available_event_types;
+       u64 fixed_ctr_ctrl;
++      u64 fixed_ctr_ctrl_mask;
+       u64 global_ctrl;
+       u64 global_status;
+       u64 counter_bitmask[2];
+diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
+index b82b6709d7a8..040d598622e3 100644
+--- a/arch/x86/kvm/vmx/pmu_intel.c
++++ b/arch/x86/kvm/vmx/pmu_intel.c
+@@ -395,7 +395,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+       case MSR_CORE_PERF_FIXED_CTR_CTRL:
+               if (pmu->fixed_ctr_ctrl == data)
+                       return 0;
+-              if (!(data & 0xfffffffffffff444ull)) {
++              if (!(data & pmu->fixed_ctr_ctrl_mask)) {
+                       reprogram_fixed_counters(pmu, data);
+                       return 0;
+               }
+@@ -479,6 +479,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
+       struct kvm_cpuid_entry2 *entry;
+       union cpuid10_eax eax;
+       union cpuid10_edx edx;
++      int i;
+       pmu->nr_arch_gp_counters = 0;
+       pmu->nr_arch_fixed_counters = 0;
+@@ -487,6 +488,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
+       pmu->version = 0;
+       pmu->reserved_bits = 0xffffffff00200000ull;
+       pmu->raw_event_mask = X86_RAW_EVENT_MASK;
++      pmu->fixed_ctr_ctrl_mask = ~0ull;
+       entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
+       if (!entry || !vcpu->kvm->arch.enable_pmu)
+@@ -522,6 +524,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
+               setup_fixed_pmc_eventsel(pmu);
+       }
++      for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
++              pmu->fixed_ctr_ctrl_mask &= ~(0xbull << (i * 4));
+       pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) |
+               (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
+       pmu->global_ctrl_mask = ~pmu->global_ctrl;
+-- 
+2.35.1
+
diff --git a/queue-5.18/kvm-x86-signal-gp-not-eperm-on-bad-wrmsr-mci_ctl-sta.patch b/queue-5.18/kvm-x86-signal-gp-not-eperm-on-bad-wrmsr-mci_ctl-sta.patch
new file mode 100644 (file)
index 0000000..b900ac5
--- /dev/null
@@ -0,0 +1,50 @@
+From e9cd8ca56097b6239965d4f48f6472c096fa12cf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 May 2022 22:27:14 +0000
+Subject: KVM: x86: Signal #GP, not -EPERM, on bad WRMSR(MCi_CTL/STATUS)
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 2368048bf5c2ec4b604ac3431564071e89a0bc71 ]
+
+Return '1', not '-1', when handling an illegal WRMSR to a MCi_CTL or
+MCi_STATUS MSR.  The behavior of "all zeros" or "all ones" for CTL MSRs
+is architectural, as is the "only zeros" behavior for STATUS MSRs.  I.e.
+the intent is to inject a #GP, not exit to userspace due to an unhandled
+emulation case.  Returning '-1' gets interpreted as -EPERM up the stack
+and effectively kills the guest.
+
+Fixes: 890ca9aefa78 ("KVM: Add MCE support")
+Fixes: 9ffd986c6e4e ("KVM: X86: #GP when guest attempts to write MCi_STATUS register w/o 0")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Link: https://lore.kernel.org/r/20220512222716.4112548-2-seanjc@google.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/x86.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 2316c978b598..0d6cea0d33a9 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3233,13 +3233,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+                        */
+                       if ((offset & 0x3) == 0 &&
+                           data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
+-                              return -1;
++                              return 1;
+                       /* MCi_STATUS */
+                       if (!msr_info->host_initiated &&
+                           (offset & 0x3) == 1 && data != 0) {
+                               if (!can_set_mci_status(vcpu))
+-                                      return -1;
++                                      return 1;
+                       }
+                       vcpu->arch.mce_banks[offset] = data;
+-- 
+2.35.1
+
diff --git a/queue-5.18/locking-csd_lock-change-csdlock_debug-from-early_par.patch b/queue-5.18/locking-csd_lock-change-csdlock_debug-from-early_par.patch
new file mode 100644 (file)
index 0000000..feb1738
--- /dev/null
@@ -0,0 +1,56 @@
+From 9219faa1ac767c0882f58d034a20086d6c06042e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 10 May 2022 17:46:39 +0800
+Subject: locking/csd_lock: Change csdlock_debug from early_param to __setup
+
+From: Chen Zhongjin <chenzhongjin@huawei.com>
+
+[ Upstream commit 9c9b26b0df270d4f9246e483a44686fca951a29c ]
+
+The csdlock_debug kernel-boot parameter is parsed by the
+early_param() function csdlock_debug().  If set, csdlock_debug()
+invokes static_branch_enable() to enable csd_lock_wait feature, which
+triggers a panic on arm64 for kernels built with CONFIG_SPARSEMEM=y and
+CONFIG_SPARSEMEM_VMEMMAP=n.
+
+With CONFIG_SPARSEMEM_VMEMMAP=n, __nr_to_section is called in
+static_key_enable() and returns NULL, resulting in a NULL dereference
+because mem_section is initialized only later in sparse_init().
+
+This is also a problem for powerpc because early_param() functions
+are invoked earlier than jump_label_init(), also resulting in
+static_key_enable() failures.  These failures cause the warning "static
+key 'xxx' used before call to jump_label_init()".
+
+Thus, early_param() is too early for csd_lock_wait to run
+static_branch_enable(), so change it to __setup() to fix these failures.
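+
+[ Sketch, not part of the upstream change: the two registration macros
+  run at different times and use different return conventions, hence
+  the paired change to the handler's return value in the hunk below. ]
+
+    /* early_param(): parsed before jump_label_init(); 0 means success */
+    early_param("csdlock_debug", csdlock_debug);
+
+    /* __setup(): parsed after jump_label_init(); 1 means "consumed" */
+    __setup("csdlock_debug=", csdlock_debug);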
+
+Fixes: 8d0968cc6b8f ("locking/csd_lock: Add boot parameter for controlling CSD lock debugging")
+Cc: stable@vger.kernel.org
+Reported-by: Chen jingwen <chenjingwen6@huawei.com>
+Signed-off-by: Chen Zhongjin <chenzhongjin@huawei.com>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/smp.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/smp.c b/kernel/smp.c
+index 65a630f62363..381eb15cd28f 100644
+--- a/kernel/smp.c
++++ b/kernel/smp.c
+@@ -174,9 +174,9 @@ static int __init csdlock_debug(char *str)
+       if (val)
+               static_branch_enable(&csdlock_debug_enabled);
+-      return 0;
++      return 1;
+ }
+-early_param("csdlock_debug", csdlock_debug);
++__setup("csdlock_debug=", csdlock_debug);
+ static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
+ static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
+-- 
+2.35.1
+
diff --git a/queue-5.18/mm-damon-reclaim-fix-potential-memory-leak-in-damon_.patch b/queue-5.18/mm-damon-reclaim-fix-potential-memory-leak-in-damon_.patch
new file mode 100644 (file)
index 0000000..b60df35
--- /dev/null
@@ -0,0 +1,46 @@
+From da88b2e18326479f78988cfb3e313dbb2286e447 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Jul 2022 14:37:46 +0800
+Subject: mm/damon/reclaim: fix potential memory leak in damon_reclaim_init()
+
+From: Jianglei Nie <niejianglei2021@163.com>
+
+[ Upstream commit 188043c7f4f2bd662f2a55957d684fffa543e600 ]
+
+damon_reclaim_init() allocates a memory chunk for ctx with
+damon_new_ctx().  When damon_select_ops() fails, ctx is not released,
+which will lead to a memory leak.
+
+We should release the ctx with damon_destroy_ctx() when damon_select_ops()
+fails to fix the memory leak.
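+
+[ Sketch, not part of the upstream change: the error path now unwinds
+  the allocation, as the hunk below does. ]
+
+    /* sketch: destroy the context if operations selection fails */
+    ctx = damon_new_ctx();
+    if (!ctx)
+            return -ENOMEM;
+    if (damon_select_ops(ctx, DAMON_OPS_PADDR)) {
+            damon_destroy_ctx(ctx);         /* previously leaked */
+            return -EINVAL;
+    }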
+
+Link: https://lkml.kernel.org/r/20220714063746.2343549-1-niejianglei2021@163.com
+Fixes: 4d69c3457821 ("mm/damon/reclaim: use damon_select_ops() instead of damon_{v,p}a_set_operations()")
+Signed-off-by: Jianglei Nie <niejianglei2021@163.com>
+Reviewed-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/damon/reclaim.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
+index e34c4d0c4d93..11982685508e 100644
+--- a/mm/damon/reclaim.c
++++ b/mm/damon/reclaim.c
+@@ -384,8 +384,10 @@ static int __init damon_reclaim_init(void)
+       if (!ctx)
+               return -ENOMEM;
+-      if (damon_select_ops(ctx, DAMON_OPS_PADDR))
++      if (damon_select_ops(ctx, DAMON_OPS_PADDR)) {
++              damon_destroy_ctx(ctx);
+               return -EINVAL;
++      }
+       ctx->callback.after_aggregation = damon_reclaim_after_aggregation;
+-- 
+2.35.1
+
diff --git a/queue-5.18/net-9p-initialize-the-iounit-field-during-fid-creati.patch b/queue-5.18/net-9p-initialize-the-iounit-field-during-fid-creati.patch
new file mode 100644 (file)
index 0000000..d96916f
--- /dev/null
@@ -0,0 +1,68 @@
+From 54577663faf8efe35cef8b782a6a2be7dbe01e35 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 10 Jul 2022 09:14:02 -0500
+Subject: net/9p: Initialize the iounit field during fid creation
+
+From: Tyler Hicks <tyhicks@linux.microsoft.com>
+
+[ Upstream commit aa7aeee169480e98cf41d83c01290a37e569be6d ]
+
+Ensure that the fid's iounit field is set to zero when a new fid is
+created. Certain 9P operations, such as OPEN and CREATE, allow the
+server to reply with an iounit size which the client code assigns to the
+p9_fid struct shortly after the fid is created by p9_fid_create(). On
+the other hand, an XATTRWALK operation doesn't allow for the server to
+specify an iounit value. The iounit field of the newly allocated p9_fid
+struct remained uninitialized in that case. Depending on allocation
+patterns, the iounit value could have been something reasonable that was
+carried over from previously freed fids or, in the worst case, could
+have been arbitrary values from non-fid related usages of the memory
+location.
+
+The bug was detected in the Windows Subsystem for Linux 2 (WSL2) kernel
+after the uninitialized iounit field resulted in the typical sequence of
+two getxattr(2) syscalls, one to get the size of an xattr and another
+after allocating a sufficiently sized buffer to fit the xattr value, to
+hit an unexpected ERANGE error in the second call to getxattr(2). An
+uninitialized iounit field would sometimes force rsize to be smaller
+than the xattr value size in p9_client_read_once() and the 9P server in
+WSL refused to chunk up the READ on the attr_fid and, instead, returned
+ERANGE to the client. The virtfs server in QEMU seems happy to chunk up
+the READ and this problem goes undetected there.
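+
+[ Sketch, not part of the upstream change: zeroing the allocation up
+  front covers iounit and every other field, so only the non-zero
+  defaults need explicit assignment, as the hunk below shows. ]
+
+    /* sketch: kzalloc() zeroes qid, rdir, fid, iounit, ... in one go */
+    fid = kzalloc(sizeof(*fid), GFP_KERNEL);
+    if (!fid)
+            return NULL;
+    fid->mode = -1;
+    fid->uid = current_fsuid();
+    fid->clnt = clnt;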
+
+Link: https://lkml.kernel.org/r/20220710141402.803295-1-tyhicks@linux.microsoft.com
+Fixes: ebf46264a004 ("fs/9p: Add support user. xattr")
+Cc: stable@vger.kernel.org
+Signed-off-by: Tyler Hicks <tyhicks@linux.microsoft.com>
+Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com>
+Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/9p/client.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/net/9p/client.c b/net/9p/client.c
+index a36a40137caa..87cde948f628 100644
+--- a/net/9p/client.c
++++ b/net/9p/client.c
+@@ -886,16 +886,13 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt)
+       struct p9_fid *fid;
+       p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt);
+-      fid = kmalloc(sizeof(*fid), GFP_KERNEL);
++      fid = kzalloc(sizeof(*fid), GFP_KERNEL);
+       if (!fid)
+               return NULL;
+-      memset(&fid->qid, 0, sizeof(fid->qid));
+       fid->mode = -1;
+       fid->uid = current_fsuid();
+       fid->clnt = clnt;
+-      fid->rdir = NULL;
+-      fid->fid = 0;
+       refcount_set(&fid->count, 1);
+       idr_preload(GFP_KERNEL);
+-- 
+2.35.1
+
diff --git a/queue-5.18/pci-aer-iterate-over-error-counters-instead-of-error.patch b/queue-5.18/pci-aer-iterate-over-error-counters-instead-of-error.patch
new file mode 100644 (file)
index 0000000..d27157e
--- /dev/null
@@ -0,0 +1,61 @@
+From dbe842dddf2be13dbdb8de5eb0b9ee702f315d95 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 May 2022 18:14:41 +0000
+Subject: PCI/AER: Iterate over error counters instead of error strings
+
+From: Mohamed Khalfella <mkhalfella@purestorage.com>
+
+[ Upstream commit 5e6ae050955b566484f3cc6a66e3925eae87a0ed ]
+
+Previously we iterated over AER stat *names*, e.g.,
+aer_correctable_error_string[32], but the actual stat *counters* may not be
+that large, e.g., pdev->aer_stats->dev_cor_errs[16], which means that we
+printed junk in the sysfs stats files.
+
+Iterate over the stat counter arrays instead of the names to avoid this
+junk.
+
+Also, add a build-time check to make sure all
+counters have entries in the strings array.
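+
+[ Sketch, not part of the upstream change: the loop is now bounded by
+  the counter array, and a compile-time guard keeps the names array at
+  least as large, as in the hunks below. ]
+
+    /* sketch: iterate over the counters, printing only named entries */
+    for (i = 0; i < ARRAY_SIZE(pdev->aer_stats->dev_cor_errs); i++)
+            if (aer_correctable_error_string[i])
+                    len += sysfs_emit_at(buf, len, "%s %llu\n",
+                                         aer_correctable_error_string[i],
+                                         pdev->aer_stats->dev_cor_errs[i]);
+
+    /* compile-time guard: every counter must have a name entry */
+    BUILD_BUG_ON(ARRAY_SIZE(aer_correctable_error_string) <
+                 AER_MAX_TYPEOF_COR_ERRS);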
+
+Fixes: 0678e3109a3c ("PCI/AER: Simplify __aer_print_error()")
+Link: https://lore.kernel.org/r/20220509181441.31884-1-mkhalfella@purestorage.com
+Reported-by: Meeta Saggi <msaggi@purestorage.com>
+Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Meeta Saggi <msaggi@purestorage.com>
+Reviewed-by: Eric Badger <ebadger@purestorage.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pci/pcie/aer.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
+index 7952e5efd6cf..a1e38ca93cd9 100644
+--- a/drivers/pci/pcie/aer.c
++++ b/drivers/pci/pcie/aer.c
+@@ -538,7 +538,7 @@ static const char *aer_agent_string[] = {
+       u64 *stats = pdev->aer_stats->stats_array;                      \
+       size_t len = 0;                                                 \
+                                                                       \
+-      for (i = 0; i < ARRAY_SIZE(strings_array); i++) {               \
++      for (i = 0; i < ARRAY_SIZE(pdev->aer_stats->stats_array); i++) {\
+               if (strings_array[i])                                   \
+                       len += sysfs_emit_at(buf, len, "%s %llu\n",     \
+                                            strings_array[i],          \
+@@ -1347,6 +1347,11 @@ static int aer_probe(struct pcie_device *dev)
+       struct device *device = &dev->device;
+       struct pci_dev *port = dev->port;
++      BUILD_BUG_ON(ARRAY_SIZE(aer_correctable_error_string) <
++                   AER_MAX_TYPEOF_COR_ERRS);
++      BUILD_BUG_ON(ARRAY_SIZE(aer_uncorrectable_error_string) <
++                   AER_MAX_TYPEOF_UNCOR_ERRS);
++
+       /* Limit to Root Ports or Root Complex Event Collectors */
+       if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) &&
+           (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT))
+-- 
+2.35.1
+
diff --git a/queue-5.18/pci-qcom-power-on-phy-before-ipq8074-dbi-register-ac.patch b/queue-5.18/pci-qcom-power-on-phy-before-ipq8074-dbi-register-ac.patch
new file mode 100644 (file)
index 0000000..be88f13
--- /dev/null
@@ -0,0 +1,111 @@
+From 458d79349bdb8364d0035a65afb591e5eff068d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jun 2022 17:50:03 +0200
+Subject: PCI: qcom: Power on PHY before IPQ8074 DBI register accesses
+
+From: Robert Marko <robimarko@gmail.com>
+
+[ Upstream commit a0e43bb9973b06ce5c666f0901e104e2037c1b34 ]
+
+Currently the Gen2 port in IPQ8074 will cause the system to hang as it
+accesses DBI registers in qcom_pcie_init_2_3_3(), and those are only
+accesible after phy_power_on().
+
+Move the DBI read/writes to a new qcom_pcie_post_init_2_3_3(), which is
+executed after phy_power_on().
+
+Link: https://lore.kernel.org/r/20220623155004.688090-1-robimarko@gmail.com
+Fixes: a0fd361db8e5 ("PCI: dwc: Move "dbi", "dbi2", and "addr_space" resource setup into common code")
+Signed-off-by: Robert Marko <robimarko@gmail.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Cc: stable@vger.kernel.org     # v5.11+
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pci/controller/dwc/pcie-qcom.c | 48 +++++++++++++++-----------
+ 1 file changed, 28 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
+index ab04818f6ed9..340542aab8a5 100644
+--- a/drivers/pci/controller/dwc/pcie-qcom.c
++++ b/drivers/pci/controller/dwc/pcie-qcom.c
+@@ -1036,9 +1036,7 @@ static int qcom_pcie_init_2_3_3(struct qcom_pcie *pcie)
+       struct qcom_pcie_resources_2_3_3 *res = &pcie->res.v2_3_3;
+       struct dw_pcie *pci = pcie->pci;
+       struct device *dev = pci->dev;
+-      u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+       int i, ret;
+-      u32 val;
+       for (i = 0; i < ARRAY_SIZE(res->rst); i++) {
+               ret = reset_control_assert(res->rst[i]);
+@@ -1095,6 +1093,33 @@ static int qcom_pcie_init_2_3_3(struct qcom_pcie *pcie)
+               goto err_clk_aux;
+       }
++      return 0;
++
++err_clk_aux:
++      clk_disable_unprepare(res->ahb_clk);
++err_clk_ahb:
++      clk_disable_unprepare(res->axi_s_clk);
++err_clk_axi_s:
++      clk_disable_unprepare(res->axi_m_clk);
++err_clk_axi_m:
++      clk_disable_unprepare(res->iface);
++err_clk_iface:
++      /*
++       * Not checking for failure, will anyway return
++       * the original failure in 'ret'.
++       */
++      for (i = 0; i < ARRAY_SIZE(res->rst); i++)
++              reset_control_assert(res->rst[i]);
++
++      return ret;
++}
++
++static int qcom_pcie_post_init_2_3_3(struct qcom_pcie *pcie)
++{
++      struct dw_pcie *pci = pcie->pci;
++      u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
++      u32 val;
++
+       writel(SLV_ADDR_SPACE_SZ,
+               pcie->parf + PCIE20_v3_PARF_SLV_ADDR_SPACE_SIZE);
+@@ -1122,24 +1147,6 @@ static int qcom_pcie_init_2_3_3(struct qcom_pcie *pcie)
+               PCI_EXP_DEVCTL2);
+       return 0;
+-
+-err_clk_aux:
+-      clk_disable_unprepare(res->ahb_clk);
+-err_clk_ahb:
+-      clk_disable_unprepare(res->axi_s_clk);
+-err_clk_axi_s:
+-      clk_disable_unprepare(res->axi_m_clk);
+-err_clk_axi_m:
+-      clk_disable_unprepare(res->iface);
+-err_clk_iface:
+-      /*
+-       * Not checking for failure, will anyway return
+-       * the original failure in 'ret'.
+-       */
+-      for (i = 0; i < ARRAY_SIZE(res->rst); i++)
+-              reset_control_assert(res->rst[i]);
+-
+-      return ret;
+ }
+ static int qcom_pcie_get_resources_2_7_0(struct qcom_pcie *pcie)
+@@ -1465,6 +1472,7 @@ static const struct qcom_pcie_ops ops_2_4_0 = {
+ static const struct qcom_pcie_ops ops_2_3_3 = {
+       .get_resources = qcom_pcie_get_resources_2_3_3,
+       .init = qcom_pcie_init_2_3_3,
++      .post_init = qcom_pcie_post_init_2_3_3,
+       .deinit = qcom_pcie_deinit_2_3_3,
+       .ltssm_enable = qcom_pcie_2_3_2_ltssm_enable,
+ };
+-- 
+2.35.1
+
diff --git a/queue-5.18/powerpc-powernv-kvm-use-darn-for-h_random-on-power9.patch b/queue-5.18/powerpc-powernv-kvm-use-darn-for-h_random-on-power9.patch
new file mode 100644 (file)
index 0000000..2cea5bd
--- /dev/null
@@ -0,0 +1,145 @@
+From 6df1064850462c2a7cac009c6e72ad3fcd7d0fa6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jul 2022 00:32:18 +1000
+Subject: powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+[ Upstream commit 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f ]
+
+The existing logic in KVM to support guests calling H_RANDOM only works
+on Power8, because it looks for an RNG in the device tree, but on Power9
+we just use darn.
+
+In addition, the existing code needs to work in real mode, so we have the
+special-cased powernv_get_random_real_mode() to deal with that.
+
+Instead, just have KVM call ppc_md.get_random_seed() and do the real-mode
+check inside of it; that way we use whatever RNG is available, including
+darn on Power9.
+
+Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
+Cc: stable@vger.kernel.org # v4.1+
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Tested-by: Sachin Sant <sachinp@linux.ibm.com>
+[mpe: Rebase on previous commit, update change log appropriately]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/archrandom.h |  5 ----
+ arch/powerpc/kvm/book3s_hv_builtin.c  |  7 +++---
+ arch/powerpc/platforms/powernv/rng.c  | 36 ++++++---------------------
+ 3 files changed, 12 insertions(+), 36 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
+index 9a53e29680f4..258174304904 100644
+--- a/arch/powerpc/include/asm/archrandom.h
++++ b/arch/powerpc/include/asm/archrandom.h
+@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
+ #endif /* CONFIG_ARCH_RANDOM */
+ #ifdef CONFIG_PPC_POWERNV
+-int powernv_hwrng_present(void);
+ int powernv_get_random_long(unsigned long *v);
+-int powernv_get_random_real_mode(unsigned long *v);
+-#else
+-static inline int powernv_hwrng_present(void) { return 0; }
+-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
+ #endif
+ #endif /* _ASM_POWERPC_ARCHRANDOM_H */
+diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
+index 7e52d0beee77..5e4251b76e75 100644
+--- a/arch/powerpc/kvm/book3s_hv_builtin.c
++++ b/arch/powerpc/kvm/book3s_hv_builtin.c
+@@ -19,7 +19,7 @@
+ #include <asm/interrupt.h>
+ #include <asm/kvm_ppc.h>
+ #include <asm/kvm_book3s.h>
+-#include <asm/archrandom.h>
++#include <asm/machdep.h>
+ #include <asm/xics.h>
+ #include <asm/xive.h>
+ #include <asm/dbell.h>
+@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
+ int kvmppc_hwrng_present(void)
+ {
+-      return powernv_hwrng_present();
++      return ppc_md.get_random_seed != NULL;
+ }
+ EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
+ long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
+ {
+-      if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
++      if (ppc_md.get_random_seed &&
++          ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
+               return H_SUCCESS;
+       return H_HARDWARE;
+diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
+index 2287c9cd0cd5..d19305292e1e 100644
+--- a/arch/powerpc/platforms/powernv/rng.c
++++ b/arch/powerpc/platforms/powernv/rng.c
+@@ -29,15 +29,6 @@ struct powernv_rng {
+ static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
+-int powernv_hwrng_present(void)
+-{
+-      struct powernv_rng *rng;
+-
+-      rng = get_cpu_var(powernv_rng);
+-      put_cpu_var(rng);
+-      return rng != NULL;
+-}
+-
+ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
+ {
+       unsigned long parity;
+@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
+       return val;
+ }
+-int powernv_get_random_real_mode(unsigned long *v)
+-{
+-      struct powernv_rng *rng;
+-
+-      rng = raw_cpu_read(powernv_rng);
+-      if (!rng)
+-              return 0;
+-
+-      *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+-
+-      return 1;
+-}
+-
+ static int powernv_get_random_darn(unsigned long *v)
+ {
+       unsigned long val;
+@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
+ {
+       struct powernv_rng *rng;
+-      rng = get_cpu_var(powernv_rng);
+-
+-      *v = rng_whiten(rng, in_be64(rng->regs));
+-
+-      put_cpu_var(rng);
+-
++      if (mfmsr() & MSR_DR) {
++              rng = get_cpu_var(powernv_rng);
++              *v = rng_whiten(rng, in_be64(rng->regs));
++              put_cpu_var(rng);
++      } else {
++              rng = raw_cpu_read(powernv_rng);
++              *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
++      }
+       return 1;
+ }
+ EXPORT_SYMBOL_GPL(powernv_get_random_long);
+-- 
+2.35.1
+
diff --git a/queue-5.18/s390-unwind-fix-fgraph-return-address-recovery.patch b/queue-5.18/s390-unwind-fix-fgraph-return-address-recovery.patch
new file mode 100644 (file)
index 0000000..02c34a0
--- /dev/null
@@ -0,0 +1,46 @@
+From acaca81b30149dc942304fa4d9460deb6ab485ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 18:57:59 +0200
+Subject: s390/unwind: fix fgraph return address recovery
+
+From: Sumanth Korikkar <sumanthk@linux.ibm.com>
+
+[ Upstream commit ded466e1806686794b403ebf031133bbaca76bb2 ]
+
+When HAVE_FUNCTION_GRAPH_RET_ADDR_PTR is defined, the return
+address to the fgraph caller is recovered by tagging it along with the
+stack pointer of the ftrace stack. This makes stack unwinding more
+reliable.
+
+When the fgraph return address is modified to return_to_handler,
+ftrace_graph_ret_addr tries to restore it to the original
+value using the tagged stack pointer.
+
+Fix this by passing the tagged sp to ftrace_graph_ret_addr.
+
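+For reference, with HAVE_FUNCTION_GRAPH_RET_ADDR_PTR the generic helper
+matches recorded entries against the tagged pointer; a simplified sketch
+of the fgraph logic (not the literal kernel/trace/fgraph.c source):
+
+    unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
+                                        unsigned long ret, unsigned long *retp)
+    {
+            int i;
+
+            if (ret != (unsigned long)return_to_handler)
+                    return ret;
+
+            for (i = 0; i <= task->curr_ret_stack; i++)
+                    if (task->ret_stack[i].retp == retp)   /* tagged sp */
+                            return task->ret_stack[i].ret; /* original */
+
+            return ret;
+    }
+
+Passing NULL for retp, as the code did before this fix, means no entry
+can match, so the unwinder is left with return_to_handler instead of the
+real caller.
+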
+Fixes: d81675b60d09 ("s390/unwind: recover kretprobe modified return address in stacktrace")
+Cc: <stable@vger.kernel.org> # 5.18
+Reviewed-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
+Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/include/asm/unwind.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
+index 0bf06f1682d8..02462e7100c1 100644
+--- a/arch/s390/include/asm/unwind.h
++++ b/arch/s390/include/asm/unwind.h
+@@ -47,7 +47,7 @@ struct unwind_state {
+ static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state,
+                                                   unsigned long ip)
+ {
+-      ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
++      ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *)state->sp);
+       if (is_kretprobe_trampoline(ip))
+               ip = kretprobe_find_ret_addr(state->task, (void *)state->sp, &state->kr_cur);
+       return ip;
+-- 
+2.35.1
+
diff --git a/queue-5.18/scsi-qla2xxx-edif-fix-dropped-ike-message.patch b/queue-5.18/scsi-qla2xxx-edif-fix-dropped-ike-message.patch
new file mode 100644 (file)
index 0000000..d421910
--- /dev/null
@@ -0,0 +1,126 @@
+From 983ed3d61efd61bb2643bd8b27af58c45e0dfcce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 22:20:40 -0700
+Subject: scsi: qla2xxx: edif: Fix dropped IKE message
+
+From: Quinn Tran <qutran@marvell.com>
+
+[ Upstream commit c019cd656e717349ff22d0c41d6fbfc773f48c52 ]
+
+This patch fixes IKE messages being dropped due to an error in processing
+the Purex IOCB and Continuation IOCBs.
+
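+As a worked example of the new wrap handling in qla_chk_cont_iocb_avail()
+(illustrative numbers only): with a ring of rsp->length = 32 entries,
+start_pkt_ring_index = 30 and rsp_q_in = 2, the IN pointer has wrapped,
+so iocb_cnt = 32 - 30 + 2 = 4. A packet claiming entry_count = 5 then
+returns -EIO and is re-processed on a later interrupt, once all of its
+continuation IOCBs have arrived.
+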
+Link: https://lore.kernel.org/r/20220713052045.10683-6-njavali@marvell.com
+Fixes: fac2807946c1 ("scsi: qla2xxx: edif: Add extraction of auth_els from the wire")
+Cc: stable@vger.kernel.org
+Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
+Signed-off-by: Quinn Tran <qutran@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_isr.c | 54 +++++++++++++++-------------------
+ 1 file changed, 24 insertions(+), 30 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
+index c509bb23af40..125b208cf118 100644
+--- a/drivers/scsi/qla2xxx/qla_isr.c
++++ b/drivers/scsi/qla2xxx/qla_isr.c
+@@ -3711,12 +3711,11 @@ void qla24xx_nvme_ls4_iocb(struct scsi_qla_host *vha,
+  * Return: 0 all iocbs has arrived, xx- all iocbs have not arrived.
+  */
+ static int qla_chk_cont_iocb_avail(struct scsi_qla_host *vha,
+-      struct rsp_que *rsp, response_t *pkt)
++      struct rsp_que *rsp, response_t *pkt, u32 rsp_q_in)
+ {
+-      int start_pkt_ring_index, end_pkt_ring_index, n_ring_index;
+-      response_t *end_pkt;
++      int start_pkt_ring_index;
++      u32 iocb_cnt = 0;
+       int rc = 0;
+-      u32 rsp_q_in;
+       if (pkt->entry_count == 1)
+               return rc;
+@@ -3727,34 +3726,18 @@ static int qla_chk_cont_iocb_avail(struct scsi_qla_host *vha,
+       else
+               start_pkt_ring_index = rsp->ring_index - 1;
+-      if ((start_pkt_ring_index + pkt->entry_count) >= rsp->length)
+-              end_pkt_ring_index = start_pkt_ring_index + pkt->entry_count -
+-                      rsp->length - 1;
++      if (rsp_q_in < start_pkt_ring_index)
++              /* q in ptr is wrapped */
++              iocb_cnt = rsp->length - start_pkt_ring_index + rsp_q_in;
+       else
+-              end_pkt_ring_index = start_pkt_ring_index + pkt->entry_count - 1;
++              iocb_cnt = rsp_q_in - start_pkt_ring_index;
+-      end_pkt = rsp->ring + end_pkt_ring_index;
+-
+-      /*  next pkt = end_pkt + 1 */
+-      n_ring_index = end_pkt_ring_index + 1;
+-      if (n_ring_index >= rsp->length)
+-              n_ring_index = 0;
+-
+-      rsp_q_in = rsp->qpair->use_shadow_reg ? *rsp->in_ptr :
+-              rd_reg_dword(rsp->rsp_q_in);
+-
+-      /* rsp_q_in is either wrapped or pointing beyond endpkt */
+-      if ((rsp_q_in < start_pkt_ring_index && rsp_q_in < n_ring_index) ||
+-                      rsp_q_in >= n_ring_index)
+-              /* all IOCBs arrived. */
+-              rc = 0;
+-      else
++      if (iocb_cnt < pkt->entry_count)
+               rc = -EIO;
+-      ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x5091,
+-          "%s - ring %p pkt %p end pkt %p entry count %#x rsp_q_in %d rc %d\n",
+-          __func__, rsp->ring, pkt, end_pkt, pkt->entry_count,
+-          rsp_q_in, rc);
++      ql_dbg(ql_dbg_init, vha, 0x5091,
++             "%s - ring %p pkt %p entry count %d iocb_cnt %d rsp_q_in %d rc %d\n",
++             __func__, rsp->ring, pkt, pkt->entry_count, iocb_cnt, rsp_q_in, rc);
+       return rc;
+ }
+@@ -3771,7 +3754,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+       struct qla_hw_data *ha = vha->hw;
+       struct purex_entry_24xx *purex_entry;
+       struct purex_item *pure_item;
+-      u16 rsp_in = 0;
++      u16 rsp_in = 0, cur_ring_index;
+       int follow_inptr, is_shadow_hba;
+       if (!ha->flags.fw_started)
+@@ -3802,6 +3785,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+                      (!follow_inptr &&
+                       rsp->ring_ptr->signature != RESPONSE_PROCESSED)) {
+               pkt = (struct sts_entry_24xx *)rsp->ring_ptr;
++              cur_ring_index = rsp->ring_index;
+               rsp->ring_index++;
+               if (rsp->ring_index == rsp->length) {
+@@ -3922,7 +3906,17 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+                               break;
+                       case ELS_AUTH_ELS:
+-                              if (qla_chk_cont_iocb_avail(vha, rsp, (response_t *)pkt)) {
++                              if (qla_chk_cont_iocb_avail(vha, rsp, (response_t *)pkt, rsp_in)) {
++                                      /*
++                                       * ring_ptr and ring_index were
++                                       * pre-incremented above. Reset them
++                                       * back to current. Wait for next
++                                       * interrupt with all IOCBs to arrive
++                                       * and re-process.
++                                       */
++                                      rsp->ring_ptr = (response_t *)pkt;
++                                      rsp->ring_index = cur_ring_index;
++
+                                       ql_dbg(ql_dbg_init, vha, 0x5091,
+                                           "Defer processing ELS opcode %#x...\n",
+                                           purex_entry->els_frame_payload[3]);
+-- 
+2.35.1
+
diff --git a/queue-5.18/scsi-qla2xxx-fix-crash-due-to-stale-srb-access-aroun.patch b/queue-5.18/scsi-qla2xxx-fix-crash-due-to-stale-srb-access-aroun.patch
new file mode 100644 (file)
index 0000000..3238e59
--- /dev/null
@@ -0,0 +1,125 @@
+From ff24e11b6f46bae7af4d61b2021903bdc747c5ec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 22:35:02 -0700
+Subject: scsi: qla2xxx: Fix crash due to stale SRB access around I/O timeouts
+
+From: Arun Easi <aeasi@marvell.com>
+
+[ Upstream commit c39587bc0abaf16593f7abcdf8aeec3c038c7d52 ]
+
+Ensure the SRB is returned during I/O timeout error escalation. If that is
+not possible, fail the escalation path.
+
+Following crash stack was seen:
+
+BUG: unable to handle kernel paging request at 0000002f56aa90f8
+IP: qla_chk_edif_rx_sa_delete_pending+0x14/0x30 [qla2xxx]
+Call Trace:
+ ? qla2x00_status_entry+0x19f/0x1c50 [qla2xxx]
+ ? qla2x00_start_sp+0x116/0x1170 [qla2xxx]
+ ? dma_pool_alloc+0x1d6/0x210
+ ? mempool_alloc+0x54/0x130
+ ? qla24xx_process_response_queue+0x548/0x12b0 [qla2xxx]
+ ? qla_do_work+0x2d/0x40 [qla2xxx]
+ ? process_one_work+0x14c/0x390
+
+Link: https://lore.kernel.org/r/20220616053508.27186-6-njavali@marvell.com
+Fixes: d74595278f4a ("scsi: qla2xxx: Add multiple queue pair functionality.")
+Cc: stable@vger.kernel.org
+Signed-off-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_os.c | 43 +++++++++++++++++++++++++----------
+ 1 file changed, 31 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
+index daa9a3c3f7b5..f9ad0847782d 100644
+--- a/drivers/scsi/qla2xxx/qla_os.c
++++ b/drivers/scsi/qla2xxx/qla_os.c
+@@ -1342,21 +1342,20 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
+ /*
+  * Returns: QLA_SUCCESS or QLA_FUNCTION_FAILED.
+  */
+-int
+-qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t,
+-      uint64_t l, enum nexus_wait_type type)
++static int
++__qla2x00_eh_wait_for_pending_commands(struct qla_qpair *qpair, unsigned int t,
++                                     uint64_t l, enum nexus_wait_type type)
+ {
+       int cnt, match, status;
+       unsigned long flags;
+-      struct qla_hw_data *ha = vha->hw;
+-      struct req_que *req;
++      scsi_qla_host_t *vha = qpair->vha;
++      struct req_que *req = qpair->req;
+       srb_t *sp;
+       struct scsi_cmnd *cmd;
+       status = QLA_SUCCESS;
+-      spin_lock_irqsave(&ha->hardware_lock, flags);
+-      req = vha->req;
++      spin_lock_irqsave(qpair->qp_lock_ptr, flags);
+       for (cnt = 1; status == QLA_SUCCESS &&
+               cnt < req->num_outstanding_cmds; cnt++) {
+               sp = req->outstanding_cmds[cnt];
+@@ -1383,12 +1382,32 @@ qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t,
+               if (!match)
+                       continue;
+-              spin_unlock_irqrestore(&ha->hardware_lock, flags);
++              spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
+               status = qla2x00_eh_wait_on_command(cmd);
+-              spin_lock_irqsave(&ha->hardware_lock, flags);
++              spin_lock_irqsave(qpair->qp_lock_ptr, flags);
+       }
+-      spin_unlock_irqrestore(&ha->hardware_lock, flags);
++      spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
++
++      return status;
++}
++
++int
++qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t,
++                                   uint64_t l, enum nexus_wait_type type)
++{
++      struct qla_qpair *qpair;
++      struct qla_hw_data *ha = vha->hw;
++      int i, status = QLA_SUCCESS;
++      status = __qla2x00_eh_wait_for_pending_commands(ha->base_qpair, t, l,
++                                                      type);
++      for (i = 0; status == QLA_SUCCESS && i < ha->max_qpairs; i++) {
++              qpair = ha->queue_pair_map[i];
++              if (!qpair)
++                      continue;
++              status = __qla2x00_eh_wait_for_pending_commands(qpair, t, l,
++                                                              type);
++      }
+       return status;
+ }
+@@ -1425,7 +1444,7 @@ qla2xxx_eh_device_reset(struct scsi_cmnd *cmd)
+               return err;
+       if (fcport->deleted)
+-              return SUCCESS;
++              return FAILED;
+       ql_log(ql_log_info, vha, 0x8009,
+           "DEVICE RESET ISSUED nexus=%ld:%d:%llu cmd=%p.\n", vha->host_no,
+@@ -1493,7 +1512,7 @@ qla2xxx_eh_target_reset(struct scsi_cmnd *cmd)
+               return err;
+       if (fcport->deleted)
+-              return SUCCESS;
++              return FAILED;
+       ql_log(ql_log_info, vha, 0x8009,
+           "TARGET RESET ISSUED nexus=%ld:%d cmd=%p.\n", vha->host_no,
+-- 
+2.35.1
+
diff --git a/queue-5.18/scsi-qla2xxx-fix-discovery-issues-in-fc-al-topology.patch-4818 b/queue-5.18/scsi-qla2xxx-fix-discovery-issues-in-fc-al-topology.patch-4818
new file mode 100644 (file)
index 0000000..323af66
--- /dev/null
@@ -0,0 +1,116 @@
+From ada2019561e89a831747aab73d489754049451a5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 22:20:42 -0700
+Subject: scsi: qla2xxx: Fix discovery issues in FC-AL topology
+
+From: Arun Easi <aeasi@marvell.com>
+
+[ Upstream commit 47ccb113cead905bdc236571bf8ac6fed90321b3 ]
+
+A direct attach tape device, when swapped with another, was not
+discovered. Fix this by looking at the loop map and reinitializing the
+link if devices are present.
+
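+(The loop map check works because the first byte of the FC-AL position
+map holds the entry count, as captured by the qla_mbx.c hunk below;
+loop_map_entries > 1 therefore indicates loop devices beyond the local
+port that have not yet logged in.)
+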
+Link: https://lore.kernel.org/linux-scsi/baef87c3-5dad-3b47-44c1-6914bfc90108@cybernetics.com/
+Link: https://lore.kernel.org/r/20220713052045.10683-8-njavali@marvell.com
+Cc: stable@vger.kernel.org
+Reported-by: Tony Battersby <tonyb@cybernetics.com>
+Tested-by: Tony Battersby <tonyb@cybernetics.com>
+Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
+Signed-off-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_gbl.h  |  3 ++-
+ drivers/scsi/qla2xxx/qla_init.c | 29 +++++++++++++++++++++++++++++
+ drivers/scsi/qla2xxx/qla_mbx.c  |  5 ++++-
+ 3 files changed, 35 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
+index 20ae0ef7d078..331b33200f50 100644
+--- a/drivers/scsi/qla2xxx/qla_gbl.h
++++ b/drivers/scsi/qla2xxx/qla_gbl.h
+@@ -436,7 +436,8 @@ extern int
+ qla2x00_get_resource_cnts(scsi_qla_host_t *);
+ extern int
+-qla2x00_get_fcal_position_map(scsi_qla_host_t *ha, char *pos_map);
++qla2x00_get_fcal_position_map(scsi_qla_host_t *ha, char *pos_map,
++              u8 *num_entries);
+ extern int
+ qla2x00_get_link_status(scsi_qla_host_t *, uint16_t, struct link_statistics *,
+diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
+index 01c7eda51d5a..51503a316b10 100644
+--- a/drivers/scsi/qla2xxx/qla_init.c
++++ b/drivers/scsi/qla2xxx/qla_init.c
+@@ -5516,6 +5516,22 @@ static int qla2x00_configure_n2n_loop(scsi_qla_host_t *vha)
+       return QLA_FUNCTION_FAILED;
+ }
++static void
++qla_reinitialize_link(scsi_qla_host_t *vha)
++{
++      int rval;
++
++      atomic_set(&vha->loop_state, LOOP_DOWN);
++      atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME);
++      rval = qla2x00_full_login_lip(vha);
++      if (rval == QLA_SUCCESS) {
++              ql_dbg(ql_dbg_disc, vha, 0xd050, "Link reinitialized\n");
++      } else {
++              ql_dbg(ql_dbg_disc, vha, 0xd051,
++                      "Link reinitialization failed (%d)\n", rval);
++      }
++}
++
+ /*
+  * qla2x00_configure_local_loop
+  *    Updates Fibre Channel Device Database with local loop devices.
+@@ -5567,6 +5583,19 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha)
+               spin_unlock_irqrestore(&vha->work_lock, flags);
+               if (vha->scan.scan_retry < MAX_SCAN_RETRIES) {
++                      u8 loop_map_entries = 0;
++                      int rc;
++
++                      rc = qla2x00_get_fcal_position_map(vha, NULL,
++                                              &loop_map_entries);
++                      if (rc == QLA_SUCCESS && loop_map_entries > 1) {
++                              /*
++                               * There are devices that are still not logged
++                               * in. Reinitialize to give them a chance.
++                               */
++                              qla_reinitialize_link(vha);
++                              return QLA_FUNCTION_FAILED;
++                      }
+                       set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
+                       set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
+               }
+diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
+index bcade1deb798..86d8c455c07a 100644
+--- a/drivers/scsi/qla2xxx/qla_mbx.c
++++ b/drivers/scsi/qla2xxx/qla_mbx.c
+@@ -3068,7 +3068,8 @@ qla2x00_get_resource_cnts(scsi_qla_host_t *vha)
+  *    Kernel context.
+  */
+ int
+-qla2x00_get_fcal_position_map(scsi_qla_host_t *vha, char *pos_map)
++qla2x00_get_fcal_position_map(scsi_qla_host_t *vha, char *pos_map,
++              u8 *num_entries)
+ {
+       int rval;
+       mbx_cmd_t mc;
+@@ -3108,6 +3109,8 @@ qla2x00_get_fcal_position_map(scsi_qla_host_t *vha, char *pos_map)
+               if (pos_map)
+                       memcpy(pos_map, pmap, FCAL_MAP_SIZE);
++              if (num_entries)
++                      *num_entries = pmap[0];
+       }
+       dma_pool_free(ha->s_dma_pool, pmap, pmap_dma);
+-- 
+2.35.1
+
diff --git a/queue-5.18/scsi-qla2xxx-fix-erroneous-mailbox-timeout-after-pci.patch b/queue-5.18/scsi-qla2xxx-fix-erroneous-mailbox-timeout-after-pci.patch
new file mode 100644 (file)
index 0000000..21005b3
--- /dev/null
@@ -0,0 +1,67 @@
+From ed1aa089d6371962ed13e3fe349ef0d02f660393 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 22:35:07 -0700
+Subject: scsi: qla2xxx: Fix erroneous mailbox timeout after PCI error
+ injection
+
+From: Quinn Tran <qutran@marvell.com>
+
+[ Upstream commit f260694e6463b63ae550aad25ddefe94cb1904da ]
+
+Clear the wait-for-mailbox-interrupt flag to prevent stale mailbox
+completions:
+
+Feb 22 05:22:56 ltcden4-lp7 kernel: qla2xxx [0135:90:00.1]-500a:4: LOOP UP detected (16 Gbps).
+Feb 22 05:22:59 ltcden4-lp7 kernel: qla2xxx [0135:90:00.1]-d04c:4: MBX Command timeout for cmd 69, ...
+
+To fix the issue, the driver needs to clear the MBX_INTR_WAIT flag when
+purging the mailbox. When the stale mailbox completion does arrive, it
+will be dropped.
+
+Link: https://lore.kernel.org/r/20220616053508.27186-11-njavali@marvell.com
+Fixes: b6faaaf796d7 ("scsi: qla2xxx: Serialize mailbox request")
+Cc: Naresh Bannoth <nbannoth@in.ibm.com>
+Cc: Kyle Mahlkuch <Kyle.Mahlkuch@ibm.com>
+Cc: stable@vger.kernel.org
+Reported-by: Naresh Bannoth <nbannoth@in.ibm.com>
+Tested-by: Naresh Bannoth <nbannoth@in.ibm.com>
+Signed-off-by: Quinn Tran <qutran@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_mbx.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
+index 15d8866046b3..bcade1deb798 100644
+--- a/drivers/scsi/qla2xxx/qla_mbx.c
++++ b/drivers/scsi/qla2xxx/qla_mbx.c
+@@ -276,6 +276,12 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
+               atomic_inc(&ha->num_pend_mbx_stage3);
+               if (!wait_for_completion_timeout(&ha->mbx_intr_comp,
+                   mcp->tov * HZ)) {
++                      ql_dbg(ql_dbg_mbx, vha, 0x117a,
++                          "cmd=%x Timeout.\n", command);
++                      spin_lock_irqsave(&ha->hardware_lock, flags);
++                      clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
++                      spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
+                       if (chip_reset != ha->chip_reset) {
+                               eeh_delay = ha->flags.eeh_busy ? 1 : 0;
+@@ -288,12 +294,6 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
+                               rval = QLA_ABORTED;
+                               goto premature_exit;
+                       }
+-                      ql_dbg(ql_dbg_mbx, vha, 0x117a,
+-                          "cmd=%x Timeout.\n", command);
+-                      spin_lock_irqsave(&ha->hardware_lock, flags);
+-                      clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
+-                      spin_unlock_irqrestore(&ha->hardware_lock, flags);
+-
+               } else if (ha->flags.purge_mbox ||
+                   chip_reset != ha->chip_reset) {
+                       eeh_delay = ha->flags.eeh_busy ? 1 : 0;
+-- 
+2.35.1
+
diff --git a/queue-5.18/scsi-qla2xxx-fix-excessive-i-o-error-messages-by-def.patch b/queue-5.18/scsi-qla2xxx-fix-excessive-i-o-error-messages-by-def.patch
new file mode 100644 (file)
index 0000000..3d45b9e
--- /dev/null
@@ -0,0 +1,48 @@
+From 1537088b80ff6a934403b341ee3ffb445867f3d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 22:34:58 -0700
+Subject: scsi: qla2xxx: Fix excessive I/O error messages by default
+
+From: Arun Easi <aeasi@marvell.com>
+
+[ Upstream commit bff4873c709085e09d0ffae0c25b8e65256e3205 ]
+
+Disable printing I/O error messages by default.  The messages are now
+printed only when extended logging is enabled (via the
+ql2xextended_error_logging module parameter).
+
+Link: https://lore.kernel.org/r/20220616053508.27186-2-njavali@marvell.com
+Fixes: 8e2d81c6b5be ("scsi: qla2xxx: Fix excessive messages during device logout")
+Cc: stable@vger.kernel.org
+Signed-off-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_isr.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
+index ad55eace66aa..5e99f559230f 100644
+--- a/drivers/scsi/qla2xxx/qla_isr.c
++++ b/drivers/scsi/qla2xxx/qla_isr.c
+@@ -2637,7 +2637,7 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
+       }
+       if (unlikely(logit))
+-              ql_log(ql_dbg_io, fcport->vha, 0x5060,
++              ql_dbg(ql_dbg_io, fcport->vha, 0x5060,
+                  "NVME-%s ERR Handling - hdl=%x status(%x) tr_len:%x resid=%x  ox_id=%x\n",
+                  sp->name, sp->handle, comp_status,
+                  fd->transferred_length, le32_to_cpu(sts->residual_len),
+@@ -3495,7 +3495,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
+ out:
+       if (logit)
+-              ql_log(ql_dbg_io, fcport->vha, 0x3022,
++              ql_dbg(ql_dbg_io, fcport->vha, 0x3022,
+                      "FCP command status: 0x%x-0x%x (0x%x) nexus=%ld:%d:%llu portid=%02x%02x%02x oxid=0x%x cdb=%10phN len=0x%x rsp_info=0x%x resid=0x%x fw_resid=0x%x sp=%p cp=%p.\n",
+                      comp_status, scsi_status, res, vha->host_no,
+                      cp->device->id, cp->device->lun, fcport->d_id.b.domain,
+-- 
+2.35.1
+
diff --git a/queue-5.18/scsi-qla2xxx-fix-imbalance-vha-vref_count.patch-27970 b/queue-5.18/scsi-qla2xxx-fix-imbalance-vha-vref_count.patch-27970
new file mode 100644 (file)
index 0000000..0b1d6e1
--- /dev/null
@@ -0,0 +1,61 @@
+From 10606f5e0ba0d8d8210cc69be72aa4035377a56d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 22:20:41 -0700
+Subject: scsi: qla2xxx: Fix imbalance vha->vref_count
+
+From: Quinn Tran <qutran@marvell.com>
+
+[ Upstream commit 63fa7f2644b4b48e1913af33092c044bf48e9321 ]
+
+vref_count took an extra decrement in the task management path.  Take an
+extra reference to compensate for the imbalance.
+
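+For reference, QLA_VHA_MARK_BUSY() is what takes that reference; a
+simplified sketch of the driver macro (see qla_def.h for the exact
+definition):
+
+    atomic_inc(&vha->vref_count);
+    mb();
+    if (vha->flags.delete_progress) {       /* vha is being torn down */
+            atomic_dec(&vha->vref_count);   /* undo and bail out */
+            bail = 1;
+    } else {
+            bail = 0;
+    }
+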
+Link: https://lore.kernel.org/r/20220713052045.10683-7-njavali@marvell.com
+Cc: stable@vger.kernel.org
+Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
+Signed-off-by: Quinn Tran <qutran@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_init.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
+index 3df66934fb1e..01c7eda51d5a 100644
+--- a/drivers/scsi/qla2xxx/qla_init.c
++++ b/drivers/scsi/qla2xxx/qla_init.c
+@@ -168,6 +168,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
+       struct srb_iocb *abt_iocb;
+       srb_t *sp;
+       int rval = QLA_FUNCTION_FAILED;
++      uint8_t bail;
+       /* ref: INIT for ABTS command */
+       sp = qla2xxx_get_qpair_sp(cmd_sp->vha, cmd_sp->qpair, cmd_sp->fcport,
+@@ -175,6 +176,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
+       if (!sp)
+               return QLA_MEMORY_ALLOC_FAILED;
++      QLA_VHA_MARK_BUSY(vha, bail);
+       abt_iocb = &sp->u.iocb_cmd;
+       sp->type = SRB_ABT_CMD;
+       sp->name = "abort";
+@@ -2018,12 +2020,14 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun,
+       struct srb_iocb *tm_iocb;
+       srb_t *sp;
+       int rval = QLA_FUNCTION_FAILED;
++      uint8_t bail;
+       /* ref: INIT */
+       sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+       if (!sp)
+               goto done;
++      QLA_VHA_MARK_BUSY(vha, bail);
+       sp->type = SRB_TM_CMD;
+       sp->name = "tmf";
+       qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha),
+-- 
+2.35.1
+
diff --git a/queue-5.18/scsi-qla2xxx-fix-losing-fcp-2-targets-during-port-pe.patch b/queue-5.18/scsi-qla2xxx-fix-losing-fcp-2-targets-during-port-pe.patch
new file mode 100644 (file)
index 0000000..1bb7597
--- /dev/null
@@ -0,0 +1,41 @@
+From eea51e30df139a84ea79448c0a19833387a8bb12 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 22:35:03 -0700
+Subject: scsi: qla2xxx: Fix losing FCP-2 targets during port perturbation
+ tests
+
+From: Arun Easi <aeasi@marvell.com>
+
+[ Upstream commit 58d1c124cd79ea686b512043c5bd515590b2ed95 ]
+
+When a mix of FCP-2 (tape) and non-FCP-2 targets is present, the FCP-2
+target state was incorrectly transitioned when both targets were gone.
+Fix this by skipping the state transition for FCP-2 targets.
+
+Link: https://lore.kernel.org/r/20220616053508.27186-7-njavali@marvell.com
+Fixes: 44c57f205876 ("scsi: qla2xxx: Changes to support FCP2 Target")
+Cc: stable@vger.kernel.org
+Signed-off-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_gs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
+index c914b5df9c12..7ca734337000 100644
+--- a/drivers/scsi/qla2xxx/qla_gs.c
++++ b/drivers/scsi/qla2xxx/qla_gs.c
+@@ -3629,7 +3629,7 @@ void qla24xx_async_gnnft_done(scsi_qla_host_t *vha, srb_t *sp)
+                               do_delete) {
+                               if (fcport->loop_id != FC_NO_LOOP_ID) {
+                                       if (fcport->flags & FCF_FCP2_DEVICE)
+-                                              fcport->logout_on_delete = 0;
++                                              continue;
+                                       ql_log(ql_log_warn, vha, 0x20f0,
+                                              "%s %d %8phC post del sess\n",
+-- 
+2.35.1
+
diff --git a/queue-5.18/scsi-qla2xxx-fix-losing-fcp-2-targets-on-long-port-d.patch b/queue-5.18/scsi-qla2xxx-fix-losing-fcp-2-targets-on-long-port-d.patch
new file mode 100644 (file)
index 0000000..86c4dce
--- /dev/null
@@ -0,0 +1,72 @@
+From cd632680dceb7e22c58ec69f997ffaaf7c4c0dac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 22:35:06 -0700
+Subject: scsi: qla2xxx: Fix losing FCP-2 targets on long port disable with
+ I/Os
+
+From: Arun Easi <aeasi@marvell.com>
+
+[ Upstream commit 2416ccd3815ba1613e10a6da0a24ef21acfe5633 ]
+
+FCP-2 devices were not coming back online once they were lost, their login
+retries were exhausted, and they then came back up.  Fix this by accepting
+RSCN when the device is not online.
+
+Link: https://lore.kernel.org/r/20220616053508.27186-10-njavali@marvell.com
+Fixes: 44c57f205876 ("scsi: qla2xxx: Changes to support FCP2 Target")
+Cc: stable@vger.kernel.org
+Signed-off-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_init.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
+index a0d26e2e0ce1..3df66934fb1e 100644
+--- a/drivers/scsi/qla2xxx/qla_init.c
++++ b/drivers/scsi/qla2xxx/qla_init.c
+@@ -1832,7 +1832,8 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea)
+       case RSCN_PORT_ADDR:
+               fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1);
+               if (fcport) {
+-                      if (fcport->flags & FCF_FCP2_DEVICE) {
++                      if (fcport->flags & FCF_FCP2_DEVICE &&
++                          atomic_read(&fcport->state) == FCS_ONLINE) {
+                               ql_dbg(ql_dbg_disc, vha, 0x2115,
+                                      "Delaying session delete for FCP2 portid=%06x %8phC ",
+                                       fcport->d_id.b24, fcport->port_name);
+@@ -1864,7 +1865,8 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea)
+               break;
+       case RSCN_AREA_ADDR:
+               list_for_each_entry(fcport, &vha->vp_fcports, list) {
+-                      if (fcport->flags & FCF_FCP2_DEVICE)
++                      if (fcport->flags & FCF_FCP2_DEVICE &&
++                          atomic_read(&fcport->state) == FCS_ONLINE)
+                               continue;
+                       if ((ea->id.b24 & 0xffff00) == (fcport->d_id.b24 & 0xffff00)) {
+@@ -1875,7 +1877,8 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea)
+               break;
+       case RSCN_DOM_ADDR:
+               list_for_each_entry(fcport, &vha->vp_fcports, list) {
+-                      if (fcport->flags & FCF_FCP2_DEVICE)
++                      if (fcport->flags & FCF_FCP2_DEVICE &&
++                          atomic_read(&fcport->state) == FCS_ONLINE)
+                               continue;
+                       if ((ea->id.b24 & 0xff0000) == (fcport->d_id.b24 & 0xff0000)) {
+@@ -1887,7 +1890,8 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea)
+       case RSCN_FAB_ADDR:
+       default:
+               list_for_each_entry(fcport, &vha->vp_fcports, list) {
+-                      if (fcport->flags & FCF_FCP2_DEVICE)
++                      if (fcport->flags & FCF_FCP2_DEVICE &&
++                          atomic_read(&fcport->state) == FCS_ONLINE)
+                               continue;
+                       fcport->scan_needed = 1;
+-- 
+2.35.1
+
diff --git a/queue-5.18/scsi-qla2xxx-fix-losing-target-when-it-reappears-dur.patch b/queue-5.18/scsi-qla2xxx-fix-losing-target-when-it-reappears-dur.patch
new file mode 100644 (file)
index 0000000..0fe225a
--- /dev/null
@@ -0,0 +1,84 @@
+From a9228406b04cbe27522f81b77535befe4c5e7924 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 22:35:04 -0700
+Subject: scsi: qla2xxx: Fix losing target when it reappears during delete
+
+From: Arun Easi <aeasi@marvell.com>
+
+[ Upstream commit 118b0c863c8f5629cc5271fc24d72d926e0715d9 ]
+
+An FC target disappeared during port perturbation tests due to a race that
+tramples the target state.  Fix the issue by adding state checks before
+proceeding.
+
+Link: https://lore.kernel.org/r/20220616053508.27186-8-njavali@marvell.com
+Fixes: 44c57f205876 ("scsi: qla2xxx: Changes to support FCP2 Target")
+Cc: stable@vger.kernel.org
+Signed-off-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_attr.c | 24 +++++++++++++++++-------
+ 1 file changed, 17 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
+index 3b3e4234f37a..412ad888bdc1 100644
+--- a/drivers/scsi/qla2xxx/qla_attr.c
++++ b/drivers/scsi/qla2xxx/qla_attr.c
+@@ -2716,17 +2716,24 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport)
+       if (!fcport)
+               return;
+-      /* Now that the rport has been deleted, set the fcport state to
+-         FCS_DEVICE_DEAD */
+-      qla2x00_set_fcport_state(fcport, FCS_DEVICE_DEAD);
++
++      /*
++       * Now that the rport has been deleted, set the fcport state to
++       * FCS_DEVICE_DEAD, if the fcport is still lost.
++       */
++      if (fcport->scan_state != QLA_FCPORT_FOUND)
++              qla2x00_set_fcport_state(fcport, FCS_DEVICE_DEAD);
+       /*
+        * Transport has effectively 'deleted' the rport, clear
+        * all local references.
+        */
+       spin_lock_irqsave(host->host_lock, flags);
+-      fcport->rport = fcport->drport = NULL;
+-      *((fc_port_t **)rport->dd_data) = NULL;
++      /* Confirm port has not reappeared before clearing pointers. */
++      if (rport->port_state != FC_PORTSTATE_ONLINE) {
++              fcport->rport = fcport->drport = NULL;
++              *((fc_port_t **)rport->dd_data) = NULL;
++      }
+       spin_unlock_irqrestore(host->host_lock, flags);
+       if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags))
+@@ -2759,9 +2766,12 @@ qla2x00_terminate_rport_io(struct fc_rport *rport)
+       /*
+        * At this point all fcport's software-states are cleared.  Perform any
+        * final cleanup of firmware resources (PCBs and XCBs).
++       *
++       * Attempt to cleanup only lost devices.
+        */
+       if (fcport->loop_id != FC_NO_LOOP_ID) {
+-              if (IS_FWI2_CAPABLE(fcport->vha->hw)) {
++              if (IS_FWI2_CAPABLE(fcport->vha->hw) &&
++                  fcport->scan_state != QLA_FCPORT_FOUND) {
+                       if (fcport->loop_id != FC_NO_LOOP_ID)
+                               fcport->logout_on_delete = 1;
+@@ -2771,7 +2781,7 @@ qla2x00_terminate_rport_io(struct fc_rport *rport)
+                                      __LINE__);
+                               qlt_schedule_sess_for_deletion(fcport);
+                       }
+-              } else {
++              } else if (!IS_FWI2_CAPABLE(fcport->vha->hw)) {
+                       qla2x00_port_logout(fcport->vha, fcport);
+               }
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.18/scsi-qla2xxx-fix-response-queue-handler-reading-stal.patch b/queue-5.18/scsi-qla2xxx-fix-response-queue-handler-reading-stal.patch
new file mode 100644 (file)
index 0000000..08cf6a6
--- /dev/null
@@ -0,0 +1,128 @@
+From ac1c86d5f6f0826e7897d9395a1c6abba08a0f8d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 22:20:39 -0700
+Subject: scsi: qla2xxx: Fix response queue handler reading stale packets
+
+From: Arun Easi <aeasi@marvell.com>
+
+[ Upstream commit b1f707146923335849fb70237eec27d4d1ae7d62 ]
+
+On some platforms, the current logic of finding a new packet solely based
+on a signature pattern can lead to the driver reading stale packets.
+Though this is a bug in those platforms, reduce such exposure by reading
+packets only up to the IN pointer.
+
+Two module parameters are introduced:
+
+  ql2xrspq_follow_inptr:
+
+    When set, on newer adapters that have queue pointer shadowing, look
+    for response packets only up to the response queue IN pointer.
+
+    When reset, response packets are read based on the signature pattern
+    logic (the old way).
+
+  ql2xrspq_follow_inptr_legacy:
+
+    Like ql2xrspq_follow_inptr, but for those adapters where there is no
+    queue pointer shadowing.
+
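+For example, to fall back to the old signature-only scan on a legacy HBA
+(standard module-parameter syntax; the parameter names are the ones added
+by this patch):
+
+    modprobe qla2xxx ql2xrspq_follow_inptr_legacy=0
+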
+Link: https://lore.kernel.org/r/20220713052045.10683-5-njavali@marvell.com
+Cc: stable@vger.kernel.org
+Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
+Signed-off-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_gbl.h |  2 ++
+ drivers/scsi/qla2xxx/qla_isr.c | 24 +++++++++++++++++++++++-
+ drivers/scsi/qla2xxx/qla_os.c  | 10 ++++++++++
+ 3 files changed, 35 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
+index 84b44454c231..20ae0ef7d078 100644
+--- a/drivers/scsi/qla2xxx/qla_gbl.h
++++ b/drivers/scsi/qla2xxx/qla_gbl.h
+@@ -193,6 +193,8 @@ extern int ql2xsecenable;
+ extern int ql2xenforce_iocb_limit;
+ extern int ql2xabts_wait_nvme;
+ extern u32 ql2xnvme_queues;
++extern int ql2xrspq_follow_inptr;
++extern int ql2xrspq_follow_inptr_legacy;
+ extern int qla2x00_loop_reset(scsi_qla_host_t *);
+ extern void qla2x00_abort_all_cmds(scsi_qla_host_t *, int);
+diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
+index de348628aa53..c509bb23af40 100644
+--- a/drivers/scsi/qla2xxx/qla_isr.c
++++ b/drivers/scsi/qla2xxx/qla_isr.c
+@@ -3771,6 +3771,8 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+       struct qla_hw_data *ha = vha->hw;
+       struct purex_entry_24xx *purex_entry;
+       struct purex_item *pure_item;
++      u16 rsp_in = 0;
++      int follow_inptr, is_shadow_hba;
+       if (!ha->flags.fw_started)
+               return;
+@@ -3780,7 +3782,25 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+               qla_cpu_update(rsp->qpair, smp_processor_id());
+       }
+-      while (rsp->ring_ptr->signature != RESPONSE_PROCESSED) {
++#define __update_rsp_in(_update, _is_shadow_hba, _rsp, _rsp_in)               \
++      do {                                                            \
++              if (_update) {                                          \
++                      _rsp_in = _is_shadow_hba ? *(_rsp)->in_ptr :    \
++                              rd_reg_dword_relaxed((_rsp)->rsp_q_in); \
++              }                                                       \
++      } while (0)
++
++      is_shadow_hba = IS_SHADOW_REG_CAPABLE(ha);
++      follow_inptr = is_shadow_hba ? ql2xrspq_follow_inptr :
++                              ql2xrspq_follow_inptr_legacy;
++
++      __update_rsp_in(follow_inptr, is_shadow_hba, rsp, rsp_in);
++
++      while ((likely(follow_inptr &&
++                     rsp->ring_index != rsp_in &&
++                     rsp->ring_ptr->signature != RESPONSE_PROCESSED)) ||
++                     (!follow_inptr &&
++                      rsp->ring_ptr->signature != RESPONSE_PROCESSED)) {
+               pkt = (struct sts_entry_24xx *)rsp->ring_ptr;
+               rsp->ring_index++;
+@@ -3893,6 +3913,8 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+                               }
+                               pure_item = qla27xx_copy_fpin_pkt(vha,
+                                                         (void **)&pkt, &rsp);
++                              __update_rsp_in(follow_inptr, is_shadow_hba,
++                                              rsp, rsp_in);
+                               if (!pure_item)
+                                       break;
+                               qla24xx_queue_purex_item(vha, pure_item,
+diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
+index f9ad0847782d..3bbfce3ccf2e 100644
+--- a/drivers/scsi/qla2xxx/qla_os.c
++++ b/drivers/scsi/qla2xxx/qla_os.c
+@@ -338,6 +338,16 @@ module_param(ql2xdelay_before_pci_error_handling, uint, 0644);
+ MODULE_PARM_DESC(ql2xdelay_before_pci_error_handling,
+       "Number of seconds delayed before qla begin PCI error self-handling (default: 5).\n");
++int ql2xrspq_follow_inptr = 1;
++module_param(ql2xrspq_follow_inptr, int, 0644);
++MODULE_PARM_DESC(ql2xrspq_follow_inptr,
++               "Follow RSP IN pointer for RSP updates for HBAs 27xx and newer (default: 1).");
++
++int ql2xrspq_follow_inptr_legacy = 1;
++module_param(ql2xrspq_follow_inptr_legacy, int, 0644);
++MODULE_PARM_DESC(ql2xrspq_follow_inptr_legacy,
++               "Follow RSP IN pointer for RSP updates for HBAs older than 27XX. (default: 1).");
++
+ static void qla2x00_clear_drv_active(struct qla_hw_data *);
+ static void qla2x00_free_device(scsi_qla_host_t *);
+ static int qla2xxx_map_queues(struct Scsi_Host *shost);
+-- 
+2.35.1
+
diff --git a/queue-5.18/scsi-qla2xxx-turn-off-multi-queue-for-8g-adapters.patch-18430 b/queue-5.18/scsi-qla2xxx-turn-off-multi-queue-for-8g-adapters.patch-18430
new file mode 100644 (file)
index 0000000..710fef0
--- /dev/null
@@ -0,0 +1,68 @@
+From 6467d2dbfbfe7cc5abe98d997e428cbfb58354d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 22:35:01 -0700
+Subject: scsi: qla2xxx: Turn off multi-queue for 8G adapters
+
+From: Quinn Tran <qutran@marvell.com>
+
+[ Upstream commit 5304673bdb1635e27555bd636fd5d6956f1cd552 ]
+
+For 8G adapters, multi-queue was enabled accidentally. Make sure
+multi-queue is not enabled.
+
+Link: https://lore.kernel.org/r/20220616053508.27186-5-njavali@marvell.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Quinn Tran <qutran@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_def.h |  4 ++--
+ drivers/scsi/qla2xxx/qla_isr.c | 16 ++++++----------
+ 2 files changed, 8 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
+index 4cbaea4b993e..01cdd5f8723c 100644
+--- a/drivers/scsi/qla2xxx/qla_def.h
++++ b/drivers/scsi/qla2xxx/qla_def.h
+@@ -4268,8 +4268,8 @@ struct qla_hw_data {
+ #define IS_OEM_001(ha)          ((ha)->device_type & DT_OEM_001)
+ #define HAS_EXTENDED_IDS(ha)    ((ha)->device_type & DT_EXTENDED_IDS)
+ #define IS_CT6_SUPPORTED(ha)  ((ha)->device_type & DT_CT6_SUPPORTED)
+-#define IS_MQUE_CAPABLE(ha)   ((ha)->mqenable || IS_QLA83XX(ha) || \
+-                              IS_QLA27XX(ha) || IS_QLA28XX(ha))
++#define IS_MQUE_CAPABLE(ha)   (IS_QLA83XX(ha) || IS_QLA27XX(ha) || \
++                               IS_QLA28XX(ha))
+ #define IS_BIDI_CAPABLE(ha) \
+     (IS_QLA25XX(ha) || IS_QLA2031(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))
+ /* Bit 21 of fw_attributes decides the MCTP capabilities */
+diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
+index 5e99f559230f..de348628aa53 100644
+--- a/drivers/scsi/qla2xxx/qla_isr.c
++++ b/drivers/scsi/qla2xxx/qla_isr.c
+@@ -4419,16 +4419,12 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp)
+       }
+       /* Enable MSI-X vector for response queue update for queue 0 */
+-      if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
+-              if (ha->msixbase && ha->mqiobase &&
+-                  (ha->max_rsp_queues > 1 || ha->max_req_queues > 1 ||
+-                   ql2xmqsupport))
+-                      ha->mqenable = 1;
+-      } else
+-              if (ha->mqiobase &&
+-                  (ha->max_rsp_queues > 1 || ha->max_req_queues > 1 ||
+-                   ql2xmqsupport))
+-                      ha->mqenable = 1;
++      if (IS_MQUE_CAPABLE(ha) &&
++          (ha->msixbase && ha->mqiobase && ha->max_qpairs))
++              ha->mqenable = 1;
++      else
++              ha->mqenable = 0;
++
+       ql_dbg(ql_dbg_multiq, vha, 0xc005,
+           "mqiobase=%p, max_rsp_queues=%d, max_req_queues=%d.\n",
+           ha->mqiobase, ha->max_rsp_queues, ha->max_req_queues);
+-- 
+2.35.1
+
diff --git a/queue-5.18/scsi-qla2xxx-update-manufacturer-details.patch b/queue-5.18/scsi-qla2xxx-update-manufacturer-details.patch
new file mode 100644 (file)
index 0000000..74b239d
--- /dev/null
@@ -0,0 +1,52 @@
+From fc0719b2de5e78eb7fd2b53cd1e1f8a06e81653f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 22:20:44 -0700
+Subject: scsi: qla2xxx: Update manufacturer details
+
+From: Bikash Hazarika <bhazarika@marvell.com>
+
+[ Upstream commit 1ccad27716ecad1fd58c35e579bedb81fa5e1ad5 ]
+
+Update manufacturer details to indicate Marvell Semiconductor, Inc.
+
+Link: https://lore.kernel.org/r/20220713052045.10683-10-njavali@marvell.com
+Cc: stable@vger.kernel.org
+Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
+Signed-off-by: Bikash Hazarika <bhazarika@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_def.h | 2 +-
+ drivers/scsi/qla2xxx/qla_gs.c  | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
+index 01cdd5f8723c..17b8a4e86ffb 100644
+--- a/drivers/scsi/qla2xxx/qla_def.h
++++ b/drivers/scsi/qla2xxx/qla_def.h
+@@ -78,7 +78,7 @@ typedef union {
+ #include "qla_nvme.h"
+ #define QLA2XXX_DRIVER_NAME   "qla2xxx"
+ #define QLA2XXX_APIDEV                "ql2xapidev"
+-#define QLA2XXX_MANUFACTURER  "QLogic Corporation"
++#define QLA2XXX_MANUFACTURER  "Marvell Semiconductor, Inc."
+ /*
+  * We have MAILBOX_REGISTER_COUNT sized arrays in a few places,
+diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
+index 7ca734337000..64ab070b8716 100644
+--- a/drivers/scsi/qla2xxx/qla_gs.c
++++ b/drivers/scsi/qla2xxx/qla_gs.c
+@@ -1616,7 +1616,7 @@ qla2x00_hba_attributes(scsi_qla_host_t *vha, void *entries,
+       eiter->type = cpu_to_be16(FDMI_HBA_MANUFACTURER);
+       alen = scnprintf(
+               eiter->a.manufacturer, sizeof(eiter->a.manufacturer),
+-              "%s", "QLogic Corporation");
++              "%s", QLA2XXX_MANUFACTURER);
+       alen += FDMI_ATTR_ALIGNMENT(alen);
+       alen += FDMI_ATTR_TYPELEN(eiter);
+       eiter->len = cpu_to_be16(alen);
+-- 
+2.35.1
+
diff --git a/queue-5.18/scsi-qla2xxx-wind-down-adapter-after-pcie-error.patch-27996 b/queue-5.18/scsi-qla2xxx-wind-down-adapter-after-pcie-error.patch-27996
new file mode 100644 (file)
index 0000000..978938a
--- /dev/null
@@ -0,0 +1,210 @@
+From 7d35e2215d13472f85fd7000bc0f76847bc4d08e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 22:35:00 -0700
+Subject: scsi: qla2xxx: Wind down adapter after PCIe error
+
+From: Quinn Tran <qutran@marvell.com>
+
+[ Upstream commit d3117c83ba316b3200d9f2fe900f2b9a5525a25c ]
+
+Put the adapter into a wind-down state if the OS does not make any attempt
+to recover the adapter after a PCIe error.
+
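+The delay before this self-handling kicks in is controlled by the new
+ql2xdelay_before_pci_error_handling module parameter added below; since
+it is created with 0644 permissions, it can also be adjusted at runtime,
+e.g.:
+
+    echo 10 > /sys/module/qla2xxx/parameters/ql2xdelay_before_pci_error_handling
+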
+Link: https://lore.kernel.org/r/20220616053508.27186-4-njavali@marvell.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Quinn Tran <qutran@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_bsg.c  | 10 ++++++-
+ drivers/scsi/qla2xxx/qla_def.h  |  4 +++
+ drivers/scsi/qla2xxx/qla_init.c | 20 ++++++++++++++
+ drivers/scsi/qla2xxx/qla_os.c   | 48 +++++++++++++++++++++++++++++++++
+ 4 files changed, 81 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
+index c2f00f076f79..726af9e40572 100644
+--- a/drivers/scsi/qla2xxx/qla_bsg.c
++++ b/drivers/scsi/qla2xxx/qla_bsg.c
+@@ -2975,6 +2975,13 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job)
+       ql_log(ql_log_info, vha, 0x708b, "%s CMD timeout. bsg ptr %p.\n",
+           __func__, bsg_job);
++
++      if (qla2x00_isp_reg_stat(ha)) {
++              ql_log(ql_log_info, vha, 0x9007,
++                  "PCI/Register disconnect.\n");
++              qla_pci_set_eeh_busy(vha);
++      }
++
+       /* find the bsg job from the active list of commands */
+       spin_lock_irqsave(&ha->hardware_lock, flags);
+       for (que = 0; que < ha->max_req_queues; que++) {
+@@ -2992,7 +2999,8 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job)
+                           sp->u.bsg_job == bsg_job) {
+                               req->outstanding_cmds[cnt] = NULL;
+                               spin_unlock_irqrestore(&ha->hardware_lock, flags);
+-                              if (ha->isp_ops->abort_command(sp)) {
++
++                              if (!ha->flags.eeh_busy && ha->isp_ops->abort_command(sp)) {
+                                       ql_log(ql_log_warn, vha, 0x7089,
+                                           "mbx abort_command failed.\n");
+                                       bsg_reply->result = -EIO;
+diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
+index 4062d46f33a6..4cbaea4b993e 100644
+--- a/drivers/scsi/qla2xxx/qla_def.h
++++ b/drivers/scsi/qla2xxx/qla_def.h
+@@ -4048,6 +4048,9 @@ struct qla_hw_data {
+               uint32_t        n2n_fw_acc_sec:1;
+               uint32_t        plogi_template_valid:1;
+               uint32_t        port_isolated:1;
++              uint32_t        eeh_flush:2;
++#define EEH_FLUSH_RDY  1
++#define EEH_FLUSH_DONE 2
+       } flags;
+       uint16_t max_exchg;
+@@ -4082,6 +4085,7 @@ struct qla_hw_data {
+       uint32_t                rsp_que_len;
+       uint32_t                req_que_off;
+       uint32_t                rsp_que_off;
++      unsigned long           eeh_jif;
+       /* Multi queue data structs */
+       device_reg_t *mqiobase;
+diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
+index 7bd10b4ed9ed..a0d26e2e0ce1 100644
+--- a/drivers/scsi/qla2xxx/qla_init.c
++++ b/drivers/scsi/qla2xxx/qla_init.c
+@@ -47,6 +47,7 @@ qla2x00_sp_timeout(struct timer_list *t)
+ {
+       srb_t *sp = from_timer(sp, t, u.iocb_cmd.timer);
+       struct srb_iocb *iocb;
++      scsi_qla_host_t *vha = sp->vha;
+       WARN_ON(irqs_disabled());
+       iocb = &sp->u.iocb_cmd;
+@@ -54,6 +55,12 @@ qla2x00_sp_timeout(struct timer_list *t)
+       /* ref: TMR */
+       kref_put(&sp->cmd_kref, qla2x00_sp_release);
++
++      if (vha && qla2x00_isp_reg_stat(vha->hw)) {
++              ql_log(ql_log_info, vha, 0x9008,
++                  "PCI/Register disconnect.\n");
++              qla_pci_set_eeh_busy(vha);
++      }
+ }
+ void qla2x00_sp_free(srb_t *sp)
+@@ -9669,6 +9676,12 @@ int qla2xxx_disable_port(struct Scsi_Host *host)
+       vha->hw->flags.port_isolated = 1;
++      if (qla2x00_isp_reg_stat(vha->hw)) {
++              ql_log(ql_log_info, vha, 0x9006,
++                  "PCI/Register disconnect, exiting.\n");
++              qla_pci_set_eeh_busy(vha);
++              return FAILED;
++      }
+       if (qla2x00_chip_is_down(vha))
+               return 0;
+@@ -9684,6 +9697,13 @@ int qla2xxx_enable_port(struct Scsi_Host *host)
+ {
+       scsi_qla_host_t *vha = shost_priv(host);
++      if (qla2x00_isp_reg_stat(vha->hw)) {
++              ql_log(ql_log_info, vha, 0x9001,
++                  "PCI/Register disconnect, exiting.\n");
++              qla_pci_set_eeh_busy(vha);
++              return FAILED;
++      }
++
+       vha->hw->flags.port_isolated = 0;
+       /* Set the flag to 1, so that isp_abort can proceed */
+       vha->flags.online = 1;
+diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
+index 3c68dad00d04..daa9a3c3f7b5 100644
+--- a/drivers/scsi/qla2xxx/qla_os.c
++++ b/drivers/scsi/qla2xxx/qla_os.c
+@@ -333,6 +333,11 @@ MODULE_PARM_DESC(ql2xabts_wait_nvme,
+                "To wait for ABTS response on I/O timeouts for NVMe. (default: 1)");
++u32 ql2xdelay_before_pci_error_handling = 5;
++module_param(ql2xdelay_before_pci_error_handling, uint, 0644);
++MODULE_PARM_DESC(ql2xdelay_before_pci_error_handling,
++      "Number of seconds to delay before qla2xxx begins PCI error self-handling (default: 5).\n");
++
+ static void qla2x00_clear_drv_active(struct qla_hw_data *);
+ static void qla2x00_free_device(scsi_qla_host_t *);
+ static int qla2xxx_map_queues(struct Scsi_Host *shost);
+@@ -7239,6 +7244,44 @@ static void qla_heart_beat(struct scsi_qla_host *vha, u16 dpc_started)
+       }
+ }
++static void qla_wind_down_chip(scsi_qla_host_t *vha)
++{
++      struct qla_hw_data *ha = vha->hw;
++
++      if (!ha->flags.eeh_busy)
++              return;
++      if (ha->pci_error_state)
++              /* system is trying to recover */
++              return;
++
++      /*
++       * The system is not currently handling the PCIe error.  At this
++       * point, make a best-effort attempt to wind down the adapter.
++       */
++      if (time_after_eq(jiffies, ha->eeh_jif + ql2xdelay_before_pci_error_handling * HZ) &&
++          !ha->flags.eeh_flush) {
++              ql_log(ql_log_info, vha, 0x9009,
++                  "PCI Error detected, attempting to reset hardware.\n");
++
++              ha->isp_ops->reset_chip(vha);
++              ha->isp_ops->disable_intrs(ha);
++
++              ha->flags.eeh_flush = EEH_FLUSH_RDY;
++              ha->eeh_jif = jiffies;
++
++      } else if (ha->flags.eeh_flush == EEH_FLUSH_RDY &&
++          time_after_eq(jiffies, ha->eeh_jif +  5 * HZ)) {
++              pci_clear_master(ha->pdev);
++
++              /* flush all commands */
++              qla2x00_abort_isp_cleanup(vha);
++              ha->flags.eeh_flush = EEH_FLUSH_DONE;
++
++              ql_log(ql_log_info, vha, 0x900a,
++                  "PCI Error handling complete, all IOs aborted.\n");
++      }
++}
++
+ /**************************************************************************
+ *   qla2x00_timer
+ *
+@@ -7262,6 +7305,8 @@ qla2x00_timer(struct timer_list *t)
+       fc_port_t *fcport = NULL;
+       if (ha->flags.eeh_busy) {
++              qla_wind_down_chip(vha);
++
+               ql_dbg(ql_dbg_timer, vha, 0x6000,
+                   "EEH = %d, restarting timer.\n",
+                   ha->flags.eeh_busy);
+@@ -7842,6 +7887,9 @@ void qla_pci_set_eeh_busy(struct scsi_qla_host *vha)
+       spin_lock_irqsave(&base_vha->work_lock, flags);
+       if (!ha->flags.eeh_busy) {
++              ha->eeh_jif = jiffies;
++              ha->flags.eeh_flush = 0;
++
+               ha->flags.eeh_busy = 1;
+               do_cleanup = true;
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.18/serial-8250-add-proper-clock-handling-for-oxsemi-pci.patch b/queue-5.18/serial-8250-add-proper-clock-handling-for-oxsemi-pci.patch
new file mode 100644 (file)
index 0000000..aa0068a
--- /dev/null
@@ -0,0 +1,764 @@
+From 6690821f686011456d2aa183db7777be5132b6b0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Apr 2022 16:27:33 +0100
+Subject: serial: 8250: Add proper clock handling for OxSemi PCIe devices
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+[ Upstream commit 366f6c955d4d1a5125ffcd6875ead26a3c7a2a1c ]
+
+Oxford Semiconductor PCIe (Tornado) 950 serial port devices are driven
+by a fixed 62.5MHz clock input derived from the 100MHz PCI Express clock.
+
+We currently drive the device using its default oversampling rate of 16
+and the clock prescaler disabled, consequently yielding the baud base of
+3906250.  This base is inadequate for some of the high-speed baud rates
+such as 460800bps, for which the closest rate possible can be obtained
+by dividing the baud base by 8, yielding the baud rate of 488281.25bps,
+which is off by 5.9638%.  This is enough for data communication to break
+with the remote end talking actual 460800bps, where missed stop bits
+have been observed.
+
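As a quick sanity check of the figures above, a minimal C snippet (an
illustrative sketch, not part of this patch) reproduces the 5.9638%
deviation from the old 3906250 baud base:

    #include <stdio.h>

    int main(void)
    {
            const double base = 3906250.0;  /* old baud base: 62.5 MHz / 16 */
            const double target = 460800.0;
            int div = (int)(base / target + 0.5);   /* closest divisor: 8 */
            double actual = base / div;             /* 488281.25 bps */

            printf("div=%d actual=%.2f deviation=%.4f%%\n",
                   div, actual, (actual - target) / target * 100.0);
            return 0;
    }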
+We can do better however, by taking advantage of a reduced oversampling
+rate, which can be set to any integer value from 4 to 16 inclusive by
+programming the TCR register, and by using the clock prescaler, which
+can be set to any value from 1 to 63.875 in increments of 0.125 in the
+CPR/CPR2 register pair.  The prescaler has to be explicitly enabled
+though by setting bit 7 in the MCR or otherwise it is bypassed (in the
+enhanced mode that we enable) as if the value of 1 was used.
+
+Make use of these features then as follows:
+
+- Set the baud base to 15625000, reflecting the minimum oversampling
+  rate of 4 with the clock prescaler and divisor both set to 1.
+
+- Override the `set_mctrl' and set the MCR shadow there so as to have
+  MCR[7] always set and have the 8250 core propagate these settings.
+
+- Override the `get_divisor' handler and determine a good combination of
+  parameters by using a lookup table with predetermined value pairs of
+  the oversampling rate and the clock prescaler and finding a pair that
+  divides the input clock such that the quotient, when rounded to the
+  nearest integer, deviates the least from the exact result.  Calculate
+  the clock divisor accordingly.
+
+  Scale the resulting oversampling rate (only by powers of two) if
+  possible so as to maximise it, reducing the divisor accordingly, and
+  avoid a divisor overflow for very low baud rates by scaling the
+  oversampling rate and/or the prescaler even if that causes some
+  accuracy loss.
+
+  Also handle the historic spd_cust feature so as to allow one to set
+  all three parameters manually to arbitrary values, by keeping the
+  low 16 bits for the divisor and then putting TCR in bits 19:16 and
+  CPR/CPR2 in bits 28:20, sanitising the bit pattern supplied such as
+  to clamp CPR/CPR2 values between 0.000 and 0.875 inclusive to 33.875.
+  This preserves compatibility with any existing setups: where a custom
+  divisor is requested that only has bits set among the low 16, the
+  oversampling rate of 16 and the clock prescaler of 33.875 will be
+  used, as with the original 8250.
+
+  Finally abuse the `frac' argument to store the determined bit patterns
+  for the TCR, CPR and CPR2 registers.
+
+- Override the `set_divisor' handler so as to set the TCR, CPR and CPR2
+  registers from the `frac' value supplied.  Set the divisor as usual.
+
+With the baud base set to 15625000 and the unsigned 16-bit UART_DIV_MAX
+limitation imposed by `serial8250_get_baud_rate' standard baud rates
+below 300bps become unavailable in the regular way, e.g. the rate of
+200bps requires the baud base to be divided by 78125 and that is beyond
+the unsigned 16-bit range.  The historic spd_cust feature can still be
+used to obtain such rates if so required.
+
+See Documentation/tty/device_drivers/oxsemi-tornado.rst for more details.
+
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Link: https://lore.kernel.org/r/alpine.DEB.2.21.2204181519450.9383@angie.orcam.me.uk
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../tty/device_drivers/oxsemi-tornado.rst     | 129 +++++++
+ drivers/tty/serial/8250/8250_pci.c            | 339 ++++++++++++++----
+ 2 files changed, 400 insertions(+), 68 deletions(-)
+ create mode 100644 Documentation/tty/device_drivers/oxsemi-tornado.rst
+
+diff --git a/Documentation/tty/device_drivers/oxsemi-tornado.rst b/Documentation/tty/device_drivers/oxsemi-tornado.rst
+new file mode 100644
+index 000000000000..0180d8bb0881
+--- /dev/null
++++ b/Documentation/tty/device_drivers/oxsemi-tornado.rst
+@@ -0,0 +1,129 @@
++.. SPDX-License-Identifier: GPL-2.0
++
++====================================================================
++Notes on Oxford Semiconductor PCIe (Tornado) 950 serial port devices
++====================================================================
++
++Oxford Semiconductor PCIe (Tornado) 950 serial port devices are driven
++by a fixed 62.5MHz clock input derived from the 100MHz PCI Express clock.
++
++The baud rate produced by the baud generator is obtained from this input
++frequency by dividing it by the clock prescaler, which can be set to any
++value from 1 to 63.875 in increments of 0.125, and then the usual 16-bit
++divisor is used as with the original 8250, to divide the frequency by a
++value from 1 to 65535.  Finally a programmable oversampling rate is used
++that can take any value from 4 to 16 to divide the frequency further and
++determine the actual baud rate used.  Baud rates from 15625000bps down
++to 0.933bps can be obtained this way.
++
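For illustration, the derivation above can be written as a small C
helper (an editorial sketch, not driver code; the prescaler is given in
the hardware's 1/8 steps):

    /* Actual rate for an oversampling rate (tcr, 4..16), a prescaler
     * scaled by 8 (cpr8, 8..511) and a 16-bit divisor (div), from the
     * fixed 62.5 MHz input clock. */
    static double tornado_rate(unsigned int tcr, unsigned int cpr8,
                               unsigned int div)
    {
            return 62500000.0 * 8.0 / ((double)cpr8 * div * tcr);
    }

For example tornado_rate(4, 8, 1) gives 15625000bps and
tornado_rate(16, 511, 61154) gives roughly 1bps, matching the extremes
quoted above.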
++By default the oversampling rate is set to 16 and the clock prescaler is
++set to 33.875, meaning that the frequency to be used as the reference
++for the usual 16-bit divisor is 115313.653, which is close enough to the
++frequency of 115200 used by the original 8250 for the same values to be
++used for the divisor to obtain the requested baud rates by software that
++is unaware of the extra clock controls available.
++
++The oversampling rate is programmed with the TCR register and the clock
++prescaler is programmed with the CPR/CPR2 register pair[1][2][3][4].
++To switch away from the default value of 33.875 for the prescaler,
++the enhanced mode has to be explicitly enabled though, by setting bit 4
++of the EFR.  In that mode setting bit 7 in the MCR enables the prescaler
++or otherwise it is bypassed as if the value of 1 was used.  Additionally
++writing any value to CPR clears CPR2 for compatibility with old software
++written for older conventional PCI Oxford Semiconductor devices that do
++not have the extra prescaler's 9th bit in CPR2, so the CPR/CPR2 register
++pair has to be programmed in the right order.
++
++By using these parameters rates from 15625000bps down to 1bps can be
++obtained, with either exact or highly-accurate actual bit rates for
++standard and many non-standard rates.
++
++Here are the figures for the standard and some non-standard baud rates
++(including those quoted in Oxford Semiconductor documentation), giving
++the requested rate (r), the actual rate yielded (a) and its deviation
++from the requested rate (d), and the values of the oversampling rate
++(tcr), the clock prescaler (cpr) and the divisor (div) produced by the
++new `get_divisor' handler:
++
++r: 15625000, a: 15625000.00, d:  0.0000%, tcr:  4, cpr:  1.000, div:     1
++r: 12500000, a: 12500000.00, d:  0.0000%, tcr:  5, cpr:  1.000, div:     1
++r: 10416666, a: 10416666.67, d:  0.0000%, tcr:  6, cpr:  1.000, div:     1
++r:  8928571, a:  8928571.43, d:  0.0000%, tcr:  7, cpr:  1.000, div:     1
++r:  7812500, a:  7812500.00, d:  0.0000%, tcr:  8, cpr:  1.000, div:     1
++r:  4000000, a:  4000000.00, d:  0.0000%, tcr:  5, cpr:  3.125, div:     1
++r:  3686400, a:  3676470.59, d: -0.2694%, tcr:  8, cpr:  2.125, div:     1
++r:  3500000, a:  3496503.50, d: -0.0999%, tcr: 13, cpr:  1.375, div:     1
++r:  3000000, a:  2976190.48, d: -0.7937%, tcr: 14, cpr:  1.500, div:     1
++r:  2500000, a:  2500000.00, d:  0.0000%, tcr: 10, cpr:  2.500, div:     1
++r:  2000000, a:  2000000.00, d:  0.0000%, tcr: 10, cpr:  3.125, div:     1
++r:  1843200, a:  1838235.29, d: -0.2694%, tcr: 16, cpr:  2.125, div:     1
++r:  1500000, a:  1492537.31, d: -0.4975%, tcr:  5, cpr:  8.375, div:     1
++r:  1152000, a:  1152073.73, d:  0.0064%, tcr: 14, cpr:  3.875, div:     1
++r:   921600, a:   919117.65, d: -0.2694%, tcr: 16, cpr:  2.125, div:     2
++r:   576000, a:   576036.87, d:  0.0064%, tcr: 14, cpr:  3.875, div:     2
++r:   460800, a:   460829.49, d:  0.0064%, tcr:  7, cpr:  3.875, div:     5
++r:   230400, a:   230414.75, d:  0.0064%, tcr: 14, cpr:  3.875, div:     5
++r:   115200, a:   115207.37, d:  0.0064%, tcr: 14, cpr:  1.250, div:    31
++r:    57600, a:    57603.69, d:  0.0064%, tcr:  8, cpr:  3.875, div:    35
++r:    38400, a:    38402.46, d:  0.0064%, tcr: 14, cpr:  3.875, div:    30
++r:    19200, a:    19201.23, d:  0.0064%, tcr:  8, cpr:  3.875, div:   105
++r:     9600, a:     9600.06, d:  0.0006%, tcr:  9, cpr:  1.125, div:   643
++r:     4800, a:     4799.98, d: -0.0004%, tcr:  7, cpr:  2.875, div:   647
++r:     2400, a:     2400.02, d:  0.0008%, tcr:  9, cpr:  2.250, div:  1286
++r:     1200, a:     1200.00, d:  0.0000%, tcr: 14, cpr:  2.875, div:  1294
++r:      300, a:      300.00, d:  0.0000%, tcr: 11, cpr:  2.625, div:  7215
++r:      200, a:      200.00, d:  0.0000%, tcr: 16, cpr:  1.250, div: 15625
++r:      150, a:      150.00, d:  0.0000%, tcr: 13, cpr:  2.250, div: 14245
++r:      134, a:      134.00, d:  0.0000%, tcr: 11, cpr:  2.625, div: 16153
++r:      110, a:      110.00, d:  0.0000%, tcr: 12, cpr:  1.000, div: 47348
++r:       75, a:       75.00, d:  0.0000%, tcr:  4, cpr:  5.875, div: 35461
++r:       50, a:       50.00, d:  0.0000%, tcr: 16, cpr:  1.250, div: 62500
++r:       25, a:       25.00, d:  0.0000%, tcr: 16, cpr:  2.500, div: 62500
++r:        4, a:        4.00, d:  0.0000%, tcr: 16, cpr: 20.000, div: 48828
++r:        2, a:        2.00, d:  0.0000%, tcr: 16, cpr: 40.000, div: 48828
++r:        1, a:        1.00, d:  0.0000%, tcr: 16, cpr: 63.875, div: 61154
++
++With the baud base set to 15625000 and the unsigned 16-bit UART_DIV_MAX
++limitation imposed by `serial8250_get_baud_rate' standard baud rates
++below 300bps become unavailable in the regular way, e.g. the rate of
++200bps requires the baud base to be divided by 78125 and that is beyond
++the unsigned 16-bit range.  The historic spd_cust feature can still be
++used by encoding the values for the prescaler, the oversampling rate
++and the clock divisor (DLM/DLL) as follows to obtain such rates if so
++required:
++
++ 31 29 28             20 19   16 15                            0
+++-----+-----------------+-------+-------------------------------+
++|0 0 0|    CPR2:CPR     |  TCR  |            DLM:DLL            |
+++-----+-----------------+-------+-------------------------------+
++
++Use a value encoded this way for the `custom_divisor' field along with the
++ASYNC_SPD_CUST flag set in the `flags' field in `struct serial_struct'
++passed with the TIOCSSERIAL ioctl(2), such as with the setserial(8)
++utility and its `divisor' and `spd_cust' parameters, and then select
++the baud rate of 38400bps.  Note that the value of 0 in TCR sets the
++oversampling rate to 16 and prescaler values below 1.000 in CPR2/CPR are
++reset by the driver to the default of 33.875.
++
++For example the value of 0x1f4004e2 will set CPR2/CPR, TCR and DLM/DLL
++respectively to 0x1f4, 0x0 and 0x04e2, choosing the prescaler value,
++the oversampling rate and the clock divisor of 62.500, 16 and 1250
++respectively.  These parameters will set the baud rate for the serial
++port to 62500000 / 62.500 / 1250 / 16 = 50bps.
++
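A hypothetical C macro (not part of the driver) shows how such a value
can be composed from the three fields:

    /* CPR2:CPR in bits 28:20 (prescaler scaled by 8), TCR in bits 19:16
     * (0 selects the oversampling rate of 16), DLM:DLL in bits 15:0. */
    #define TORNADO_CUST_DIV(cpr8, tcr, div) \
            ((((cpr8) & 0x1ffu) << 20) | (((tcr) & 0xfu) << 16) | \
             ((div) & 0xffffu))

TORNADO_CUST_DIV(500, 0, 1250) evaluates to 0x1f4004e2, the example
given above.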
++References:
++
++[1] "OXPCIe200 PCI Express Multi-Port Bridge", Oxford Semiconductor,
++    Inc., DS-0045, 10 Nov 2008, Section "950 Mode", pp. 64-65
++
++[2] "OXPCIe952 PCI Express Bridge to Dual Serial & Parallel Port",
++    Oxford Semiconductor, Inc., DS-0046, Mar 06 08, Section "950 Mode",
++    p. 20
++
++[3] "OXPCIe954 PCI Express Bridge to Quad Serial Port", Oxford
++    Semiconductor, Inc., DS-0047, Feb 08, Section "950 Mode", p. 20
++
++[4] "OXPCIe958 PCI Express Bridge to Octal Serial Port", Oxford
++    Semiconductor, Inc., DS-0048, Feb 08, Section "950 Mode", p. 20
++
++Maciej W. Rozycki  <macro@orcam.me.uk>
+diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
+index 4b0e84e01e55..818ed6cd3132 100644
+--- a/drivers/tty/serial/8250/8250_pci.c
++++ b/drivers/tty/serial/8250/8250_pci.c
+@@ -11,6 +11,7 @@
+ #include <linux/pci.h>
+ #include <linux/string.h>
+ #include <linux/kernel.h>
++#include <linux/math.h>
+ #include <linux/slab.h>
+ #include <linux/delay.h>
+ #include <linux/tty.h>
+@@ -1044,6 +1045,208 @@ static int pci_oxsemi_tornado_init(struct pci_dev *dev)
+       return number_uarts;
+ }
++/* Tornado-specific constants for the TCR and CPR registers; see below.  */
++#define OXSEMI_TORNADO_TCR_MASK       0xf
++#define OXSEMI_TORNADO_CPR_MASK       0x1ff
++#define OXSEMI_TORNADO_CPR_MIN        0x008
++#define OXSEMI_TORNADO_CPR_DEF        0x10f
++
++/*
++ * Determine the oversampling rate, the clock prescaler, and the clock
++ * divisor for the requested baud rate.  The clock rate is 62.5 MHz,
++ * which is four times the baud base, and the prescaler increments in
++ * steps of 1/8.  Therefore to make calculations on integers we need
++ * to use a scaled clock rate, which is the baud base multiplied by 32
++ * (or our assumed UART clock rate multiplied by 2).
++ *
++ * The allowed oversampling rates are from 4 up to 16 inclusive (values
++ * from 0 to 3 inclusive map to 16).  Likewise the clock prescaler allows
++ * values between 1.000 and 63.875 inclusive (operation for values from
++ * 0.000 to 0.875 has not been specified).  The clock divisor is the usual
++ * unsigned 16-bit integer.
++ *
++ * For the most accurate baud rate we use a table of predetermined
++ * oversampling rates and clock prescalers that records all possible
++ * products of the two parameters in the range from 4 up to 255 inclusive,
++ * and additionally 335 for the 1500000bps rate, with the prescaler scaled
++ * by 8.  The table is sorted by the decreasing value of the oversampling
++ * rate and ties are resolved by sorting by the decreasing value of the
++ * product.  This way preference is given to higher oversampling rates.
++ *
++ * We iterate over the table and choose the product of an oversampling
++ * rate and a clock prescaler that gives the lowest integer division
++ * result deviation, or if an exact integer divider is found we stop
++ * looking for it right away.  We do some fixup if the resulting clock
++ * divisor required would be out of its unsigned 16-bit integer range.
++ *
++ * Finally we abuse the supposed fractional part returned to encode the
++ * 4-bit value of the oversampling rate and the 9-bit value of the clock
++ * prescaler which will end up in the TCR and CPR/CPR2 registers.
++ */
++static unsigned int pci_oxsemi_tornado_get_divisor(struct uart_port *port,
++                                                 unsigned int baud,
++                                                 unsigned int *frac)
++{
++      static u8 p[][2] = {
++              { 16, 14, }, { 16, 13, }, { 16, 12, }, { 16, 11, },
++              { 16, 10, }, { 16,  9, }, { 16,  8, }, { 15, 17, },
++              { 15, 16, }, { 15, 15, }, { 15, 14, }, { 15, 13, },
++              { 15, 12, }, { 15, 11, }, { 15, 10, }, { 15,  9, },
++              { 15,  8, }, { 14, 18, }, { 14, 17, }, { 14, 14, },
++              { 14, 13, }, { 14, 12, }, { 14, 11, }, { 14, 10, },
++              { 14,  9, }, { 14,  8, }, { 13, 19, }, { 13, 18, },
++              { 13, 17, }, { 13, 13, }, { 13, 12, }, { 13, 11, },
++              { 13, 10, }, { 13,  9, }, { 13,  8, }, { 12, 19, },
++              { 12, 18, }, { 12, 17, }, { 12, 11, }, { 12,  9, },
++              { 12,  8, }, { 11, 23, }, { 11, 22, }, { 11, 21, },
++              { 11, 20, }, { 11, 19, }, { 11, 18, }, { 11, 17, },
++              { 11, 11, }, { 11, 10, }, { 11,  9, }, { 11,  8, },
++              { 10, 25, }, { 10, 23, }, { 10, 20, }, { 10, 19, },
++              { 10, 17, }, { 10, 10, }, { 10,  9, }, { 10,  8, },
++              {  9, 27, }, {  9, 23, }, {  9, 21, }, {  9, 19, },
++              {  9, 18, }, {  9, 17, }, {  9,  9, }, {  9,  8, },
++              {  8, 31, }, {  8, 29, }, {  8, 23, }, {  8, 19, },
++              {  8, 17, }, {  8,  8, }, {  7, 35, }, {  7, 31, },
++              {  7, 29, }, {  7, 25, }, {  7, 23, }, {  7, 21, },
++              {  7, 19, }, {  7, 17, }, {  7, 15, }, {  7, 14, },
++              {  7, 13, }, {  7, 12, }, {  7, 11, }, {  7, 10, },
++              {  7,  9, }, {  7,  8, }, {  6, 41, }, {  6, 37, },
++              {  6, 31, }, {  6, 29, }, {  6, 23, }, {  6, 19, },
++              {  6, 17, }, {  6, 13, }, {  6, 11, }, {  6, 10, },
++              {  6,  9, }, {  6,  8, }, {  5, 67, }, {  5, 47, },
++              {  5, 43, }, {  5, 41, }, {  5, 37, }, {  5, 31, },
++              {  5, 29, }, {  5, 25, }, {  5, 23, }, {  5, 19, },
++              {  5, 17, }, {  5, 15, }, {  5, 13, }, {  5, 11, },
++              {  5, 10, }, {  5,  9, }, {  5,  8, }, {  4, 61, },
++              {  4, 59, }, {  4, 53, }, {  4, 47, }, {  4, 43, },
++              {  4, 41, }, {  4, 37, }, {  4, 31, }, {  4, 29, },
++              {  4, 23, }, {  4, 19, }, {  4, 17, }, {  4, 13, },
++              {  4,  9, }, {  4,  8, },
++      };
++      /* Scale the quotient for comparison to get the fractional part.  */
++      const unsigned int quot_scale = 65536;
++      unsigned int sclk = port->uartclk * 2;
++      unsigned int sdiv = DIV_ROUND_CLOSEST(sclk, baud);
++      unsigned int best_squot;
++      unsigned int squot;
++      unsigned int quot;
++      u16 cpr;
++      u8 tcr;
++      int i;
++
++      /* Old custom speed handling.  */
++      if (baud == 38400 && (port->flags & UPF_SPD_MASK) == UPF_SPD_CUST) {
++              unsigned int cust_div = port->custom_divisor;
++
++              quot = cust_div & UART_DIV_MAX;
++              tcr = (cust_div >> 16) & OXSEMI_TORNADO_TCR_MASK;
++              cpr = (cust_div >> 20) & OXSEMI_TORNADO_CPR_MASK;
++              if (cpr < OXSEMI_TORNADO_CPR_MIN)
++                      cpr = OXSEMI_TORNADO_CPR_DEF;
++      } else {
++              best_squot = quot_scale;
++              for (i = 0; i < ARRAY_SIZE(p); i++) {
++                      unsigned int spre;
++                      unsigned int srem;
++                      u8 cp;
++                      u8 tc;
++
++                      tc = p[i][0];
++                      cp = p[i][1];
++                      spre = tc * cp;
++
++                      srem = sdiv % spre;
++                      if (srem > spre / 2)
++                              srem = spre - srem;
++                      squot = DIV_ROUND_CLOSEST(srem * quot_scale, spre);
++
++                      if (srem == 0) {
++                              tcr = tc;
++                              cpr = cp;
++                              quot = sdiv / spre;
++                              break;
++                      } else if (squot < best_squot) {
++                              best_squot = squot;
++                              tcr = tc;
++                              cpr = cp;
++                              quot = DIV_ROUND_CLOSEST(sdiv, spre);
++                      }
++              }
++              while (tcr <= (OXSEMI_TORNADO_TCR_MASK + 1) >> 1 &&
++                     quot % 2 == 0) {
++                      quot >>= 1;
++                      tcr <<= 1;
++              }
++              while (quot > UART_DIV_MAX) {
++                      if (tcr <= (OXSEMI_TORNADO_TCR_MASK + 1) >> 1) {
++                              quot >>= 1;
++                              tcr <<= 1;
++                      } else if (cpr <= OXSEMI_TORNADO_CPR_MASK >> 1) {
++                              quot >>= 1;
++                              cpr <<= 1;
++                      } else {
++                              quot = quot * cpr / OXSEMI_TORNADO_CPR_MASK;
++                              cpr = OXSEMI_TORNADO_CPR_MASK;
++                      }
++              }
++      }
++
++      *frac = (cpr << 8) | (tcr & OXSEMI_TORNADO_TCR_MASK);
++      return quot;
++}
++
++/*
++ * Set the oversampling rate in the transmitter clock cycle register (TCR),
++ * the clock prescaler in the clock prescaler register (CPR and CPR2), and
++ * the clock divisor in the divisor latch (DLL and DLM).  Note that for
++ * backwards compatibility any write to CPR clears CPR2 and therefore CPR
++ * has to be written first, followed by CPR2, which occupies the location
++ * of CKS used with earlier UART designs.
++ */
++static void pci_oxsemi_tornado_set_divisor(struct uart_port *port,
++                                         unsigned int baud,
++                                         unsigned int quot,
++                                         unsigned int quot_frac)
++{
++      struct uart_8250_port *up = up_to_u8250p(port);
++      u8 cpr2 = quot_frac >> 16;
++      u8 cpr = quot_frac >> 8;
++      u8 tcr = quot_frac;
++
++      serial_icr_write(up, UART_TCR, tcr);
++      serial_icr_write(up, UART_CPR, cpr);
++      serial_icr_write(up, UART_CKS, cpr2);
++      serial8250_do_set_divisor(port, baud, quot, 0);
++}
++
++/*
++ * For Tornado devices we force MCR[7] set for the Divide-by-M N/8 baud rate
++ * generator prescaler (CPR and CPR2).  Otherwise no prescaler would be used.
++ */
++static void pci_oxsemi_tornado_set_mctrl(struct uart_port *port,
++                                       unsigned int mctrl)
++{
++      struct uart_8250_port *up = up_to_u8250p(port);
++
++      up->mcr |= UART_MCR_CLKSEL;
++      serial8250_do_set_mctrl(port, mctrl);
++}
++
++static int pci_oxsemi_tornado_setup(struct serial_private *priv,
++                                  const struct pciserial_board *board,
++                                  struct uart_8250_port *up, int idx)
++{
++      struct pci_dev *dev = priv->dev;
++
++      if (pci_oxsemi_tornado_p(dev)) {
++              up->port.get_divisor = pci_oxsemi_tornado_get_divisor;
++              up->port.set_divisor = pci_oxsemi_tornado_set_divisor;
++              up->port.set_mctrl = pci_oxsemi_tornado_set_mctrl;
++      }
++
++      return pci_default_setup(priv, board, up, idx);
++}
++
+ static int pci_asix_setup(struct serial_private *priv,
+                 const struct pciserial_board *board,
+                 struct uart_8250_port *port, int idx)
+@@ -2245,7 +2448,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = {
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .init           = pci_oxsemi_tornado_init,
+-              .setup          = pci_default_setup,
++              .setup          = pci_oxsemi_tornado_setup,
+       },
+       {
+               .vendor         = PCI_VENDOR_ID_MAINPINE,
+@@ -2253,7 +2456,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = {
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+               .init           = pci_oxsemi_tornado_init,
+-              .setup          = pci_default_setup,
++              .setup          = pci_oxsemi_tornado_setup,
+       },
+       {
+               .vendor         = PCI_VENDOR_ID_DIGI,
+@@ -2261,7 +2464,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = {
+               .subvendor              = PCI_SUBVENDOR_ID_IBM,
+               .subdevice              = PCI_ANY_ID,
+               .init                   = pci_oxsemi_tornado_init,
+-              .setup          = pci_default_setup,
++              .setup          = pci_oxsemi_tornado_setup,
+       },
+       {
+               .vendor         = PCI_VENDOR_ID_INTEL,
+@@ -2578,7 +2781,7 @@ enum pci_board_num_t {
+       pbn_b0_2_1843200,
+       pbn_b0_4_1843200,
+-      pbn_b0_1_3906250,
++      pbn_b0_1_15625000,
+       pbn_b0_bt_1_115200,
+       pbn_b0_bt_2_115200,
+@@ -2657,10 +2860,10 @@ enum pci_board_num_t {
+       pbn_panacom4,
+       pbn_plx_romulus,
+       pbn_oxsemi,
+-      pbn_oxsemi_1_3906250,
+-      pbn_oxsemi_2_3906250,
+-      pbn_oxsemi_4_3906250,
+-      pbn_oxsemi_8_3906250,
++      pbn_oxsemi_1_15625000,
++      pbn_oxsemi_2_15625000,
++      pbn_oxsemi_4_15625000,
++      pbn_oxsemi_8_15625000,
+       pbn_intel_i960,
+       pbn_sgi_ioc3,
+       pbn_computone_4,
+@@ -2803,10 +3006,10 @@ static struct pciserial_board pci_boards[] = {
+               .uart_offset    = 8,
+       },
+-      [pbn_b0_1_3906250] = {
++      [pbn_b0_1_15625000] = {
+               .flags          = FL_BASE0,
+               .num_ports      = 1,
+-              .base_baud      = 3906250,
++              .base_baud      = 15625000,
+               .uart_offset    = 8,
+       },
+@@ -3187,31 +3390,31 @@ static struct pciserial_board pci_boards[] = {
+               .base_baud      = 115200,
+               .uart_offset    = 8,
+       },
+-      [pbn_oxsemi_1_3906250] = {
++      [pbn_oxsemi_1_15625000] = {
+               .flags          = FL_BASE0,
+               .num_ports      = 1,
+-              .base_baud      = 3906250,
++              .base_baud      = 15625000,
+               .uart_offset    = 0x200,
+               .first_offset   = 0x1000,
+       },
+-      [pbn_oxsemi_2_3906250] = {
++      [pbn_oxsemi_2_15625000] = {
+               .flags          = FL_BASE0,
+               .num_ports      = 2,
+-              .base_baud      = 3906250,
++              .base_baud      = 15625000,
+               .uart_offset    = 0x200,
+               .first_offset   = 0x1000,
+       },
+-      [pbn_oxsemi_4_3906250] = {
++      [pbn_oxsemi_4_15625000] = {
+               .flags          = FL_BASE0,
+               .num_ports      = 4,
+-              .base_baud      = 3906250,
++              .base_baud      = 15625000,
+               .uart_offset    = 0x200,
+               .first_offset   = 0x1000,
+       },
+-      [pbn_oxsemi_8_3906250] = {
++      [pbn_oxsemi_8_15625000] = {
+               .flags          = FL_BASE0,
+               .num_ports      = 8,
+-              .base_baud      = 3906250,
++              .base_baud      = 15625000,
+               .uart_offset    = 0x200,
+               .first_offset   = 0x1000,
+       },
+@@ -4192,165 +4395,165 @@ static const struct pci_device_id serial_pci_tbl[] = {
+        */
+       {       PCI_VENDOR_ID_OXSEMI, 0xc101,    /* OXPCIe952 1 Legacy UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_b0_1_3906250 },
++              pbn_b0_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc105,    /* OXPCIe952 1 Legacy UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_b0_1_3906250 },
++              pbn_b0_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc11b,    /* OXPCIe952 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc11f,    /* OXPCIe952 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc120,    /* OXPCIe952 1 Legacy UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_b0_1_3906250 },
++              pbn_b0_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc124,    /* OXPCIe952 1 Legacy UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_b0_1_3906250 },
++              pbn_b0_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc138,    /* OXPCIe952 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc13d,    /* OXPCIe952 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc140,    /* OXPCIe952 1 Legacy UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_b0_1_3906250 },
++              pbn_b0_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc141,    /* OXPCIe952 1 Legacy UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_b0_1_3906250 },
++              pbn_b0_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc144,    /* OXPCIe952 1 Legacy UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_b0_1_3906250 },
++              pbn_b0_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc145,    /* OXPCIe952 1 Legacy UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_b0_1_3906250 },
++              pbn_b0_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc158,    /* OXPCIe952 2 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_2_3906250 },
++              pbn_oxsemi_2_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc15d,    /* OXPCIe952 2 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_2_3906250 },
++              pbn_oxsemi_2_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc208,    /* OXPCIe954 4 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_4_3906250 },
++              pbn_oxsemi_4_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc20d,    /* OXPCIe954 4 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_4_3906250 },
++              pbn_oxsemi_4_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc308,    /* OXPCIe958 8 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_8_3906250 },
++              pbn_oxsemi_8_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc30d,    /* OXPCIe958 8 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_8_3906250 },
++              pbn_oxsemi_8_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc40b,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc40f,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc41b,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc41f,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc42b,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc42f,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc43b,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc43f,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc44b,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc44f,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc45b,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc45f,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc46b,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc46f,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc47b,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc47f,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc48b,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc48f,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc49b,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc49f,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc4ab,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc4af,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc4bb,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc4bf,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc4cb,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_OXSEMI, 0xc4cf,    /* OXPCIe200 1 Native UART */
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       /*
+        * Mainpine Inc. IQ Express "Rev3" utilizing OxSemi Tornado
+        */
+       {       PCI_VENDOR_ID_MAINPINE, 0x4000, /* IQ Express 1 Port V.34 Super-G3 Fax */
+               PCI_VENDOR_ID_MAINPINE, 0x4001, 0, 0,
+-              pbn_oxsemi_1_3906250 },
++              pbn_oxsemi_1_15625000 },
+       {       PCI_VENDOR_ID_MAINPINE, 0x4000, /* IQ Express 2 Port V.34 Super-G3 Fax */
+               PCI_VENDOR_ID_MAINPINE, 0x4002, 0, 0,
+-              pbn_oxsemi_2_3906250 },
++              pbn_oxsemi_2_15625000 },
+       {       PCI_VENDOR_ID_MAINPINE, 0x4000, /* IQ Express 4 Port V.34 Super-G3 Fax */
+               PCI_VENDOR_ID_MAINPINE, 0x4004, 0, 0,
+-              pbn_oxsemi_4_3906250 },
++              pbn_oxsemi_4_15625000 },
+       {       PCI_VENDOR_ID_MAINPINE, 0x4000, /* IQ Express 8 Port V.34 Super-G3 Fax */
+               PCI_VENDOR_ID_MAINPINE, 0x4008, 0, 0,
+-              pbn_oxsemi_8_3906250 },
++              pbn_oxsemi_8_15625000 },
+       /*
+        * Digi/IBM PCIe 2-port Async EIA-232 Adapter utilizing OxSemi Tornado
+        */
+       {       PCI_VENDOR_ID_DIGI, PCIE_DEVICE_ID_NEO_2_OX_IBM,
+               PCI_SUBVENDOR_ID_IBM, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_2_3906250 },
++              pbn_oxsemi_2_15625000 },
+       /*
+        * EndRun Technologies. PCI express device range.
+        * EndRun PTP/1588 has 2 Native UARTs utilizing OxSemi 952.
+        */
+       {       PCI_VENDOR_ID_ENDRUN, PCI_DEVICE_ID_ENDRUN_1588,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_oxsemi_2_3906250 },
++              pbn_oxsemi_2_15625000 },
+       /*
+        * SBS Technologies, Inc. P-Octal and PMC-OCTPRO cards,
+-- 
+2.35.1
+
diff --git a/queue-5.18/serial-8250-fold-endrun-device-support-into-oxsemi-t.patch b/queue-5.18/serial-8250-fold-endrun-device-support-into-oxsemi-t.patch
new file mode 100644 (file)
index 0000000..8a05005
--- /dev/null
@@ -0,0 +1,177 @@
+From cfb171de62f71cfb83a28429ef85c60e8be57a08 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Apr 2022 16:27:22 +0100
+Subject: serial: 8250: Fold EndRun device support into OxSemi Tornado code
+
+From: Maciej W. Rozycki <macro@orcam.me.uk>
+
+[ Upstream commit 1f32c65bad24b9787d3e52843de375430e3df822 ]
+
+The EndRun PTP/1588 dual serial port device is based on the Oxford
+Semiconductor OXPCIe952 UART device with the PCI vendor:device ID set
+for EndRun Technologies and uses the same sequence to determine the
+number of ports available.  Despite that, we have duplicate code
+specific to the EndRun device.
+
+Remove redundant code then and factor out OxSemi Tornado device
+detection.
+
+Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Link: https://lore.kernel.org/r/alpine.DEB.2.21.2204181516220.9383@angie.orcam.me.uk
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/8250/8250_pci.c | 76 ++++++++++--------------------
+ 1 file changed, 25 insertions(+), 51 deletions(-)
+
+diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
+index a293e9f107d0..4b0e84e01e55 100644
+--- a/drivers/tty/serial/8250/8250_pci.c
++++ b/drivers/tty/serial/8250/8250_pci.c
+@@ -994,41 +994,29 @@ static void pci_ite887x_exit(struct pci_dev *dev)
+ }
+ /*
+- * EndRun Technologies.
+- * Determine the number of ports available on the device.
++ * Oxford Semiconductor Inc.
++ * Check if an OxSemi device is part of the Tornado range of devices.
+  */
+ #define PCI_VENDOR_ID_ENDRUN                  0x7401
+ #define PCI_DEVICE_ID_ENDRUN_1588     0xe100
+-static int pci_endrun_init(struct pci_dev *dev)
++static bool pci_oxsemi_tornado_p(struct pci_dev *dev)
+ {
+-      u8 __iomem *p;
+-      unsigned long deviceID;
+-      unsigned int  number_uarts = 0;
++      /* OxSemi Tornado devices are all 0xCxxx */
++      if (dev->vendor == PCI_VENDOR_ID_OXSEMI &&
++          (dev->device & 0xf000) != 0xc000)
++              return false;
+-      /* EndRun device is all 0xexxx */
++      /* EndRun devices are all 0xExxx */
+       if (dev->vendor == PCI_VENDOR_ID_ENDRUN &&
+-              (dev->device & 0xf000) != 0xe000)
+-              return 0;
+-
+-      p = pci_iomap(dev, 0, 5);
+-      if (p == NULL)
+-              return -ENOMEM;
++          (dev->device & 0xf000) != 0xe000)
++              return false;
+-      deviceID = ioread32(p);
+-      /* EndRun device */
+-      if (deviceID == 0x07000200) {
+-              number_uarts = ioread8(p + 4);
+-              pci_dbg(dev, "%d ports detected on EndRun PCI Express device\n", number_uarts);
+-      }
+-      pci_iounmap(dev, p);
+-      return number_uarts;
++      return true;
+ }
+ /*
+- * Oxford Semiconductor Inc.
+- * Check that device is part of the Tornado range of devices, then determine
+- * the number of ports available on the device.
++ * Determine the number of ports available on a Tornado device.
+  */
+ static int pci_oxsemi_tornado_init(struct pci_dev *dev)
+ {
+@@ -1036,9 +1024,7 @@ static int pci_oxsemi_tornado_init(struct pci_dev *dev)
+       unsigned long deviceID;
+       unsigned int  number_uarts = 0;
+-      /* OxSemi Tornado devices are all 0xCxxx */
+-      if (dev->vendor == PCI_VENDOR_ID_OXSEMI &&
+-          (dev->device & 0xF000) != 0xC000)
++      if (!pci_oxsemi_tornado_p(dev))
+               return 0;
+       p = pci_iomap(dev, 0, 5);
+@@ -1049,7 +1035,10 @@ static int pci_oxsemi_tornado_init(struct pci_dev *dev)
+       /* Tornado device */
+       if (deviceID == 0x07000200) {
+               number_uarts = ioread8(p + 4);
+-              pci_dbg(dev, "%d ports detected on Oxford PCI Express device\n", number_uarts);
++              pci_dbg(dev, "%d ports detected on %s PCI Express device\n",
++                      number_uarts,
++                      dev->vendor == PCI_VENDOR_ID_ENDRUN ?
++                      "EndRun" : "Oxford");
+       }
+       pci_iounmap(dev, p);
+       return number_uarts;
+@@ -2244,7 +2233,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = {
+               .device         = PCI_ANY_ID,
+               .subvendor      = PCI_ANY_ID,
+               .subdevice      = PCI_ANY_ID,
+-              .init           = pci_endrun_init,
++              .init           = pci_oxsemi_tornado_init,
+               .setup          = pci_default_setup,
+       },
+       /*
+@@ -2667,7 +2656,6 @@ enum pci_board_num_t {
+       pbn_panacom2,
+       pbn_panacom4,
+       pbn_plx_romulus,
+-      pbn_endrun_2_3906250,
+       pbn_oxsemi,
+       pbn_oxsemi_1_3906250,
+       pbn_oxsemi_2_3906250,
+@@ -3189,20 +3177,6 @@ static struct pciserial_board pci_boards[] = {
+               .first_offset   = 0x03,
+       },
+-      /*
+-       * EndRun Technologies
+-      * Uses the size of PCI Base region 0 to
+-      * signal now many ports are available
+-      * 2 port 952 Uart support
+-      */
+-      [pbn_endrun_2_3906250] = {
+-              .flags          = FL_BASE0,
+-              .num_ports      = 2,
+-              .base_baud      = 3906250,
+-              .uart_offset    = 0x200,
+-              .first_offset   = 0x1000,
+-      },
+-
+       /*
+        * This board uses the size of PCI Base region 0 to
+        * signal now many ports are available
+@@ -4109,13 +4083,6 @@ static const struct pci_device_id serial_pci_tbl[] = {
+       {       PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_ROMULUS,
+               0x10b5, 0x106a, 0, 0,
+               pbn_plx_romulus },
+-      /*
+-      * EndRun Technologies. PCI express device range.
+-      *    EndRun PTP/1588 has 2 Native UARTs.
+-      */
+-      {       PCI_VENDOR_ID_ENDRUN, PCI_DEVICE_ID_ENDRUN_1588,
+-              PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+-              pbn_endrun_2_3906250 },
+       /*
+        * Quatech cards. These actually have configurable clocks but for
+        * now we just use the default.
+@@ -4377,6 +4344,13 @@ static const struct pci_device_id serial_pci_tbl[] = {
+       {       PCI_VENDOR_ID_DIGI, PCIE_DEVICE_ID_NEO_2_OX_IBM,
+               PCI_SUBVENDOR_ID_IBM, PCI_ANY_ID, 0, 0,
+               pbn_oxsemi_2_3906250 },
++      /*
++       * EndRun Technologies. PCI express device range.
++       * EndRun PTP/1588 has 2 Native UARTs utilizing OxSemi 952.
++       */
++      {       PCI_VENDOR_ID_ENDRUN, PCI_DEVICE_ID_ENDRUN_1588,
++              PCI_ANY_ID, PCI_ANY_ID, 0, 0,
++              pbn_oxsemi_2_3906250 },
+       /*
+        * SBS Technologies, Inc. P-Octal and PMC-OCTPRO cards,
+-- 
+2.35.1
+
index a89abb32b57d52d57d3cb31acdde048f5ab79abf..1a109963018dab1007abe5378667e9da716d83f9 100644 (file)
@@ -987,3 +987,112 @@ input-gscps2-check-return-value-of-ioremap-in-gscps2_probe.patch
 __follow_mount_rcu-verify-that-mount_lock-remains-unchanged.patch
 spmi-trace-fix-stack-out-of-bound-access-in-spmi-tracing-functions.patch
 drivers-base-fix-userspace-break-from-using-bin_attributes-for-cpumap-and-cpulist.patch
+drm-mediatek-keep-dsi-as-lp00-before-dcs-cmds-transf.patch
+crypto-blake2s-remove-shash-module.patch
+drm-dp-mst-read-the-extended-dpcd-capabilities-durin.patch
+scsi-qla2xxx-fix-excessive-i-o-error-messages-by-def.patch
+scsi-qla2xxx-wind-down-adapter-after-pcie-error.patch-27996
+scsi-qla2xxx-turn-off-multi-queue-for-8g-adapters.patch-18430
+scsi-qla2xxx-fix-crash-due-to-stale-srb-access-aroun.patch
+scsi-qla2xxx-fix-losing-fcp-2-targets-during-port-pe.patch
+scsi-qla2xxx-fix-losing-target-when-it-reappears-dur.patch
+scsi-qla2xxx-fix-losing-fcp-2-targets-on-long-port-d.patch
+scsi-qla2xxx-fix-erroneous-mailbox-timeout-after-pci.patch
+drm-vc4-drv-adopt-the-dma-configuration-from-the-hvs.patch
+usbnet-smsc95xx-don-t-clear-read-only-phy-interrupt.patch
+usbnet-smsc95xx-avoid-link-settings-race-on-interrup.patch
+usbnet-smsc95xx-forward-phy-interrupts-to-phy-driver.patch
+usbnet-smsc95xx-fix-deadlock-on-runtime-resume.patch
+firmware-arm_scpi-ensure-scpi_info-is-not-assigned-i.patch
+__follow_mount_rcu-verify-that-mount_lock-remains-un.patch
+intel_th-pci-add-meteor-lake-p-support.patch
+intel_th-pci-add-raptor-lake-s-pch-support.patch
+intel_th-pci-add-raptor-lake-s-cpu-support.patch
+kvm-set_msr_mce-permit-guests-to-ignore-single-bit-e.patch
+kvm-x86-signal-gp-not-eperm-on-bad-wrmsr-mci_ctl-sta.patch
+iommu-vt-d-avoid-invalid-memory-access-via-node_onli.patch
+pci-aer-iterate-over-error-counters-instead-of-error.patch
+pci-qcom-power-on-phy-before-ipq8074-dbi-register-ac.patch
+serial-8250-fold-endrun-device-support-into-oxsemi-t.patch
+serial-8250-add-proper-clock-handling-for-oxsemi-pci.patch
+tty-8250-add-support-for-brainboxes-px-cards.patch
+dm-writecache-set-a-default-max_writeback_jobs.patch
+x86-olpc-fix-logical-not-is-only-applied-to-the-left.patch
+drivers-base-fix-userspace-break-from-using-bin_attr.patch
+kexec_file-drop-weak-attribute-from-functions.patch
+kexec-clean-up-arch_kexec_kernel_verify_sig.patch
+kexec-keys-s390-make-use-of-built-in-and-secondary-k.patch
+tracing-events-add-__vstring-and-__assign_vstr-helpe.patch
+dm-thin-fix-use-after-free-crash-in-dm_sm_register_t.patch
+net-9p-initialize-the-iounit-field-during-fid-creati.patch
+timekeeping-contribute-wall-clock-to-rng-on-time-cha.patch
+scsi-qla2xxx-fix-response-queue-handler-reading-stal.patch
+scsi-qla2xxx-edif-fix-dropped-ike-message.patch
+scsi-qla2xxx-fix-imbalance-vha-vref_count.patch-27970
+scsi-qla2xxx-fix-discovery-issues-in-fc-al-topology.patch-4818
+scsi-qla2xxx-update-manufacturer-details.patch
+locking-csd_lock-change-csdlock_debug-from-early_par.patch
+block-serialize-all-debugfs-operations-using-q-debug.patch
+block-don-t-allow-the-same-type-rq_qos-add-more-than.patch
+spmi-trace-fix-stack-out-of-bound-access-in-spmi-tra.patch
+btrfs-tree-log-make-the-return-value-for-log-syncing.patch
+btrfs-ensure-pages-are-unlocked-on-cow_file_range-fa.patch
+btrfs-fix-error-handling-of-fallback-uncompress-writ.patch
+btrfs-reset-block-group-chunk-force-if-we-have-to-wa.patch
+btrfs-properly-flag-filesystem-with-btrfs_feature_in.patch
+block-add-a-bdev_max_zone_append_sectors-helper.patch
+block-add-bdev_max_segments-helper.patch
+btrfs-zoned-revive-max_zone_append_bytes.patch
+btrfs-replace-btrfs_max_extent_size-with-fs_info-max.patch
+btrfs-let-can_allocate_chunk-return-error.patch
+btrfs-zoned-finish-least-available-block-group-on-da.patch
+btrfs-zoned-disable-metadata-overcommit-for-zoned.patch
+btrfs-make-the-bg_reclaim_threshold-per-space-info.patch
+btrfs-zoned-introduce-btrfs_zoned_bg_is_full.patch
+btrfs-store-chunk-size-in-space-info-struct.patch
+btrfs-zoned-introduce-space_info-active_total_bytes.patch
+btrfs-zoned-activate-metadata-block-group-on-flush_s.patch
+btrfs-zoned-activate-necessary-block-group.patch
+btrfs-zoned-write-out-partially-allocated-region.patch
+btrfs-zoned-wait-until-zone-is-finished-when-allocat.patch
+intel_idle-add-alderlake-support.patch
+intel_idle-make-spr-c1-and-c1e-be-independent.patch
+acpi-cppc-do-not-prevent-cppc-from-working-in-the-fu.patch
+powerpc-powernv-kvm-use-darn-for-h_random-on-power9.patch
+s390-unwind-fix-fgraph-return-address-recovery.patch
+kvm-x86-pmu-introduce-the-ctrl_mask-value-for-fixed-.patch
+kvm-vmx-mark-all-perf_global_-ovf-_ctrl-bits-reserve.patch
+kvm-x86-pmu-ignore-pmu-global_ctrl-check-if-vpmu-doe.patch
+kvm-vmx-add-helper-to-check-if-the-guest-pmu-has-per.patch
+kvm-nvmx-attempt-to-load-perf_global_ctrl-on-nvmx-xf.patch
+dm-raid-fix-address-sanitizer-warning-in-raid_status.patch
+dm-raid-fix-address-sanitizer-warning-in-raid_resume.patch
+mm-damon-reclaim-fix-potential-memory-leak-in-damon_.patch
+hugetlb_cgroup-fix-wrong-hugetlb-cgroup-numa-stat.patch
+batman-adv-tracing-use-the-new-__vstring-helper.patch
+ftrace-x86-add-back-ftrace_expected-assignment.patch-2936
+tracing-use-a-struct-alignof-to-determine-trace-even.patch
+ksmbd-validate-length-in-smb2_write.patch
+ksmbd-smbd-change-prototypes-of-rdma-read-write-rela.patch
+ksmbd-smbd-introduce-read-write-credits-for-rdma-rea.patch
+ksmbd-add-smbd-max-io-size-parameter.patch
+ksmbd-fix-wrong-smbd-max-read-write-size-check.patch
+ksmbd-prevent-out-of-bound-read-for-smb2_write.patch
+input-gscps2-check-return-value-of-ioremap-in-gscps2.patch
+x86-kprobes-update-kcb-status-flag-after-singlestepp.patch
+ext4-update-s_overhead_clusters-in-the-superblock-du.patch
+ext4-fix-extent-status-tree-race-in-writeback-error-.patch
+ext4-add-ext4_inode_has_xattr_space-macro-in-xattr.h.patch
+ext4-fix-use-after-free-in-ext4_xattr_set_entry.patch
+ext4-correct-max_inline_xattr_value_size-computing.patch
+ext4-correct-the-misjudgment-in-ext4_iget_extra_inod.patch
+ext4-fix-warning-in-ext4_iomap_begin-as-race-between.patch
+ext4-check-if-directory-block-is-within-i_size.patch
+ext4-make-sure-ext4_append-always-allocates-new-bloc.patch
+ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch
+ext4-use-kmemdup-to-replace-kmalloc-memcpy.patch
+ext4-unindent-codeblock-in-ext4_xattr_block_set.patch
+ext4-fix-race-when-reusing-xattr-blocks.patch
+keys-asymmetric-enforce-sm2-signature-use-pkey-algo.patch
+tpm-eventlog-fix-section-mismatch-for-debug_section_.patch
+tpm-add-check-for-failure-mode-for-tpm2-modules.patch
diff --git a/queue-5.18/spmi-trace-fix-stack-out-of-bound-access-in-spmi-tra.patch b/queue-5.18/spmi-trace-fix-stack-out-of-bound-access-in-spmi-tra.patch
new file mode 100644 (file)
index 0000000..5d00c0e
--- /dev/null
@@ -0,0 +1,115 @@
+From 950c726de64542319fc82b60b84e385645aff774 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Jun 2022 16:55:12 -0700
+Subject: spmi: trace: fix stack-out-of-bound access in SPMI tracing functions
+
+From: David Collins <quic_collinsd@quicinc.com>
+
+[ Upstream commit 2af28b241eea816e6f7668d1954f15894b45d7e3 ]
+
+trace_spmi_write_begin() and trace_spmi_read_end() both call
+memcpy() with a length of "len + 1".  This leads to one extra
+byte being read beyond the end of the specified buffer.  Fix
+this out-of-bound memory access by using a length of "len"
+instead.
+
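The off-by-one is easy to reproduce in isolation (a minimal C sketch,
not the kernel code):

    #include <string.h>

    void demo(void)
    {
            unsigned char status[1];        /* 1-byte object, as in the report below */
            unsigned char copy[2];
            size_t len = sizeof(status);

            memcpy(copy, status, len + 1);  /* reads one byte past 'status' */
            memcpy(copy, status, len);      /* fixed form: copy exactly len bytes */
    }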
+Here is a KASAN log showing the issue:
+
+BUG: KASAN: stack-out-of-bounds in trace_event_raw_event_spmi_read_end+0x1d0/0x234
+Read of size 2 at addr ffffffc0265b7540 by task thermal@2.0-ser/1314
+...
+Call trace:
+ dump_backtrace+0x0/0x3e8
+ show_stack+0x2c/0x3c
+ dump_stack_lvl+0xdc/0x11c
+ print_address_description+0x74/0x384
+ kasan_report+0x188/0x268
+ kasan_check_range+0x270/0x2b0
+ memcpy+0x90/0xe8
+ trace_event_raw_event_spmi_read_end+0x1d0/0x234
+ spmi_read_cmd+0x294/0x3ac
+ spmi_ext_register_readl+0x84/0x9c
+ regmap_spmi_ext_read+0x144/0x1b0 [regmap_spmi]
+ _regmap_raw_read+0x40c/0x754
+ regmap_raw_read+0x3a0/0x514
+ regmap_bulk_read+0x418/0x494
+ adc5_gen3_poll_wait_hs+0xe8/0x1e0 [qcom_spmi_adc5_gen3]
+ ...
+ __arm64_sys_read+0x4c/0x60
+ invoke_syscall+0x80/0x218
+ el0_svc_common+0xec/0x1c8
+ ...
+
+addr ffffffc0265b7540 is located in stack of task thermal@2.0-ser/1314 at offset 32 in frame:
+ adc5_gen3_poll_wait_hs+0x0/0x1e0 [qcom_spmi_adc5_gen3]
+
+this frame has 1 object:
+ [32, 33) 'status'
+
+Memory state around the buggy address:
+ ffffffc0265b7400: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1
+ ffffffc0265b7480: 04 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00
+>ffffffc0265b7500: 00 00 00 00 f1 f1 f1 f1 01 f3 f3 f3 00 00 00 00
+                                           ^
+ ffffffc0265b7580: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ ffffffc0265b7600: f1 f1 f1 f1 01 f2 07 f2 f2 f2 01 f3 00 00 00 00
+==================================================================
+
+Fixes: a9fce374815d ("spmi: add command tracepoints for SPMI")
+Cc: stable@vger.kernel.org
+Reviewed-by: Stephen Boyd <sboyd@kernel.org>
+Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: David Collins <quic_collinsd@quicinc.com>
+Link: https://lore.kernel.org/r/20220627235512.2272783-1-quic_collinsd@quicinc.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/events/spmi.h | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/include/trace/events/spmi.h b/include/trace/events/spmi.h
+index 8b60efe18ba6..a6819fd85cdf 100644
+--- a/include/trace/events/spmi.h
++++ b/include/trace/events/spmi.h
+@@ -21,15 +21,15 @@ TRACE_EVENT(spmi_write_begin,
+               __field         ( u8,         sid       )
+               __field         ( u16,        addr      )
+               __field         ( u8,         len       )
+-              __dynamic_array ( u8,   buf,  len + 1   )
++              __dynamic_array ( u8,   buf,  len       )
+       ),
+       TP_fast_assign(
+               __entry->opcode = opcode;
+               __entry->sid    = sid;
+               __entry->addr   = addr;
+-              __entry->len    = len + 1;
+-              memcpy(__get_dynamic_array(buf), buf, len + 1);
++              __entry->len    = len;
++              memcpy(__get_dynamic_array(buf), buf, len);
+       ),
+       TP_printk("opc=%d sid=%02d addr=0x%04x len=%d buf=0x[%*phD]",
+@@ -92,7 +92,7 @@ TRACE_EVENT(spmi_read_end,
+               __field         ( u16,        addr      )
+               __field         ( int,        ret       )
+               __field         ( u8,         len       )
+-              __dynamic_array ( u8,   buf,  len + 1   )
++              __dynamic_array ( u8,   buf,  len       )
+       ),
+       TP_fast_assign(
+@@ -100,8 +100,8 @@ TRACE_EVENT(spmi_read_end,
+               __entry->sid    = sid;
+               __entry->addr   = addr;
+               __entry->ret    = ret;
+-              __entry->len    = len + 1;
+-              memcpy(__get_dynamic_array(buf), buf, len + 1);
++              __entry->len    = len;
++              memcpy(__get_dynamic_array(buf), buf, len);
+       ),
+       TP_printk("opc=%d sid=%02d addr=0x%04x ret=%d len=%02d buf=0x[%*phD]",
+-- 
+2.35.1
+
diff --git a/queue-5.18/timekeeping-contribute-wall-clock-to-rng-on-time-cha.patch b/queue-5.18/timekeeping-contribute-wall-clock-to-rng-on-time-cha.patch
new file mode 100644 (file)
index 0000000..53a6e47
--- /dev/null
@@ -0,0 +1,74 @@
+From 9b10f29c1c139d6a3b1b563093243b9621df2322 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 17 Jul 2022 23:53:34 +0200
+Subject: timekeeping: contribute wall clock to rng on time change
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+[ Upstream commit b8ac29b40183a6038919768b5d189c9bd91ce9b4 ]
+
+The rng's random_init() function contributes the real time to the rng at
+boot time, so that events can at least start in relation to something
+particular in the real world. But this clock might not yet be set at
+that point in boot, so nothing is contributed. In addition, the relation
+between minor clock changes from, say, NTP, and the cycle counter is
+potentially useful entropic data.
+
+This commit addresses this by mixing in a time stamp on calls to
+settimeofday and adjtimex. No entropy is credited in doing so, so it
+doesn't make initialization faster, but it is still useful input to
+have.
+
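+The helper used here, add_device_randomness(), mixes arbitrary data
+into the input pool without crediting entropy. A minimal usage sketch
+in the same spirit (function name hypothetical):
+
+ #include <linux/random.h>
+ #include <linux/time64.h>
+
+ static void contribute_wall_clock(const struct timespec64 *ts)
+ {
+       /* no entropy credit: safe even though the value is guessable */
+       add_device_randomness(ts, sizeof(*ts));
+ }
+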
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Cc: stable@vger.kernel.org
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/time/timekeeping.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
+index 871c912860ed..d6a0ff68df41 100644
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -23,6 +23,7 @@
+ #include <linux/pvclock_gtod.h>
+ #include <linux/compiler.h>
+ #include <linux/audit.h>
++#include <linux/random.h>
+ #include "tick-internal.h"
+ #include "ntp_internal.h"
+@@ -1326,8 +1327,10 @@ int do_settimeofday64(const struct timespec64 *ts)
+       /* Signal hrtimers about time change */
+       clock_was_set(CLOCK_SET_WALL);
+-      if (!ret)
++      if (!ret) {
+               audit_tk_injoffset(ts_delta);
++              add_device_randomness(ts, sizeof(*ts));
++      }
+       return ret;
+ }
+@@ -2413,6 +2416,7 @@ int do_adjtimex(struct __kernel_timex *txc)
+       ret = timekeeping_validate_timex(txc);
+       if (ret)
+               return ret;
++      add_device_randomness(txc, sizeof(*txc));
+       if (txc->modes & ADJ_SETOFFSET) {
+               struct timespec64 delta;
+@@ -2430,6 +2434,7 @@ int do_adjtimex(struct __kernel_timex *txc)
+       audit_ntp_init(&ad);
+       ktime_get_real_ts64(&ts);
++      add_device_randomness(&ts, sizeof(ts));
+       raw_spin_lock_irqsave(&timekeeper_lock, flags);
+       write_seqcount_begin(&tk_core.seq);
+-- 
+2.35.1
+
diff --git a/queue-5.18/tpm-add-check-for-failure-mode-for-tpm2-modules.patch b/queue-5.18/tpm-add-check-for-failure-mode-for-tpm2-modules.patch
new file mode 100644 (file)
index 0000000..80c0227
--- /dev/null
@@ -0,0 +1,54 @@
+From 9c1985649dac8ce39d9305a8087aa7e7a44b6a11 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Aug 2022 15:57:03 +0200
+Subject: tpm: Add check for Failure mode for TPM2 modules
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mårten Lindahl <marten.lindahl@axis.com>
+
+[ Upstream commit 863ed94c589fcd1984f4e3080f069d30508044bb ]
+
+In commit 0aa698787aa2 ("tpm: Add Upgrade/Reduced mode support for
+TPM2 modules") it was said that:
+
+"If the TPM is in Failure mode, it will successfully respond to both
+tpm2_do_selftest() and tpm2_startup() calls. Although, will fail to
+answer to tpm2_get_cc_attrs_tbl(). Use this fact to conclude that TPM
+is in Failure mode."
+
+But that commit never actually added such a check on the
+tpm2_get_cc_attrs_tbl() call to conclude that the TPM is in Failure
+mode. This commit corrects that by adding the check.
+
+Fixes: 0aa698787aa2 ("tpm: Add Upgrade/Reduced mode support for TPM2 modules")
+Cc: stable@vger.kernel.org # v5.17+
+Signed-off-by: Mårten Lindahl <marten.lindahl@axis.com>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/char/tpm/tpm2-cmd.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
+index 04a3e23a4afc..4419593d9531 100644
+--- a/drivers/char/tpm/tpm2-cmd.c
++++ b/drivers/char/tpm/tpm2-cmd.c
+@@ -752,6 +752,12 @@ int tpm2_auto_startup(struct tpm_chip *chip)
+       }
+       rc = tpm2_get_cc_attrs_tbl(chip);
++      if (rc == TPM2_RC_FAILURE || (rc < 0 && rc != -ENOMEM)) {
++              dev_info(&chip->dev,
++                       "TPM in field failure mode, requires firmware upgrade\n");
++              chip->flags |= TPM_CHIP_FLAG_FIRMWARE_UPGRADE;
++              rc = 0;
++      }
+ out:
+       if (rc == TPM2_RC_UPGRADE) {
+-- 
+2.35.1
+
diff --git a/queue-5.18/tpm-eventlog-fix-section-mismatch-for-debug_section_.patch b/queue-5.18/tpm-eventlog-fix-section-mismatch-for-debug_section_.patch
new file mode 100644 (file)
index 0000000..a2f014e
--- /dev/null
@@ -0,0 +1,47 @@
+From 965d4271f4ecbc74ebfd59eee61471d44d2e4694 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Jul 2022 09:17:38 +0800
+Subject: tpm: eventlog: Fix section mismatch for DEBUG_SECTION_MISMATCH
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+[ Upstream commit bed4593645366ad7362a3aa7bc0d100d8d8236a8 ]
+
+If DEBUG_SECTION_MISMATCH is enabled, __calc_tpm2_event_size() will not
+be inlined, which causes a section mismatch like this:
+
+WARNING: modpost: vmlinux.o(.text.unlikely+0xe30c): Section mismatch in reference from the variable L0 to the function .init.text:early_ioremap()
+The function L0() references
+the function __init early_memremap().
+This is often because L0 lacks a __init
+annotation or the annotation of early_ioremap is wrong.
+
+Fix it by using __always_inline instead of inline for the called-once
+function __calc_tpm2_event_size().
+
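+A hedged sketch of the general pattern (symbol names hypothetical): a
+helper that references __init code is only safe while it is inlined
+into __init callers; once the compiler outlines it, modpost sees a
+reference from regular text into init text:
+
+ static void __init my_early_setup(void) { }
+
+ /* may be outlined under DEBUG_SECTION_MISMATCH -> section mismatch */
+ static inline void helper_a(void) { my_early_setup(); }
+
+ /* forced inline: the reference only ever exists inside __init code */
+ static __always_inline void helper_b(void) { my_early_setup(); }
+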
+Fixes: 44038bc514a2 ("tpm: Abstract crypto agile event size calculations")
+Cc: stable@vger.kernel.org # v5.3
+Reported-by: WANG Xuerui <git@xen0n.name>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/tpm_eventlog.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h
+index 739ba9a03ec1..20c0ff54b7a0 100644
+--- a/include/linux/tpm_eventlog.h
++++ b/include/linux/tpm_eventlog.h
+@@ -157,7 +157,7 @@ struct tcg_algorithm_info {
+  * Return: size of the event on success, 0 on failure
+  */
+-static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
++static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
+                                        struct tcg_pcr_event *event_header,
+                                        bool do_mapping)
+ {
+-- 
+2.35.1
+
diff --git a/queue-5.18/tracing-events-add-__vstring-and-__assign_vstr-helpe.patch b/queue-5.18/tracing-events-add-__vstring-and-__assign_vstr-helpe.patch
new file mode 100644 (file)
index 0000000..0716990
--- /dev/null
@@ -0,0 +1,196 @@
+From 3a68f50bc6f54e4ef743e38ffb6064be1ef5e481 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Jul 2022 18:44:54 -0400
+Subject: tracing/events: Add __vstring() and __assign_vstr() helper macros
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit 0563231f93c6d1f582b168a47753b345c1e20d81 ]
+
+There are several places that open-code the following logic:
+
+  TP_STRUCT__entry(__dynamic_array(char, msg, MSG_MAX)),
+  TP_fast_assign(vsnprintf(__get_str(msg), MSG_MAX, vaf->fmt, *vaf->va);)
+
+This loads a string created from a va_list variable argument list.
+
+The main issue with this approach is the "MSG_MAX" usage in the
+__dynamic_array() portion. That just reserves MSG_MAX bytes in the
+event, and even wastes space because dynamic metadata is also saved
+in the event to denote the offset and size of the dynamic array. It
+would have been better to just use a static __array() field.
+
+Instead, create __vstring() and __assign_vstr() that work like
+__string() and __assign_str(), but instead of taking a destination
+string to copy, take a format string and a va_list pointer and fill in
+the values.
+
+It uses the helper:
+
+ #define __trace_event_vstr_len(fmt, va)               \
+ ({                                                    \
+       va_list __ap;                                   \
+       int __ret;                                      \
+                                                       \
+       va_copy(__ap, *(va));                           \
+       __ret = vsnprintf(NULL, 0, fmt, __ap) + 1;      \
+       va_end(__ap);                                   \
+                                                       \
+       min(__ret, TRACE_EVENT_STR_MAX);                \
+ })
+
+to figure out the length needed to store the string. It may be slightly
+slower, as it needs to run vsnprintf() twice, but it now saves space on
+the ring buffer.
+
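+A hedged usage sketch of the new helpers (event name and arguments
+hypothetical):
+
+ TRACE_EVENT(my_dbg,
+       TP_PROTO(struct va_format *vaf),
+       TP_ARGS(vaf),
+       TP_STRUCT__entry(
+               __vstring(msg, vaf->fmt, vaf->va)
+       ),
+       TP_fast_assign(
+               __assign_vstr(msg, vaf->fmt, vaf->va);
+       ),
+       TP_printk("%s", __get_str(msg))
+ );
+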
+Link: https://lkml.kernel.org/r/20220705224749.053570613@goodmis.org
+
+Cc: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: Leon Romanovsky <leon@kernel.org>
+Cc: Kalle Valo <kvalo@kernel.org>
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Cc: Arend van Spriel <aspriel@gmail.com>
+Cc: Franky Lin <franky.lin@broadcom.com>
+Cc: Hante Meuleman <hante.meuleman@broadcom.com>
+Cc: Gregory Greenman <gregory.greenman@intel.com>
+Cc: Peter Chen <peter.chen@kernel.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Mathias Nyman <mathias.nyman@intel.com>
+Cc: Chunfeng Yun <chunfeng.yun@mediatek.com>
+Cc: Bin Liu <b-liu@ti.com>
+Cc: Marek Lindner <mareklindner@neomailbox.ch>
+Cc: Simon Wunderlich <sw@simonwunderlich.de>
+Cc: Antonio Quartulli <a@unstable.cc>
+Cc: Sven Eckelmann <sven@narfation.org>
+Cc: Johannes Berg <johannes@sipsolutions.net>
+Cc: Jim Cromie <jim.cromie@gmail.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/trace_events.h                 | 18 ++++++++++++++++++
+ include/trace/stages/stage1_struct_define.h  |  3 +++
+ include/trace/stages/stage2_data_offsets.h   |  3 +++
+ include/trace/stages/stage4_event_fields.h   |  3 +++
+ include/trace/stages/stage5_get_offsets.h    |  4 ++++
+ include/trace/stages/stage6_event_callback.h |  7 +++++++
+ 6 files changed, 38 insertions(+)
+
+diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
+index e6e95a9f07a5..b18759a673c6 100644
+--- a/include/linux/trace_events.h
++++ b/include/linux/trace_events.h
+@@ -916,6 +916,24 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type,
+ #endif
++#define TRACE_EVENT_STR_MAX   512
++
++/*
++ * gcc warns that you can not use a va_list in an inlined
++ * function. But lets me make it into a macro :-/
++ */
++#define __trace_event_vstr_len(fmt, va)                       \
++({                                                    \
++      va_list __ap;                                   \
++      int __ret;                                      \
++                                                      \
++      va_copy(__ap, *(va));                           \
++      __ret = vsnprintf(NULL, 0, fmt, __ap) + 1;      \
++      va_end(__ap);                                   \
++                                                      \
++      min(__ret, TRACE_EVENT_STR_MAX);                \
++})
++
+ #endif /* _LINUX_TRACE_EVENT_H */
+ /*
+diff --git a/include/trace/stages/stage1_struct_define.h b/include/trace/stages/stage1_struct_define.h
+index a16783419687..1b7bab60434c 100644
+--- a/include/trace/stages/stage1_struct_define.h
++++ b/include/trace/stages/stage1_struct_define.h
+@@ -26,6 +26,9 @@
+ #undef __string_len
+ #define __string_len(item, src, len) __dynamic_array(char, item, -1)
++#undef __vstring
++#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1)
++
+ #undef __bitmask
+ #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)
+diff --git a/include/trace/stages/stage2_data_offsets.h b/include/trace/stages/stage2_data_offsets.h
+index 42fd1e8813ec..1b7a8f764fdd 100644
+--- a/include/trace/stages/stage2_data_offsets.h
++++ b/include/trace/stages/stage2_data_offsets.h
+@@ -32,6 +32,9 @@
+ #undef __string_len
+ #define __string_len(item, src, len) __dynamic_array(char, item, -1)
++#undef __vstring
++#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1)
++
+ #undef __bitmask
+ #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h
+index e80cdc397a43..c3790ec7a453 100644
+--- a/include/trace/stages/stage4_event_fields.h
++++ b/include/trace/stages/stage4_event_fields.h
+@@ -38,6 +38,9 @@
+ #undef __string_len
+ #define __string_len(item, src, len) __dynamic_array(char, item, -1)
++#undef __vstring
++#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1)
++
+ #undef __bitmask
+ #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h
+index 7ee5931300e6..fba4c24ed9e6 100644
+--- a/include/trace/stages/stage5_get_offsets.h
++++ b/include/trace/stages/stage5_get_offsets.h
+@@ -39,6 +39,10 @@
+ #undef __string_len
+ #define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1)
++#undef __vstring
++#define __vstring(item, fmt, ap) __dynamic_array(char, item,          \
++                    __trace_event_vstr_len(fmt, ap))
++
+ #undef __rel_dynamic_array
+ #define __rel_dynamic_array(type, item, len)                          \
+       __item_length = (len) * sizeof(type);                           \
+diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h
+index e1724f73594b..0f51f6b3ab70 100644
+--- a/include/trace/stages/stage6_event_callback.h
++++ b/include/trace/stages/stage6_event_callback.h
+@@ -24,6 +24,9 @@
+ #undef __string_len
+ #define __string_len(item, src, len) __dynamic_array(char, item, -1)
++#undef __vstring
++#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1)
++
+ #undef __assign_str
+ #define __assign_str(dst, src)                                                \
+       strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)");
+@@ -35,6 +38,10 @@
+               __get_str(dst)[len] = '\0';                             \
+       } while(0)
++#undef __assign_vstr
++#define __assign_vstr(dst, fmt, va)                                   \
++      vsnprintf(__get_str(dst), TRACE_EVENT_STR_MAX, fmt, *(va))
++
+ #undef __bitmask
+ #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+-- 
+2.35.1
+
diff --git a/queue-5.18/tracing-use-a-struct-alignof-to-determine-trace-even.patch b/queue-5.18/tracing-use-a-struct-alignof-to-determine-trace-even.patch
new file mode 100644 (file)
index 0000000..1ea5bee
--- /dev/null
@@ -0,0 +1,80 @@
+From 1551cb64c9964c8e7535b88210b5993eb066ab5c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 31 Jul 2022 01:59:28 -0400
+Subject: tracing: Use a struct alignof to determine trace event field
+ alignment
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit 4c3d2f9388d36eb28640a220a6f908328442d873 ]
+
+alignof() gives the alignment of types as they would be as standalone
+variables. But alignment in structures might be different, and when
+building the fields of events, the alignment must be the actual
+in-structure alignment, otherwise the field offsets may not match what
+they actually are.
+
+This caused trace-cmd to crash, as libtraceevent did not check if the
+field offset was bigger than the event itself. The write_msr and read_msr
+events on 32 bit had their fields incorrect, because they have a u64 field
+between two ints. alignof(u64) would give 8, but the u64 field was at a
+4-byte alignment.
+
+Define a macro as:
+
+   ALIGN_STRUCTFIELD(type) ((int)(offsetof(struct {char a; type b;}, b)))
+
+which gives the actual alignment of types in a structure.
+
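+A small host-side sketch of the difference (assuming a typical i386
+ABI, where gcc's __alignof__(long long) reports 8 while the in-struct
+alignment is 4):
+
+ #include <stdio.h>
+ #include <stddef.h>
+
+ #define ALIGN_STRUCTFIELD(type) \
+       ((int)(offsetof(struct {char a; type b;}, b)))
+
+ int main(void)
+ {
+       /* standalone alignment: 8 on i386 with gcc */
+       printf("alignof   = %zu\n", __alignof__(long long));
+       /* actual in-struct alignment: 4 on i386, 8 on x86-64 */
+       printf("in-struct = %d\n", ALIGN_STRUCTFIELD(long long));
+       return 0;
+ }
+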
+Link: https://lkml.kernel.org/r/20220731015928.7ab3a154@rorschach.local.home
+
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: stable@vger.kernel.org
+Fixes: 04ae87a52074e ("ftrace: Rework event_create_dir()")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/stages/stage4_event_fields.h | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h
+index c3790ec7a453..80d34f396555 100644
+--- a/include/trace/stages/stage4_event_fields.h
++++ b/include/trace/stages/stage4_event_fields.h
+@@ -2,16 +2,18 @@
+ /* Stage 4 definitions for creating trace events */
++#define ALIGN_STRUCTFIELD(type) ((int)(offsetof(struct {char a; type b;}, b)))
++
+ #undef __field_ext
+ #define __field_ext(_type, _item, _filter_type) {                     \
+       .type = #_type, .name = #_item,                                 \
+-      .size = sizeof(_type), .align = __alignof__(_type),             \
++      .size = sizeof(_type), .align = ALIGN_STRUCTFIELD(_type),       \
+       .is_signed = is_signed_type(_type), .filter_type = _filter_type },
+ #undef __field_struct_ext
+ #define __field_struct_ext(_type, _item, _filter_type) {              \
+       .type = #_type, .name = #_item,                                 \
+-      .size = sizeof(_type), .align = __alignof__(_type),             \
++      .size = sizeof(_type), .align = ALIGN_STRUCTFIELD(_type),       \
+       0, .filter_type = _filter_type },
+ #undef __field
+@@ -23,7 +25,7 @@
+ #undef __array
+ #define __array(_type, _item, _len) {                                 \
+       .type = #_type"["__stringify(_len)"]", .name = #_item,          \
+-      .size = sizeof(_type[_len]), .align = __alignof__(_type),       \
++      .size = sizeof(_type[_len]), .align = ALIGN_STRUCTFIELD(_type), \
+       .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER },
+ #undef __dynamic_array
+-- 
+2.35.1
+
diff --git a/queue-5.18/tty-8250-add-support-for-brainboxes-px-cards.patch b/queue-5.18/tty-8250-add-support-for-brainboxes-px-cards.patch
new file mode 100644 (file)
index 0000000..b7778fa
--- /dev/null
@@ -0,0 +1,147 @@
+From 16376f07f879be2fc73315678d2b0862b01399b0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Jul 2022 16:35:10 +0100
+Subject: tty: 8250: Add support for Brainboxes PX cards.
+
+From: Cameron Williams <cang1@live.co.uk>
+
+[ Upstream commit ef5a03a26c87a760bc3d86b5af7b773e82f8b1b7 ]
+
+Add support for some of the Brainboxes PCIe (PX) range of
+serial cards, including the PX-101, PX-235/PX-246,
+PX-203/PX-257, PX-260/PX-701, PX-310, PX-313,
+PX-320/PX-324/PX-376/PX-387, PX-335/PX-346, PX-368, PX-420,
+PX-803 and PX-846.
+
+Signed-off-by: Cameron Williams <cang1@live.co.uk>
+Cc: stable <stable@kernel.org>
+Link: https://lore.kernel.org/r/AM5PR0202MB2564669252BDC59BF55A6E87C4879@AM5PR0202MB2564.eurprd02.prod.outlook.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/8250/8250_pci.c | 109 +++++++++++++++++++++++++++++
+ 1 file changed, 109 insertions(+)
+
+diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
+index 818ed6cd3132..aeac20f7cbb2 100644
+--- a/drivers/tty/serial/8250/8250_pci.c
++++ b/drivers/tty/serial/8250/8250_pci.c
+@@ -5063,6 +5063,115 @@ static const struct pci_device_id serial_pci_tbl[] = {
+               PCI_ANY_ID, PCI_ANY_ID,
+               0, 0,
+               pbn_b2_4_115200 },
++      /*
++       * Brainboxes PX-101
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4005,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_b0_2_115200 },
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4019,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_2_15625000 },
++      /*
++       * Brainboxes PX-235/246
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4004,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_b0_1_115200 },
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4016,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_1_15625000 },
++      /*
++       * Brainboxes PX-203/PX-257
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4006,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_b0_2_115200 },
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4015,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_4_15625000 },
++      /*
++       * Brainboxes PX-260/PX-701
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x400A,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_4_15625000 },
++      /*
++       * Brainboxes PX-310
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x400E,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_2_15625000 },
++      /*
++       * Brainboxes PX-313
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x400C,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_2_15625000 },
++      /*
++       * Brainboxes PX-320/324/PX-376/PX-387
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x400B,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_1_15625000 },
++      /*
++       * Brainboxes PX-335/346
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x400F,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_4_15625000 },
++      /*
++       * Brainboxes PX-368
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4010,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_4_15625000 },
++      /*
++       * Brainboxes PX-420
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4000,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_b0_4_115200 },
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4011,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_4_15625000 },
++      /*
++       * Brainboxes PX-803
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4009,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_b0_1_115200 },
++      {       PCI_VENDOR_ID_INTASHIELD, 0x401E,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_1_15625000 },
++      /*
++       * Brainboxes PX-846
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4008,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_b0_1_115200 },
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4017,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_1_15625000 },
++
+       /*
+        * Perle PCI-RAS cards
+        */
+-- 
+2.35.1
+
diff --git a/queue-5.18/usbnet-smsc95xx-avoid-link-settings-race-on-interrup.patch b/queue-5.18/usbnet-smsc95xx-avoid-link-settings-race-on-interrup.patch
new file mode 100644 (file)
index 0000000..affb52b
--- /dev/null
@@ -0,0 +1,122 @@
+From a7b73d5774f2b3d6656d48dbee4b2f995629a2c2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 May 2022 10:42:04 +0200
+Subject: usbnet: smsc95xx: Avoid link settings race on interrupt reception
+
+From: Lukas Wunner <lukas@wunner.de>
+
+[ Upstream commit 8960f878e39fadc03d74292a6731f1e914cf2019 ]
+
+When a PHY interrupt is signaled, the SMSC LAN95xx driver updates the
+MAC full duplex mode and PHY flow control registers based on cached data
+in struct phy_device:
+
+  smsc95xx_status()                 # raises EVENT_LINK_RESET
+    usbnet_deferred_kevent()
+      smsc95xx_link_reset()         # uses cached data in phydev
+
+Simultaneously, phylib polls link status once per second and updates
+that cached data:
+
+  phy_state_machine()
+    phy_check_link_status()
+      phy_read_status()
+        lan87xx_read_status()
+          genphy_read_status()      # updates cached data in phydev
+
+If smsc95xx_link_reset() wins the race against genphy_read_status(),
+the registers may be updated based on stale data.
+
+E.g. if the link was previously down, phydev->duplex is set to
+DUPLEX_UNKNOWN and that's what smsc95xx_link_reset() will use, even
+though genphy_read_status() may update it to DUPLEX_FULL afterwards.
+
+PHY interrupts are currently only enabled on suspend to trigger wakeup,
+so the impact of the race is limited, but we're about to enable them
+perpetually.
+
+Avoid the race by delaying execution of smsc95xx_link_reset() until
+phy_state_machine() has done its job and calls back via
+smsc95xx_handle_link_change().
+
+Signaling EVENT_LINK_RESET on wakeup is not necessary because phylib
+picks up link status changes through polling.  So drop the declaration
+of a ->link_reset() callback.
+
+Note that the semicolon on a line by itself added in smsc95xx_status()
+is a placeholder for a function call which will be added in a subsequent
+commit.  That function call will actually handle the INT_ENP_PHY_INT_
+interrupt.
+
+Tested-by: Oleksij Rempel <o.rempel@pengutronix.de> # LAN9514/9512/9500
+Tested-by: Ferry Toth <fntoth@gmail.com> # LAN9514
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/smsc95xx.c | 16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
+index 2cb44d65bbc3..f5a208948d22 100644
+--- a/drivers/net/usb/smsc95xx.c
++++ b/drivers/net/usb/smsc95xx.c
+@@ -566,7 +566,7 @@ static int smsc95xx_phy_update_flowcontrol(struct usbnet *dev)
+       return smsc95xx_write_reg(dev, AFC_CFG, afc_cfg);
+ }
+-static int smsc95xx_link_reset(struct usbnet *dev)
++static void smsc95xx_mac_update_fullduplex(struct usbnet *dev)
+ {
+       struct smsc95xx_priv *pdata = dev->driver_priv;
+       unsigned long flags;
+@@ -583,14 +583,16 @@ static int smsc95xx_link_reset(struct usbnet *dev)
+       spin_unlock_irqrestore(&pdata->mac_cr_lock, flags);
+       ret = smsc95xx_write_reg(dev, MAC_CR, pdata->mac_cr);
+-      if (ret < 0)
+-              return ret;
++      if (ret < 0) {
++              if (ret != -ENODEV)
++                      netdev_warn(dev->net,
++                                  "Error updating MAC full duplex mode\n");
++              return;
++      }
+       ret = smsc95xx_phy_update_flowcontrol(dev);
+       if (ret < 0)
+               netdev_warn(dev->net, "Error updating PHY flow control\n");
+-
+-      return ret;
+ }
+ static void smsc95xx_status(struct usbnet *dev, struct urb *urb)
+@@ -607,7 +609,7 @@ static void smsc95xx_status(struct usbnet *dev, struct urb *urb)
+       netif_dbg(dev, link, dev->net, "intdata: 0x%08X\n", intdata);
+       if (intdata & INT_ENP_PHY_INT_)
+-              usbnet_defer_kevent(dev, EVENT_LINK_RESET);
++              ;
+       else
+               netdev_warn(dev->net, "unexpected interrupt, intdata=0x%08X\n",
+                           intdata);
+@@ -1088,6 +1090,7 @@ static void smsc95xx_handle_link_change(struct net_device *net)
+       struct usbnet *dev = netdev_priv(net);
+       phy_print_status(net->phydev);
++      smsc95xx_mac_update_fullduplex(dev);
+       usbnet_defer_kevent(dev, EVENT_LINK_CHANGE);
+ }
+@@ -1993,7 +1996,6 @@ static const struct driver_info smsc95xx_info = {
+       .description    = "smsc95xx USB 2.0 Ethernet",
+       .bind           = smsc95xx_bind,
+       .unbind         = smsc95xx_unbind,
+-      .link_reset     = smsc95xx_link_reset,
+       .reset          = smsc95xx_reset,
+       .check_connect  = smsc95xx_start_phy,
+       .stop           = smsc95xx_stop,
+-- 
+2.35.1
+
diff --git a/queue-5.18/usbnet-smsc95xx-don-t-clear-read-only-phy-interrupt.patch b/queue-5.18/usbnet-smsc95xx-don-t-clear-read-only-phy-interrupt.patch
new file mode 100644 (file)
index 0000000..9ae8efb
--- /dev/null
@@ -0,0 +1,51 @@
+From 4a8d053acd3acd06d45b12cd8902178fdaf18acc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 May 2022 10:42:02 +0200
+Subject: usbnet: smsc95xx: Don't clear read-only PHY interrupt
+
+From: Lukas Wunner <lukas@wunner.de>
+
+[ Upstream commit 3108871f19221372b251f7da1ac38736928b5b3a ]
+
+Upon receiving data from the Interrupt Endpoint, the SMSC LAN95xx driver
+attempts to clear the signaled interrupts by writing "all ones" to the
+Interrupt Status Register.
+
+However the driver only ever enables a single type of interrupt, namely
+the PHY Interrupt.  And according to page 119 of the LAN950x datasheet,
+its bit in the Interrupt Status Register is read-only.  There's no other
+way to clear it than in a separate PHY register:
+
+https://www.microchip.com/content/dam/mchp/documents/UNG/ProductDocuments/DataSheets/LAN950x-Data-Sheet-DS00001875D.pdf
+
+Consequently, writing "all ones" to the Interrupt Status Register is
+pointless and can be dropped.
+
+Tested-by: Oleksij Rempel <o.rempel@pengutronix.de> # LAN9514/9512/9500
+Tested-by: Ferry Toth <fntoth@gmail.com> # LAN9514
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/smsc95xx.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
+index edf0492ad489..2cb44d65bbc3 100644
+--- a/drivers/net/usb/smsc95xx.c
++++ b/drivers/net/usb/smsc95xx.c
+@@ -572,10 +572,6 @@ static int smsc95xx_link_reset(struct usbnet *dev)
+       unsigned long flags;
+       int ret;
+-      ret = smsc95xx_write_reg(dev, INT_STS, INT_STS_CLEAR_ALL_);
+-      if (ret < 0)
+-              return ret;
+-
+       spin_lock_irqsave(&pdata->mac_cr_lock, flags);
+       if (pdata->phydev->duplex != DUPLEX_FULL) {
+               pdata->mac_cr &= ~MAC_CR_FDPX_;
+-- 
+2.35.1
+
diff --git a/queue-5.18/usbnet-smsc95xx-fix-deadlock-on-runtime-resume.patch b/queue-5.18/usbnet-smsc95xx-fix-deadlock-on-runtime-resume.patch
new file mode 100644 (file)
index 0000000..53522e1
--- /dev/null
@@ -0,0 +1,193 @@
+From df274cfed28127dbf7d1d06f05318fa92faec8b6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Jul 2022 22:47:51 +0200
+Subject: usbnet: smsc95xx: Fix deadlock on runtime resume
+
+From: Lukas Wunner <lukas@wunner.de>
+
+[ Upstream commit 7b960c967f2aa01ab8f45c5a0bd78e754cffdeee ]
+
+Commit 05b35e7eb9a1 ("smsc95xx: add phylib support") amended
+smsc95xx_resume() to call phy_init_hw().  That function waits for the
+device to runtime resume even though it is placed in the runtime resume
+path, causing a deadlock.
+
+The problem is that phy_init_hw() calls down to smsc95xx_mdiobus_read(),
+which never uses the _nopm variant of usbnet_read_cmd().
+
+Commit b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with
+reset operation") causes a similar deadlock on resume if the device was
+already runtime suspended when entering system sleep:
+
+That's because the commit introduced smsc95xx_reset_resume(), which
+calls down to smsc95xx_reset(), which neglects to use _nopm accessors.
+
+Fix by auto-detecting whether a device access is performed by the
+suspend/resume task_struct and using the _nopm variant if so.  This works
+because the PM core guarantees that suspend/resume callbacks are run in
+task context.
+
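+The pattern in a nutshell (a condensed sketch of the hunks below, not
+a drop-in implementation):
+
+ pdata->pm_task = current;             /* entering suspend/resume */
+ ret = usbnet_suspend(intf, message);  /* ... PM work ... */
+ pdata->pm_task = NULL;                /* leaving */
+
+ /* in the shared register accessors */
+ if (current != pdata->pm_task)
+       fn = usbnet_read_cmd;           /* may wake the device */
+ else
+       fn = usbnet_read_cmd_nopm;      /* no runtime-PM wake-up */
+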
+Stacktrace for posterity:
+
+  INFO: task kworker/2:1:49 blocked for more than 122 seconds.
+  Workqueue: usb_hub_wq hub_event
+  schedule
+  rpm_resume
+  __pm_runtime_resume
+  usb_autopm_get_interface
+  usbnet_read_cmd
+  __smsc95xx_read_reg
+  __smsc95xx_phy_wait_not_busy
+  __smsc95xx_mdio_read
+  smsc95xx_mdiobus_read
+  __mdiobus_read
+  mdiobus_read
+  smsc_phy_reset
+  phy_init_hw
+  smsc95xx_resume
+  usb_resume_interface
+  usb_resume_both
+  usb_runtime_resume
+  __rpm_callback
+  rpm_callback
+  rpm_resume
+  __pm_runtime_resume
+  usb_autoresume_device
+  hub_event
+  process_one_work
+
+Fixes: b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with reset operation")
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Cc: stable@vger.kernel.org # v3.16+
+Cc: Andre Edich <andre.edich@microchip.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/smsc95xx.c | 26 ++++++++++++++++++++------
+ 1 file changed, 20 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
+index 358b170cc8fb..515363d74078 100644
+--- a/drivers/net/usb/smsc95xx.c
++++ b/drivers/net/usb/smsc95xx.c
+@@ -71,6 +71,7 @@ struct smsc95xx_priv {
+       struct fwnode_handle *irqfwnode;
+       struct mii_bus *mdiobus;
+       struct phy_device *phydev;
++      struct task_struct *pm_task;
+ };
+ static bool turbo_mode = true;
+@@ -80,13 +81,14 @@ MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction");
+ static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
+                                           u32 *data, int in_pm)
+ {
++      struct smsc95xx_priv *pdata = dev->driver_priv;
+       u32 buf;
+       int ret;
+       int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16);
+       BUG_ON(!dev);
+-      if (!in_pm)
++      if (current != pdata->pm_task)
+               fn = usbnet_read_cmd;
+       else
+               fn = usbnet_read_cmd_nopm;
+@@ -110,13 +112,14 @@ static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
+ static int __must_check __smsc95xx_write_reg(struct usbnet *dev, u32 index,
+                                            u32 data, int in_pm)
+ {
++      struct smsc95xx_priv *pdata = dev->driver_priv;
+       u32 buf;
+       int ret;
+       int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16);
+       BUG_ON(!dev);
+-      if (!in_pm)
++      if (current != pdata->pm_task)
+               fn = usbnet_write_cmd;
+       else
+               fn = usbnet_write_cmd_nopm;
+@@ -1508,9 +1511,12 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
+       u32 val, link_up;
+       int ret;
++      pdata->pm_task = current;
++
+       ret = usbnet_suspend(intf, message);
+       if (ret < 0) {
+               netdev_warn(dev->net, "usbnet_suspend error\n");
++              pdata->pm_task = NULL;
+               return ret;
+       }
+@@ -1750,6 +1756,7 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
+       if (ret && PMSG_IS_AUTO(message))
+               usbnet_resume(intf);
++      pdata->pm_task = NULL;
+       return ret;
+ }
+@@ -1770,29 +1777,31 @@ static int smsc95xx_resume(struct usb_interface *intf)
+       /* do this first to ensure it's cleared even in error case */
+       pdata->suspend_flags = 0;
++      pdata->pm_task = current;
++
+       if (suspend_flags & SUSPEND_ALLMODES) {
+               /* clear wake-up sources */
+               ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val);
+               if (ret < 0)
+-                      return ret;
++                      goto done;
+               val &= ~(WUCSR_WAKE_EN_ | WUCSR_MPEN_);
+               ret = smsc95xx_write_reg_nopm(dev, WUCSR, val);
+               if (ret < 0)
+-                      return ret;
++                      goto done;
+               /* clear wake-up status */
+               ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val);
+               if (ret < 0)
+-                      return ret;
++                      goto done;
+               val &= ~PM_CTL_WOL_EN_;
+               val |= PM_CTL_WUPS_;
+               ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val);
+               if (ret < 0)
+-                      return ret;
++                      goto done;
+       }
+       phy_init_hw(pdata->phydev);
+@@ -1801,15 +1810,20 @@ static int smsc95xx_resume(struct usb_interface *intf)
+       if (ret < 0)
+               netdev_warn(dev->net, "usbnet_resume error\n");
++done:
++      pdata->pm_task = NULL;
+       return ret;
+ }
+ static int smsc95xx_reset_resume(struct usb_interface *intf)
+ {
+       struct usbnet *dev = usb_get_intfdata(intf);
++      struct smsc95xx_priv *pdata = dev->driver_priv;
+       int ret;
++      pdata->pm_task = current;
+       ret = smsc95xx_reset(dev);
++      pdata->pm_task = NULL;
+       if (ret < 0)
+               return ret;
+-- 
+2.35.1
+
diff --git a/queue-5.18/usbnet-smsc95xx-forward-phy-interrupts-to-phy-driver.patch b/queue-5.18/usbnet-smsc95xx-forward-phy-interrupts-to-phy-driver.patch
new file mode 100644 (file)
index 0000000..4fa83b0
--- /dev/null
@@ -0,0 +1,318 @@
+From 0a0b027602d0b6c0b8dcdfc10ac37e8ec0d648d4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 May 2022 10:42:05 +0200
+Subject: usbnet: smsc95xx: Forward PHY interrupts to PHY driver to avoid
+ polling
+
+From: Lukas Wunner <lukas@wunner.de>
+
+[ Upstream commit 1ce8b37241ed291af56f7a49bbdbf20c08728e88 ]
+
+Link status of SMSC LAN95xx chips is polled once per second, even though
+they're capable of signaling PHY interrupts through the MAC layer.
+
+Forward those interrupts to the PHY driver to avoid polling.  Benefits
+are reduced bus traffic, reduced CPU overhead and quicker interface
+bringup.
+
+Polling was introduced in 2016 by commit d69d16949346 ("usbnet:
+smsc95xx: fix link detection for disabled autonegotiation").
+Back then, the LAN95xx driver neglected to enable the ENERGYON interrupt,
+hence couldn't detect link-up events when auto-negotiation was disabled.
+The proper solution would have been to enable the ENERGYON interrupt
+instead of polling.
+
+Since then, PHY handling was moved from the LAN95xx driver to the SMSC
+PHY driver with commit 05b35e7eb9a1 ("smsc95xx: add phylib support").
+That PHY driver is capable of link detection with auto-negotiation
+disabled because it enables the ENERGYON interrupt.
+
+Note that signaling interrupts through the MAC layer not only works with
+the integrated PHY, but also with an external PHY, provided its
+interrupt pin is attached to LAN95xx's nPHY_INT pin.
+
+In the unlikely event that the interrupt pin of an external PHY is
+attached to a GPIO of the SoC (or not connected at all), the driver can
+be amended to retrieve the irq from the PHY's of_node.
+
+To forward PHY interrupts to phylib, it is not sufficient to call
+phy_mac_interrupt().  Instead, the PHY's interrupt handler needs to run
+so that PHY interrupts are cleared.  That's because according to page
+119 of the LAN950x datasheet, "The source of this interrupt is a level.
+The interrupt persists until it is cleared in the PHY."
+
+https://www.microchip.com/content/dam/mchp/documents/UNG/ProductDocuments/DataSheets/LAN950x-Data-Sheet-DS00001875D.pdf
+
+Therefore, create an IRQ domain with a single IRQ for the PHY.  In the
+future, the IRQ domain may be extended to support the 11 GPIOs on the
+LAN95xx.
+
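+A condensed sketch of the single-IRQ domain setup (error handling
+omitted; the full sequence is in the hunks below):
+
+ pdata->irqfwnode = irq_domain_alloc_named_fwnode(usb_path);
+ pdata->irqdomain = irq_domain_create_linear(pdata->irqfwnode,
+                                             SMSC95XX_NR_IRQS,
+                                             &irq_domain_simple_ops,
+                                             pdata);
+ phy_irq = irq_create_mapping(pdata->irqdomain, PHY_HWIRQ);
+
+ pdata->irqchip = dummy_irq_chip;      /* ->irq_mask() is a no-op */
+ pdata->irqchip.name = SMSC_CHIPNAME;
+ irq_set_chip_and_handler_name(phy_irq, &pdata->irqchip,
+                               handle_simple_irq, "phy");
+ pdata->phydev->irq = phy_irq;         /* hand the irq to phylib */
+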
+Normally the PHY interrupt should be masked until the PHY driver has
+cleared it.  However masking requires a (sleeping) USB transaction and
+interrupts are received in (non-sleepable) softirq context.  I decided
+not to mask the interrupt at all (by using the dummy_irq_chip's noop
+->irq_mask() callback):  The USB interrupt endpoint is polled in 1 msec
+intervals and normally that's sufficient to wake the PHY driver's IRQ
+thread and have it clear the interrupt.  If it does take longer, worst
+thing that can happen is the IRQ thread is woken again.  No big deal.
+
+Because PHY interrupts are now perpetually enabled, there's no need to
+selectively enable them on suspend.  So remove all invocations of
+smsc95xx_enable_phy_wakeup_interrupts().
+
+In smsc95xx_resume(), move the call of phy_init_hw() before
+usbnet_resume() (which restarts the status URB) to ensure that the PHY
+is fully initialized when an interrupt is handled.
+
+Tested-by: Oleksij Rempel <o.rempel@pengutronix.de> # LAN9514/9512/9500
+Tested-by: Ferry Toth <fntoth@gmail.com> # LAN9514
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch> # from a PHY perspective
+Cc: Andre Edich <andre.edich@microchip.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/smsc95xx.c | 113 ++++++++++++++++++++-----------------
+ 1 file changed, 61 insertions(+), 52 deletions(-)
+
+diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
+index f5a208948d22..358b170cc8fb 100644
+--- a/drivers/net/usb/smsc95xx.c
++++ b/drivers/net/usb/smsc95xx.c
+@@ -18,6 +18,8 @@
+ #include <linux/usb/usbnet.h>
+ #include <linux/slab.h>
+ #include <linux/of_net.h>
++#include <linux/irq.h>
++#include <linux/irqdomain.h>
+ #include <linux/mdio.h>
+ #include <linux/phy.h>
+ #include <net/selftests.h>
+@@ -53,6 +55,9 @@
+ #define SUSPEND_ALLMODES              (SUSPEND_SUSPEND0 | SUSPEND_SUSPEND1 | \
+                                        SUSPEND_SUSPEND2 | SUSPEND_SUSPEND3)
++#define SMSC95XX_NR_IRQS              (1) /* raise to 12 for GPIOs */
++#define PHY_HWIRQ                     (SMSC95XX_NR_IRQS - 1)
++
+ struct smsc95xx_priv {
+       u32 mac_cr;
+       u32 hash_hi;
+@@ -61,6 +66,9 @@ struct smsc95xx_priv {
+       spinlock_t mac_cr_lock;
+       u8 features;
+       u8 suspend_flags;
++      struct irq_chip irqchip;
++      struct irq_domain *irqdomain;
++      struct fwnode_handle *irqfwnode;
+       struct mii_bus *mdiobus;
+       struct phy_device *phydev;
+ };
+@@ -597,6 +605,8 @@ static void smsc95xx_mac_update_fullduplex(struct usbnet *dev)
+ static void smsc95xx_status(struct usbnet *dev, struct urb *urb)
+ {
++      struct smsc95xx_priv *pdata = dev->driver_priv;
++      unsigned long flags;
+       u32 intdata;
+       if (urb->actual_length != 4) {
+@@ -608,11 +618,15 @@ static void smsc95xx_status(struct usbnet *dev, struct urb *urb)
+       intdata = get_unaligned_le32(urb->transfer_buffer);
+       netif_dbg(dev, link, dev->net, "intdata: 0x%08X\n", intdata);
++      local_irq_save(flags);
++
+       if (intdata & INT_ENP_PHY_INT_)
+-              ;
++              generic_handle_domain_irq(pdata->irqdomain, PHY_HWIRQ);
+       else
+               netdev_warn(dev->net, "unexpected interrupt, intdata=0x%08X\n",
+                           intdata);
++
++      local_irq_restore(flags);
+ }
+ /* Enable or disable Tx & Rx checksum offload engines */
+@@ -1098,8 +1112,9 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
+ {
+       struct smsc95xx_priv *pdata;
+       bool is_internal_phy;
++      char usb_path[64];
++      int ret, phy_irq;
+       u32 val;
+-      int ret;
+       printk(KERN_INFO SMSC_CHIPNAME " v" SMSC_DRIVER_VERSION "\n");
+@@ -1139,10 +1154,38 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
+       if (ret)
+               goto free_pdata;
++      /* create irq domain for use by PHY driver and GPIO consumers */
++      usb_make_path(dev->udev, usb_path, sizeof(usb_path));
++      pdata->irqfwnode = irq_domain_alloc_named_fwnode(usb_path);
++      if (!pdata->irqfwnode) {
++              ret = -ENOMEM;
++              goto free_pdata;
++      }
++
++      pdata->irqdomain = irq_domain_create_linear(pdata->irqfwnode,
++                                                  SMSC95XX_NR_IRQS,
++                                                  &irq_domain_simple_ops,
++                                                  pdata);
++      if (!pdata->irqdomain) {
++              ret = -ENOMEM;
++              goto free_irqfwnode;
++      }
++
++      phy_irq = irq_create_mapping(pdata->irqdomain, PHY_HWIRQ);
++      if (!phy_irq) {
++              ret = -ENOENT;
++              goto remove_irqdomain;
++      }
++
++      pdata->irqchip = dummy_irq_chip;
++      pdata->irqchip.name = SMSC_CHIPNAME;
++      irq_set_chip_and_handler_name(phy_irq, &pdata->irqchip,
++                                    handle_simple_irq, "phy");
++
+       pdata->mdiobus = mdiobus_alloc();
+       if (!pdata->mdiobus) {
+               ret = -ENOMEM;
+-              goto free_pdata;
++              goto dispose_irq;
+       }
+       ret = smsc95xx_read_reg(dev, HW_CFG, &val);
+@@ -1175,6 +1218,7 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
+               goto unregister_mdio;
+       }
++      pdata->phydev->irq = phy_irq;
+       pdata->phydev->is_internal = is_internal_phy;
+       /* detect device revision as different features may be available */
+@@ -1217,6 +1261,15 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
+ free_mdio:
+       mdiobus_free(pdata->mdiobus);
++dispose_irq:
++      irq_dispose_mapping(phy_irq);
++
++remove_irqdomain:
++      irq_domain_remove(pdata->irqdomain);
++
++free_irqfwnode:
++      irq_domain_free_fwnode(pdata->irqfwnode);
++
+ free_pdata:
+       kfree(pdata);
+       return ret;
+@@ -1229,6 +1282,9 @@ static void smsc95xx_unbind(struct usbnet *dev, struct usb_interface *intf)
+       phy_disconnect(dev->net->phydev);
+       mdiobus_unregister(pdata->mdiobus);
+       mdiobus_free(pdata->mdiobus);
++      irq_dispose_mapping(irq_find_mapping(pdata->irqdomain, PHY_HWIRQ));
++      irq_domain_remove(pdata->irqdomain);
++      irq_domain_free_fwnode(pdata->irqfwnode);
+       netif_dbg(dev, ifdown, dev->net, "free pdata\n");
+       kfree(pdata);
+ }
+@@ -1253,29 +1309,6 @@ static u32 smsc_crc(const u8 *buffer, size_t len, int filter)
+       return crc << ((filter % 2) * 16);
+ }
+-static int smsc95xx_enable_phy_wakeup_interrupts(struct usbnet *dev, u16 mask)
+-{
+-      int ret;
+-
+-      netdev_dbg(dev->net, "enabling PHY wakeup interrupts\n");
+-
+-      /* read to clear */
+-      ret = smsc95xx_mdio_read_nopm(dev, PHY_INT_SRC);
+-      if (ret < 0)
+-              return ret;
+-
+-      /* enable interrupt source */
+-      ret = smsc95xx_mdio_read_nopm(dev, PHY_INT_MASK);
+-      if (ret < 0)
+-              return ret;
+-
+-      ret |= mask;
+-
+-      smsc95xx_mdio_write_nopm(dev, PHY_INT_MASK, ret);
+-
+-      return 0;
+-}
+-
+ static int smsc95xx_link_ok_nopm(struct usbnet *dev)
+ {
+       int ret;
+@@ -1442,7 +1475,6 @@ static int smsc95xx_enter_suspend3(struct usbnet *dev)
+ static int smsc95xx_autosuspend(struct usbnet *dev, u32 link_up)
+ {
+       struct smsc95xx_priv *pdata = dev->driver_priv;
+-      int ret;
+       if (!netif_running(dev->net)) {
+               /* interface is ifconfig down so fully power down hw */
+@@ -1461,27 +1493,10 @@ static int smsc95xx_autosuspend(struct usbnet *dev, u32 link_up)
+               }
+               netdev_dbg(dev->net, "autosuspend entering SUSPEND1\n");
+-
+-              /* enable PHY wakeup events for if cable is attached */
+-              ret = smsc95xx_enable_phy_wakeup_interrupts(dev,
+-                      PHY_INT_MASK_ANEG_COMP_);
+-              if (ret < 0) {
+-                      netdev_warn(dev->net, "error enabling PHY wakeup ints\n");
+-                      return ret;
+-              }
+-
+               netdev_info(dev->net, "entering SUSPEND1 mode\n");
+               return smsc95xx_enter_suspend1(dev);
+       }
+-      /* enable PHY wakeup events so we remote wakeup if cable is pulled */
+-      ret = smsc95xx_enable_phy_wakeup_interrupts(dev,
+-              PHY_INT_MASK_LINK_DOWN_);
+-      if (ret < 0) {
+-              netdev_warn(dev->net, "error enabling PHY wakeup ints\n");
+-              return ret;
+-      }
+-
+       netdev_dbg(dev->net, "autosuspend entering SUSPEND3\n");
+       return smsc95xx_enter_suspend3(dev);
+ }
+@@ -1547,13 +1562,6 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
+       }
+       if (pdata->wolopts & WAKE_PHY) {
+-              ret = smsc95xx_enable_phy_wakeup_interrupts(dev,
+-                      (PHY_INT_MASK_ANEG_COMP_ | PHY_INT_MASK_LINK_DOWN_));
+-              if (ret < 0) {
+-                      netdev_warn(dev->net, "error enabling PHY wakeup ints\n");
+-                      goto done;
+-              }
+-
+               /* if link is down then configure EDPD and enter SUSPEND1,
+                * otherwise enter SUSPEND0 below
+                */
+@@ -1787,11 +1795,12 @@ static int smsc95xx_resume(struct usb_interface *intf)
+                       return ret;
+       }
++      phy_init_hw(pdata->phydev);
++
+       ret = usbnet_resume(intf);
+       if (ret < 0)
+               netdev_warn(dev->net, "usbnet_resume error\n");
+-      phy_init_hw(pdata->phydev);
+       return ret;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.18/x86-kprobes-update-kcb-status-flag-after-singlestepp.patch b/queue-5.18/x86-kprobes-update-kcb-status-flag-after-singlestepp.patch
new file mode 100644 (file)
index 0000000..0a98447
--- /dev/null
@@ -0,0 +1,67 @@
+From 2eb8d6ab6b01411d529d883d9a23396b251b4c91 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Aug 2022 15:04:16 +0900
+Subject: x86/kprobes: Update kcb status flag after singlestepping
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit dec8784c9088b131a1523f582c2194cfc8107dc0 ]
+
+Fix kprobes to update the kcb (kprobes control block) status flag to
+KPROBE_HIT_SSDONE even if the kp->post_handler is not set.
+
+This bug may cause a kernel panic if another INT3 user runs right
+after kprobes, because kprobe_int3_handler() then mistakes that INT3
+for kprobe's single-stepping INT3.
+
+Fixes: 6256e668b7af ("x86/kprobes: Use int3 instead of debug trap for single-step")
+Reported-by: Daniel Müller <deso@posteo.net>
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Tested-by: Daniel Müller <deso@posteo.net>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20220727210136.jjgc3lpqeq42yr3m@muellerd-fedora-PC2BDTX9
+Link: https://lore.kernel.org/r/165942025658.342061.12452378391879093249.stgit@devnote2
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/kprobes/core.c | 18 +++++++++++-------
+ 1 file changed, 11 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
+index 7c4ab8870da4..74167dc5f55e 100644
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -814,16 +814,20 @@ set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+ static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs,
+                              struct kprobe_ctlblk *kcb)
+ {
+-      if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+-              kcb->kprobe_status = KPROBE_HIT_SSDONE;
+-              cur->post_handler(cur, regs, 0);
+-      }
+-
+       /* Restore back the original saved kprobes variables and continue. */
+-      if (kcb->kprobe_status == KPROBE_REENTER)
++      if (kcb->kprobe_status == KPROBE_REENTER) {
++              /* This will restore both kcb and current_kprobe */
+               restore_previous_kprobe(kcb);
+-      else
++      } else {
++              /*
++               * Always update the kcb status because
+              * reset_current_kprobe() doesn't update kcb.
++               */
++              kcb->kprobe_status = KPROBE_HIT_SSDONE;
++              if (cur->post_handler)
++                      cur->post_handler(cur, regs, 0);
+               reset_current_kprobe();
++      }
+ }
+ NOKPROBE_SYMBOL(kprobe_post_process);
+-- 
+2.35.1
+
diff --git a/queue-5.18/x86-olpc-fix-logical-not-is-only-applied-to-the-left.patch b/queue-5.18/x86-olpc-fix-logical-not-is-only-applied-to-the-left.patch
new file mode 100644 (file)
index 0000000..1b88106
--- /dev/null
@@ -0,0 +1,54 @@
+From b63e4693e67826d5eb07e8254daed08277df7133 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 17:15:36 +0200
+Subject: x86/olpc: fix 'logical not is only applied to the left hand side'
+
+From: Alexander Lobakin <alexandr.lobakin@intel.com>
+
+[ Upstream commit 3a2ba42cbd0b669ce3837ba400905f93dd06c79f ]
+
+The bitops compile-time optimization series revealed one more
+problem in olpc-xo1-sci.c:send_ebook_state(), resulting in GCC
+warnings:
+
+arch/x86/platform/olpc/olpc-xo1-sci.c: In function 'send_ebook_state':
+arch/x86/platform/olpc/olpc-xo1-sci.c:83:63: warning: logical not is only applied to the left hand side of comparison [-Wlogical-not-parentheses]
+   83 |         if (!!test_bit(SW_TABLET_MODE, ebook_switch_idev->sw) == state)
+      |                                                               ^~
+arch/x86/platform/olpc/olpc-xo1-sci.c:83:13: note: add parentheses around left hand side expression to silence this warning
+
+Despite this code working as intended, this redundant double
+negation of a boolean value, together with comparing to `char`
+with no explicit conversion to bool, makes compilers think
+the author made some unintentional logical mistakes here.
+Make it the other way around and negate the char instead
+to silence the warnings.
+
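+A tiny sketch of the hazard class the warning guards against (values
+hypothetical; in this driver the compared value is in fact always 0 or
+1, which is why the code worked):
+
+ int main(void)
+ {
+       char state = 2;          /* any nonzero "true" value */
+       int  bit   = 1;          /* test_bit()-style result, 0 or 1 */
+
+       if (!!bit == state) { }  /* 1 == 2 -> false; gcc warns here */
+       if (bit == !!state) { }  /* 1 == 1 -> compares as booleans */
+       return 0;
+ }
+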
+Fixes: d2aa37411b8e ("x86/olpc/xo1/sci: Produce wakeup events for buttons and switches")
+Cc: stable@vger.kernel.org # 3.5+
+Reported-by: Guenter Roeck <linux@roeck-us.net>
+Reported-by: kernel test robot <lkp@intel.com>
+Reviewed-and-tested-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Alexander Lobakin <alexandr.lobakin@intel.com>
+Signed-off-by: Yury Norov <yury.norov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/platform/olpc/olpc-xo1-sci.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
+index f03a6883dcc6..89f25af4b3c3 100644
+--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
++++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
+@@ -80,7 +80,7 @@ static void send_ebook_state(void)
+               return;
+       }
+-      if (!!test_bit(SW_TABLET_MODE, ebook_switch_idev->sw) == state)
++      if (test_bit(SW_TABLET_MODE, ebook_switch_idev->sw) == !!state)
+               return; /* Nothing new to report. */
+       input_report_switch(ebook_switch_idev, SW_TABLET_MODE, state);
+-- 
+2.35.1
+