From: Greg Kroah-Hartman Date: Sun, 16 Oct 2022 11:25:26 +0000 (+0200) Subject: 5.19-stable patches X-Git-Tag: v5.4.219~138 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2684c70331f49ea58a10f51931fde3518074497e;p=thirdparty%2Fkernel%2Fstable-queue.git 5.19-stable patches added patches: cpufreq-qcom-cpufreq-hw-fix-uninitialized-throttled_freq-warning.patch mm-damon-validate-if-the-pmd-entry-is-present-before-accessing.patch mm-hugetlb-fix-races-when-looking-up-a-cont-pte-pmd-size-hugetlb-page.patch mm-mmap-undo-mmap-when-arch_validate_flags-fails.patch mm-uffd-fix-warning-without-pte_marker_uffd_wp-compiled-in.patch nfsd-protect-against-send-buffer-overflow-in-nfsv2-read.patch nfsd-protect-against-send-buffer-overflow-in-nfsv3-read.patch nfsd-protect-against-send-buffer-overflow-in-nfsv3-readdir.patch pci-sanitise-firmware-bar-assignments-behind-a-pci-pci-bridge.patch powercap-intel_rapl-use-standard-energy-unit-for-spr-dram-rapl-domain.patch powerpc-boot-explicitly-disable-usage-of-spe-instructions.patch powerpc-kconfig-fix-non-existing-config_ppc_fsl_booke.patch scsi-lpfc-rework-mib-rx-monitor-debug-info-logic.patch scsi-qedf-populate-sysfs-attributes-for-vport.patch serial-8250-let-drivers-request-full-16550a-feature-probing.patch serial-8250-request-full-16550a-feature-probing-for-oxsemi-pcie-devices.patch serial-cpm_uart-don-t-request-irq-too-early-for-console-port.patch serial-stm32-deassert-transmit-enable-on-rs485_config.patch slimbus-qcom-ngd-cleanup-in-probe-error-path.patch slimbus-qcom-ngd-use-correct-error-in-message-of-pdr_add_lookup-failure.patch xen-gntdev-accommodate-vma-splitting.patch xen-gntdev-prevent-leaking-grants.patch --- diff --git a/queue-5.19/cpufreq-qcom-cpufreq-hw-fix-uninitialized-throttled_freq-warning.patch b/queue-5.19/cpufreq-qcom-cpufreq-hw-fix-uninitialized-throttled_freq-warning.patch new file mode 100644 index 00000000000..1ec264150e7 --- /dev/null +++ 
b/queue-5.19/cpufreq-qcom-cpufreq-hw-fix-uninitialized-throttled_freq-warning.patch @@ -0,0 +1,52 @@ +From 91dc90fdb8b8199519a3aac9c46a433b02223c5b Mon Sep 17 00:00:00 2001 +From: Viresh Kumar +Date: Wed, 21 Sep 2022 12:30:38 +0530 +Subject: cpufreq: qcom-cpufreq-hw: Fix uninitialized throttled_freq warning + +From: Viresh Kumar + +commit 91dc90fdb8b8199519a3aac9c46a433b02223c5b upstream. + +Commit 6240aaad75e1 was supposed to drop the reference count to the OPP, +instead it avoided more stuff if the OPP isn't found. This isn't +entirely correct. We already have a frequency value available, we just +couldn't align it with an OPP in case of IS_ERR(opp). + +Lets continue with updating thermal pressure, etc, even if we aren't +able to find an OPP here. + +This fixes warning generated by the 'smatch' tool. + +Fixes: 6240aaad75e1 ("cpufreq: qcom-hw: fix the opp entries refcounting") +Cc: v5.18+ # v5.18+ +Reported-by: kernel test robot +Reported-by: Dan Carpenter +Reviewed-by: Neil Armstrong +Signed-off-by: Viresh Kumar +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cpufreq/qcom-cpufreq-hw.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/drivers/cpufreq/qcom-cpufreq-hw.c ++++ b/drivers/cpufreq/qcom-cpufreq-hw.c +@@ -317,14 +317,14 @@ static void qcom_lmh_dcvs_notify(struct + if (IS_ERR(opp)) { + dev_warn(dev, "Can't find the OPP for throttling: %pe!\n", opp); + } else { +- throttled_freq = freq_hz / HZ_PER_KHZ; +- +- /* Update thermal pressure (the boost frequencies are accepted) */ +- arch_update_thermal_pressure(policy->related_cpus, throttled_freq); +- + dev_pm_opp_put(opp); + } + ++ throttled_freq = freq_hz / HZ_PER_KHZ; ++ ++ /* Update thermal pressure (the boost frequencies are accepted) */ ++ arch_update_thermal_pressure(policy->related_cpus, throttled_freq); ++ + /* + * In the unlikely case policy is unregistered do not enable + * polling or h/w interrupt diff --git 
a/queue-5.19/mm-damon-validate-if-the-pmd-entry-is-present-before-accessing.patch b/queue-5.19/mm-damon-validate-if-the-pmd-entry-is-present-before-accessing.patch new file mode 100644 index 00000000000..14994e19ab1 --- /dev/null +++ b/queue-5.19/mm-damon-validate-if-the-pmd-entry-is-present-before-accessing.patch @@ -0,0 +1,66 @@ +From c8b9aff419303e4d4219b5ff64b1c7e062dee48e Mon Sep 17 00:00:00 2001 +From: Baolin Wang +Date: Thu, 18 Aug 2022 15:37:43 +0800 +Subject: mm/damon: validate if the pmd entry is present before accessing + +From: Baolin Wang + +commit c8b9aff419303e4d4219b5ff64b1c7e062dee48e upstream. + +pmd_huge() is used to validate if the pmd entry is mapped by a huge page, +also including the case of non-present (migration or hwpoisoned) pmd entry +on arm64 or x86 architectures. This means that pmd_pfn() can not get the +correct pfn number for a non-present pmd entry, which will cause +damon_get_page() to get an incorrect page struct (also may be NULL by +pfn_to_online_page()), making the access statistics incorrect. + +This means that DAMON may make incorrect decisions according to the +incorrect statistics, for example, DAMON may not be able to reclaim a cold +page in time because this cold page was mistakenly regarded as accessed if +the DAMOS_PAGEOUT operation is specified. + +Moreover it does not make sense that we still waste time to get the page +of the non-present entry. Just treat it as not-accessed and skip it, +which maintains consistency with non-present pte level entries. + +So add pmd entry present validation to fix the above issues. 
+ +Link: https://lkml.kernel.org/r/58b1d1f5fbda7db49ca886d9ef6783e3dcbbbc98.1660805030.git.baolin.wang@linux.alibaba.com +Fixes: 3f49584b262c ("mm/damon: implement primitives for the virtual memory address spaces") +Signed-off-by: Baolin Wang +Reviewed-by: SeongJae Park +Reviewed-by: Muchun Song +Cc: Mike Kravetz +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/vaddr.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/mm/damon/vaddr.c ++++ b/mm/damon/vaddr.c +@@ -304,6 +304,11 @@ static int damon_mkold_pmd_entry(pmd_t * + + if (pmd_huge(*pmd)) { + ptl = pmd_lock(walk->mm, pmd); ++ if (!pmd_present(*pmd)) { ++ spin_unlock(ptl); ++ return 0; ++ } ++ + if (pmd_huge(*pmd)) { + damon_pmdp_mkold(pmd, walk->mm, addr); + spin_unlock(ptl); +@@ -431,6 +436,11 @@ static int damon_young_pmd_entry(pmd_t * + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (pmd_huge(*pmd)) { + ptl = pmd_lock(walk->mm, pmd); ++ if (!pmd_present(*pmd)) { ++ spin_unlock(ptl); ++ return 0; ++ } ++ + if (!pmd_huge(*pmd)) { + spin_unlock(ptl); + goto regular_page; diff --git a/queue-5.19/mm-hugetlb-fix-races-when-looking-up-a-cont-pte-pmd-size-hugetlb-page.patch b/queue-5.19/mm-hugetlb-fix-races-when-looking-up-a-cont-pte-pmd-size-hugetlb-page.patch new file mode 100644 index 00000000000..ba4223e3360 --- /dev/null +++ b/queue-5.19/mm-hugetlb-fix-races-when-looking-up-a-cont-pte-pmd-size-hugetlb-page.patch @@ -0,0 +1,170 @@ +From fac35ba763ed07ba93154c95ffc0c4a55023707f Mon Sep 17 00:00:00 2001 +From: Baolin Wang +Date: Thu, 1 Sep 2022 18:41:31 +0800 +Subject: mm/hugetlb: fix races when looking up a CONT-PTE/PMD size hugetlb page + +From: Baolin Wang + +commit fac35ba763ed07ba93154c95ffc0c4a55023707f upstream. + +On some architectures (like ARM64), it can support CONT-PTE/PMD size +hugetlb, which means it can support not only PMD/PUD size hugetlb (2M and +1G), but also CONT-PTE/PMD size(64K and 32M) if a 4K page size specified. 
+ +So when looking up a CONT-PTE size hugetlb page by follow_page(), it will +use pte_offset_map_lock() to get the pte entry lock for the CONT-PTE size +hugetlb in follow_page_pte(). However this pte entry lock is incorrect +for the CONT-PTE size hugetlb, since we should use huge_pte_lock() to get +the correct lock, which is mm->page_table_lock. + +That means the pte entry of the CONT-PTE size hugetlb under current pte +lock is unstable in follow_page_pte(), we can continue to migrate or +poison the pte entry of the CONT-PTE size hugetlb, which can cause some +potential race issues, even though they are under the 'pte lock'. + +For example, suppose thread A is trying to look up a CONT-PTE size hugetlb +page by move_pages() syscall under the lock, however another thread B can +migrate the CONT-PTE hugetlb page at the same time, which will cause +thread A to get an incorrect page, if thread A also wants to do page +migration, then data inconsistency error occurs. + +Moreover we have the same issue for CONT-PMD size hugetlb in +follow_huge_pmd(). + +To fix above issues, rename the follow_huge_pmd() as follow_huge_pmd_pte() +to handle PMD and PTE level size hugetlb, which uses huge_pte_lock() to +get the correct pte entry lock to make the pte entry stable. + +Mike said: + +Support for CONT_PMD/_PTE was added with bb9dd3df8ee9 ("arm64: hugetlb: +refactor find_num_contig()"). Patch series "Support for contiguous pte +hugepages", v4. However, I do not believe these code paths were +executed until migration support was added with 5480280d3f2d ("arm64/mm: +enable HugeTLB migration for contiguous bit HugeTLB pages") I would go +with 5480280d3f2d for the Fixes: target. 
+ +Link: https://lkml.kernel.org/r/635f43bdd85ac2615a58405da82b4d33c6e5eb05.1662017562.git.baolin.wang@linux.alibaba.com +Fixes: 5480280d3f2d ("arm64/mm: enable HugeTLB migration for contiguous bit HugeTLB pages") +Signed-off-by: Baolin Wang +Suggested-by: Mike Kravetz +Reviewed-by: Mike Kravetz +Cc: David Hildenbrand +Cc: Muchun Song +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/hugetlb.h | 8 ++++---- + mm/gup.c | 14 +++++++++++++- + mm/hugetlb.c | 27 +++++++++++++-------------- + 3 files changed, 30 insertions(+), 19 deletions(-) + +--- a/include/linux/hugetlb.h ++++ b/include/linux/hugetlb.h +@@ -203,8 +203,8 @@ struct page *follow_huge_addr(struct mm_ + struct page *follow_huge_pd(struct vm_area_struct *vma, + unsigned long address, hugepd_t hpd, + int flags, int pdshift); +-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, +- pmd_t *pmd, int flags); ++struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, ++ int flags); + struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int flags); + struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address, +@@ -308,8 +308,8 @@ static inline struct page *follow_huge_p + return NULL; + } + +-static inline struct page *follow_huge_pmd(struct mm_struct *mm, +- unsigned long address, pmd_t *pmd, int flags) ++static inline struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, ++ unsigned long address, int flags) + { + return NULL; + } +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -531,6 +531,18 @@ static struct page *follow_page_pte(stru + if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == + (FOLL_PIN | FOLL_GET))) + return ERR_PTR(-EINVAL); ++ ++ /* ++ * Considering PTE level hugetlb, like continuous-PTE hugetlb on ++ * ARM64 architecture. 
++ */ ++ if (is_vm_hugetlb_page(vma)) { ++ page = follow_huge_pmd_pte(vma, address, flags); ++ if (page) ++ return page; ++ return no_page_table(vma, flags); ++ } ++ + retry: + if (unlikely(pmd_bad(*pmd))) + return no_page_table(vma, flags); +@@ -663,7 +675,7 @@ static struct page *follow_pmd_mask(stru + if (pmd_none(pmdval)) + return no_page_table(vma, flags); + if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) { +- page = follow_huge_pmd(mm, address, pmd, flags); ++ page = follow_huge_pmd_pte(vma, address, flags); + if (page) + return page; + return no_page_table(vma, flags); +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -6906,12 +6906,13 @@ follow_huge_pd(struct vm_area_struct *vm + } + + struct page * __weak +-follow_huge_pmd(struct mm_struct *mm, unsigned long address, +- pmd_t *pmd, int flags) ++follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags) + { ++ struct hstate *h = hstate_vma(vma); ++ struct mm_struct *mm = vma->vm_mm; + struct page *page = NULL; + spinlock_t *ptl; +- pte_t pte; ++ pte_t *ptep, pte; + + /* + * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via +@@ -6921,17 +6922,15 @@ follow_huge_pmd(struct mm_struct *mm, un + return NULL; + + retry: +- ptl = pmd_lockptr(mm, pmd); +- spin_lock(ptl); +- /* +- * make sure that the address range covered by this pmd is not +- * unmapped from other threads. 
+- */ +- if (!pmd_huge(*pmd)) +- goto out; +- pte = huge_ptep_get((pte_t *)pmd); ++ ptep = huge_pte_offset(mm, address, huge_page_size(h)); ++ if (!ptep) ++ return NULL; ++ ++ ptl = huge_pte_lock(h, mm, ptep); ++ pte = huge_ptep_get(ptep); + if (pte_present(pte)) { +- page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT); ++ page = pte_page(pte) + ++ ((address & ~huge_page_mask(h)) >> PAGE_SHIFT); + /* + * try_grab_page() should always succeed here, because: a) we + * hold the pmd (ptl) lock, and b) we've just checked that the +@@ -6947,7 +6946,7 @@ retry: + } else { + if (is_hugetlb_entry_migration(pte)) { + spin_unlock(ptl); +- __migration_entry_wait_huge((pte_t *)pmd, ptl); ++ __migration_entry_wait_huge(ptep, ptl); + goto retry; + } + /* diff --git a/queue-5.19/mm-mmap-undo-mmap-when-arch_validate_flags-fails.patch b/queue-5.19/mm-mmap-undo-mmap-when-arch_validate_flags-fails.patch new file mode 100644 index 00000000000..df4360c6381 --- /dev/null +++ b/queue-5.19/mm-mmap-undo-mmap-when-arch_validate_flags-fails.patch @@ -0,0 +1,112 @@ +From deb0f6562884b5b4beb883d73e66a7d3a1b96d99 Mon Sep 17 00:00:00 2001 +From: Carlos Llamas +Date: Fri, 30 Sep 2022 00:38:43 +0000 +Subject: mm/mmap: undo ->mmap() when arch_validate_flags() fails + +From: Carlos Llamas + +commit deb0f6562884b5b4beb883d73e66a7d3a1b96d99 upstream. + +Commit c462ac288f2c ("mm: Introduce arch_validate_flags()") added a late +check in mmap_region() to let architectures validate vm_flags. The check +needs to happen after calling ->mmap() as the flags can potentially be +modified during this callback. + +If arch_validate_flags() check fails we unmap and free the vma. However, +the error path fails to undo the ->mmap() call that previously succeeded +and depending on the specific ->mmap() implementation this translates to +reference increments, memory allocations and other operations that will +not be cleaned up. + +There are several places (mainly device drivers) where this is an issue. 
+However, one specific example is bpf_map_mmap() which keeps count of the +mappings in map->writecnt. The count is incremented on ->mmap() and then +decremented on vm_ops->close(). When arch_validate_flags() fails this +count is off since bpf_map_mmap_close() is never called. + +One can reproduce this issue in arm64 devices with MTE support. Here the +vm_flags are checked to only allow VM_MTE if VM_MTE_ALLOWED has been set +previously. From userspace it is then enough to pass the PROT_MTE flag to +the mmap() syscall to trigger the arch_validate_flags() failure. + +The following program reproduces this issue: + + #include <stdio.h> + #include <unistd.h> + #include <linux/unistd.h> + #include <linux/bpf.h> + #include <sys/mman.h> + + int main(void) + { + union bpf_attr attr = { + .map_type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(long long), + .max_entries = 256, + .map_flags = BPF_F_MMAPABLE, + }; + int fd; + + fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr)); + mmap(NULL, 4096, PROT_WRITE | PROT_MTE, MAP_SHARED, fd, 0); + + return 0; + } + +By manually adding some log statements to the vm_ops callbacks we can +confirm that when passing PROT_MTE to mmap() the map->writecnt is off upon +->release(): + +With PROT_MTE flag: + root@debian:~# ./bpf-test + [ 111.263874] bpf_map_write_active_inc: map=9 writecnt=1 + [ 111.288763] bpf_map_release: map=9 writecnt=1 + +Without PROT_MTE flag: + root@debian:~# ./bpf-test + [ 157.816912] bpf_map_write_active_inc: map=10 writecnt=1 + [ 157.830442] bpf_map_write_active_dec: map=10 writecnt=0 + [ 157.832396] bpf_map_release: map=10 writecnt=0 + +This patch fixes the above issue by calling vm_ops->close() when the +arch_validate_flags() check fails, after this we can proceed to unmap and +free the vma on the error path. 
+ +Link: https://lkml.kernel.org/r/20220930003844.1210987-1-cmllamas@google.com +Fixes: c462ac288f2c ("mm: Introduce arch_validate_flags()") +Signed-off-by: Carlos Llamas +Reviewed-by: Catalin Marinas +Acked-by: Andrii Nakryiko +Reviewed-by: Liam Howlett +Cc: Christian Brauner (Microsoft) +Cc: Michal Hocko +Cc: Suren Baghdasaryan +Cc: [5.10+] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/mmap.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -1845,7 +1845,7 @@ unsigned long mmap_region(struct file *f + if (!arch_validate_flags(vma->vm_flags)) { + error = -EINVAL; + if (file) +- goto unmap_and_free_vma; ++ goto close_and_free_vma; + else + goto free_vma; + } +@@ -1892,6 +1892,9 @@ out: + + return addr; + ++close_and_free_vma: ++ if (vma->vm_ops && vma->vm_ops->close) ++ vma->vm_ops->close(vma); + unmap_and_free_vma: + fput(vma->vm_file); + vma->vm_file = NULL; diff --git a/queue-5.19/mm-uffd-fix-warning-without-pte_marker_uffd_wp-compiled-in.patch b/queue-5.19/mm-uffd-fix-warning-without-pte_marker_uffd_wp-compiled-in.patch new file mode 100644 index 00000000000..65399b888fd --- /dev/null +++ b/queue-5.19/mm-uffd-fix-warning-without-pte_marker_uffd_wp-compiled-in.patch @@ -0,0 +1,96 @@ +From 515778e2d790652a38a24554fdb7f21420d91efc Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 30 Sep 2022 20:25:55 -0400 +Subject: mm/uffd: fix warning without PTE_MARKER_UFFD_WP compiled in + +From: Peter Xu + +commit 515778e2d790652a38a24554fdb7f21420d91efc upstream. + +When PTE_MARKER_UFFD_WP is not configured, it's still possible to reach pte +marker code and trigger a warning. Add a few CONFIG_PTE_MARKER_UFFD_WP +ifdefs to make sure the code won't be reached when not compiled in. 
+ +Link: https://lkml.kernel.org/r/YzeR+R6b4bwBlBHh@x1n +Fixes: b1f9e876862d ("mm/uffd: enable write protection for shmem & hugetlbfs") +Signed-off-by: Peter Xu +Reported-by: +Cc: Axel Rasmussen +Cc: Brian Geffon +Cc: Edward Liaw +Cc: Liu Shixin +Cc: Mike Kravetz +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/hugetlb.c | 4 ++++ + mm/memory.c | 2 ++ + mm/mprotect.c | 2 ++ + 3 files changed, 8 insertions(+) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -5050,6 +5050,7 @@ static void __unmap_hugepage_range(struc + * unmapped and its refcount is dropped, so just clear pte here. + */ + if (unlikely(!pte_present(pte))) { ++#ifdef CONFIG_PTE_MARKER_UFFD_WP + /* + * If the pte was wr-protected by uffd-wp in any of the + * swap forms, meanwhile the caller does not want to +@@ -5061,6 +5062,7 @@ static void __unmap_hugepage_range(struc + set_huge_pte_at(mm, address, ptep, + make_pte_marker(PTE_MARKER_UFFD_WP)); + else ++#endif + huge_pte_clear(mm, address, ptep, sz); + spin_unlock(ptl); + continue; +@@ -5089,11 +5091,13 @@ static void __unmap_hugepage_range(struc + tlb_remove_huge_tlb_entry(h, tlb, ptep, address); + if (huge_pte_dirty(pte)) + set_page_dirty(page); ++#ifdef CONFIG_PTE_MARKER_UFFD_WP + /* Leave a uffd-wp pte marker if needed */ + if (huge_pte_uffd_wp(pte) && + !(zap_flags & ZAP_FLAG_DROP_MARKER)) + set_huge_pte_at(mm, address, ptep, + make_pte_marker(PTE_MARKER_UFFD_WP)); ++#endif + hugetlb_count_sub(pages_per_huge_page(h), mm); + page_remove_rmap(page, vma, true); + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -1385,10 +1385,12 @@ zap_install_uffd_wp_if_needed(struct vm_ + unsigned long addr, pte_t *pte, + struct zap_details *details, pte_t pteval) + { ++#ifdef CONFIG_PTE_MARKER_UFFD_WP + if (zap_drop_file_uffd_wp(details)) + return; + + pte_install_uffd_wp_if_needed(vma, addr, pte, pteval); ++#endif + } + + static unsigned long zap_pte_range(struct mmu_gather *tlb, +--- a/mm/mprotect.c ++++ b/mm/mprotect.c +@@ -222,6 +222,7 @@ 
static unsigned long change_pte_range(st + } else { + /* It must be an none page, or what else?.. */ + WARN_ON_ONCE(!pte_none(oldpte)); ++#ifdef CONFIG_PTE_MARKER_UFFD_WP + if (unlikely(uffd_wp && !vma_is_anonymous(vma))) { + /* + * For file-backed mem, we need to be able to +@@ -233,6 +234,7 @@ static unsigned long change_pte_range(st + make_pte_marker(PTE_MARKER_UFFD_WP)); + pages++; + } ++#endif + } + } while (pte++, addr += PAGE_SIZE, addr != end); + arch_leave_lazy_mmu_mode(); diff --git a/queue-5.19/nfsd-protect-against-send-buffer-overflow-in-nfsv2-read.patch b/queue-5.19/nfsd-protect-against-send-buffer-overflow-in-nfsv2-read.patch new file mode 100644 index 00000000000..c9b4d2df9d5 --- /dev/null +++ b/queue-5.19/nfsd-protect-against-send-buffer-overflow-in-nfsv2-read.patch @@ -0,0 +1,45 @@ +From 401bc1f90874280a80b93f23be33a0e7e2d1f912 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Thu, 1 Sep 2022 15:10:18 -0400 +Subject: NFSD: Protect against send buffer overflow in NFSv2 READ + +From: Chuck Lever + +commit 401bc1f90874280a80b93f23be33a0e7e2d1f912 upstream. + +Since before the git era, NFSD has conserved the number of pages +held by each nfsd thread by combining the RPC receive and send +buffers into a single array of pages. This works because there are +no cases where an operation needs a large RPC Call message and a +large RPC Reply at the same time. + +Once an RPC Call has been received, svc_process() updates +svc_rqst::rq_res to describe the part of rq_pages that can be +used for constructing the Reply. This means that the send buffer +(rq_res) shrinks when the received RPC record containing the RPC +Call is large. + +A client can force this shrinkage on TCP by sending a correctly- +formed RPC Call header contained in an RPC record that is +excessively large. The full maximum payload size cannot be +constructed in that case. 
+ +Cc: +Signed-off-by: Chuck Lever +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfsproc.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/nfsd/nfsproc.c ++++ b/fs/nfsd/nfsproc.c +@@ -182,6 +182,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) + argp->count, argp->offset); + + argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2); ++ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); + + v = 0; + len = argp->count; diff --git a/queue-5.19/nfsd-protect-against-send-buffer-overflow-in-nfsv3-read.patch b/queue-5.19/nfsd-protect-against-send-buffer-overflow-in-nfsv3-read.patch new file mode 100644 index 00000000000..d123a456f21 --- /dev/null +++ b/queue-5.19/nfsd-protect-against-send-buffer-overflow-in-nfsv3-read.patch @@ -0,0 +1,55 @@ +From fa6be9cc6e80ec79892ddf08a8c10cabab9baf38 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Thu, 1 Sep 2022 15:10:24 -0400 +Subject: NFSD: Protect against send buffer overflow in NFSv3 READ + +From: Chuck Lever + +commit fa6be9cc6e80ec79892ddf08a8c10cabab9baf38 upstream. + +Since before the git era, NFSD has conserved the number of pages +held by each nfsd thread by combining the RPC receive and send +buffers into a single array of pages. This works because there are +no cases where an operation needs a large RPC Call message and a +large RPC Reply at the same time. + +Once an RPC Call has been received, svc_process() updates +svc_rqst::rq_res to describe the part of rq_pages that can be +used for constructing the Reply. This means that the send buffer +(rq_res) shrinks when the received RPC record containing the RPC +Call is large. + +A client can force this shrinkage on TCP by sending a correctly- +formed RPC Call header contained in an RPC record that is +excessively large. The full maximum payload size cannot be +constructed in that case. 
+ +Cc: +Signed-off-by: Chuck Lever +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfs3proc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/nfsd/nfs3proc.c ++++ b/fs/nfsd/nfs3proc.c +@@ -147,7 +147,6 @@ nfsd3_proc_read(struct svc_rqst *rqstp) + { + struct nfsd3_readargs *argp = rqstp->rq_argp; + struct nfsd3_readres *resp = rqstp->rq_resp; +- u32 max_blocksize = svc_max_payload(rqstp); + unsigned int len; + int v; + +@@ -156,7 +155,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp) + (unsigned long) argp->count, + (unsigned long long) argp->offset); + +- argp->count = min_t(u32, argp->count, max_blocksize); ++ argp->count = min_t(u32, argp->count, svc_max_payload(rqstp)); ++ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); + if (argp->offset > (u64)OFFSET_MAX) + argp->offset = (u64)OFFSET_MAX; + if (argp->offset + argp->count > (u64)OFFSET_MAX) diff --git a/queue-5.19/nfsd-protect-against-send-buffer-overflow-in-nfsv3-readdir.patch b/queue-5.19/nfsd-protect-against-send-buffer-overflow-in-nfsv3-readdir.patch new file mode 100644 index 00000000000..948de662e95 --- /dev/null +++ b/queue-5.19/nfsd-protect-against-send-buffer-overflow-in-nfsv3-readdir.patch @@ -0,0 +1,59 @@ +From 640f87c190e0d1b2a0fcb2ecf6d2cd53b1c41991 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Thu, 1 Sep 2022 15:10:12 -0400 +Subject: NFSD: Protect against send buffer overflow in NFSv3 READDIR + +From: Chuck Lever + +commit 640f87c190e0d1b2a0fcb2ecf6d2cd53b1c41991 upstream. + +Since before the git era, NFSD has conserved the number of pages +held by each nfsd thread by combining the RPC receive and send +buffers into a single array of pages. This works because there are +no cases where an operation needs a large RPC Call message and a +large RPC Reply message at the same time. 
+ +Once an RPC Call has been received, svc_process() updates +svc_rqst::rq_res to describe the part of rq_pages that can be +used for constructing the Reply. This means that the send buffer +(rq_res) shrinks when the received RPC record containing the RPC +Call is large. + +A client can force this shrinkage on TCP by sending a correctly- +formed RPC Call header contained in an RPC record that is +excessively large. The full maximum payload size cannot be +constructed in that case. + +Thanks to Aleksi Illikainen and Kari Hulkko for uncovering this +issue. + +Reported-by: Ben Ronallo +Cc: +Signed-off-by: Chuck Lever +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfs3proc.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/fs/nfsd/nfs3proc.c ++++ b/fs/nfsd/nfs3proc.c +@@ -550,13 +550,14 @@ static void nfsd3_init_dirlist_pages(str + { + struct xdr_buf *buf = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; +- +- count = clamp(count, (u32)(XDR_UNIT * 2), svc_max_payload(rqstp)); ++ unsigned int sendbuf = min_t(unsigned int, rqstp->rq_res.buflen, ++ svc_max_payload(rqstp)); + + memset(buf, 0, sizeof(*buf)); + + /* Reserve room for the NULL ptr & eof flag (-2 words) */ +- buf->buflen = count - XDR_UNIT * 2; ++ buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), sendbuf); ++ buf->buflen -= XDR_UNIT * 2; + buf->pages = rqstp->rq_next_page; + rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; + diff --git a/queue-5.19/pci-sanitise-firmware-bar-assignments-behind-a-pci-pci-bridge.patch b/queue-5.19/pci-sanitise-firmware-bar-assignments-behind-a-pci-pci-bridge.patch new file mode 100644 index 00000000000..f028d6639c5 --- /dev/null +++ b/queue-5.19/pci-sanitise-firmware-bar-assignments-behind-a-pci-pci-bridge.patch @@ -0,0 +1,102 @@ +From 0e32818397426a688f598f35d3bc762eca6d7592 Mon Sep 17 00:00:00 2001 +From: "Maciej W. 
Rozycki" +Date: Wed, 21 Sep 2022 20:49:16 +0100 +Subject: PCI: Sanitise firmware BAR assignments behind a PCI-PCI bridge + +From: Maciej W. Rozycki + +commit 0e32818397426a688f598f35d3bc762eca6d7592 upstream. + +When pci_assign_resource() is unable to assign resources to a BAR, it uses +pci_revert_fw_address() to fall back to a firmware assignment (if any). +Previously pci_revert_fw_address() assumed all addresses could reach the +device, but this is not true if the device is below a bridge that only +forwards addresses within its windows. + +This problem was observed on a Tyan Tomcat IV S1564D system where the BIOS +did not assign valid addresses to several bridges and USB devices: + + pci 0000:00:11.0: PCI-to-PCIe bridge to [bus 01-ff] + pci 0000:00:11.0: bridge window [io 0xe000-0xefff] + pci 0000:01:00.0: PCIe Upstream Port to [bus 02-ff] + pci 0000:01:00.0: bridge window [io 0x0000-0x0fff] # unreachable + pci 0000:02:02.0: PCIe Downstream Port to [bus 05-ff] + pci 0000:02:02.0: bridge window [io 0x0000-0x0fff] # unreachable + pci 0000:05:00.0: PCIe-to-PCI bridge to [bus 06-ff] + pci 0000:05:00.0: bridge window [io 0x0000-0x0fff] # unreachable + pci 0000:06:08.0: USB UHCI 1.1 + pci 0000:06:08.0: BAR 4: [io 0xfce0-0xfcff] # unreachable + pci 0000:06:08.1: USB UHCI 1.1 + pci 0000:06:08.1: BAR 4: [io 0xfce0-0xfcff] # unreachable + pci 0000:06:08.0: can't claim BAR 4 [io 0xfce0-0xfcff]: no compatible bridge window + pci 0000:06:08.1: can't claim BAR 4 [io 0xfce0-0xfcff]: no compatible bridge window + +During the first pass of assigning unassigned resources, there was not +enough I/O space available, so we couldn't assign the 06:08.0 BAR and +reverted to the firmware assignment (still unreachable). 
Reverting the +06:08.1 assignment failed because it conflicted with 06:08.0: + + pci 0000:00:11.0: bridge window [io 0xe000-0xefff] + pci 0000:01:00.0: no space for bridge window [io size 0x2000] + pci 0000:02:02.0: no space for bridge window [io size 0x1000] + pci 0000:05:00.0: no space for bridge window [io size 0x1000] + pci 0000:06:08.0: BAR 4: no space for [io size 0x0020] + pci 0000:06:08.0: BAR 4: trying firmware assignment [io 0xfce0-0xfcff] + pci 0000:06:08.1: BAR 4: no space for [io size 0x0020] + pci 0000:06:08.1: BAR 4: trying firmware assignment [io 0xfce0-0xfcff] + pci 0000:06:08.1: BAR 4: [io 0xfce0-0xfcff] conflicts with 0000:06:08.0 [io 0xfce0-0xfcff] + +A subsequent pass assigned valid bridge windows and a valid 06:08.1 BAR, +but left the 06:08.0 BAR alone, so the UHCI device was still unusable: + + pci 0000:00:11.0: bridge window [io 0xe000-0xefff] released + pci 0000:00:11.0: bridge window [io 0x1000-0x2fff] # reassigned + pci 0000:01:00.0: bridge window [io 0x1000-0x2fff] # reassigned + pci 0000:02:02.0: bridge window [io 0x2000-0x2fff] # reassigned + pci 0000:05:00.0: bridge window [io 0x2000-0x2fff] # reassigned + pci 0000:06:08.0: BAR 4: assigned [io 0xfce0-0xfcff] # left alone + pci 0000:06:08.1: BAR 4: assigned [io 0x2000-0x201f] + ... + uhci_hcd 0000:06:08.0: host system error, PCI problems? + uhci_hcd 0000:06:08.0: host controller process error, something bad happened! + uhci_hcd 0000:06:08.0: host controller halted, very bad! + uhci_hcd 0000:06:08.0: HCRESET not completed yet! + uhci_hcd 0000:06:08.0: HC died; cleaning up + +If the address assigned by firmware is not reachable because it's not +within upstream bridge windows, fail instead of assigning the unusable +address from firmware. 
+ +[bhelgaas: commit log, use pci_upstream_bridge()] +Link: https://bugzilla.kernel.org/show_bug.cgi?id=16263 +Link: https://lore.kernel.org/r/alpine.DEB.2.21.2203012338460.46819@angie.orcam.me.uk +Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209211921250.29493@angie.orcam.me.uk +Fixes: 58c84eda0756 ("PCI: fall back to original BIOS BAR addresses") +Signed-off-by: Maciej W. Rozycki +Signed-off-by: Bjorn Helgaas +Cc: stable@vger.kernel.org # v2.6.35+ +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/setup-res.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/drivers/pci/setup-res.c ++++ b/drivers/pci/setup-res.c +@@ -214,6 +214,17 @@ static int pci_revert_fw_address(struct + + root = pci_find_parent_resource(dev, res); + if (!root) { ++ /* ++ * If dev is behind a bridge, accesses will only reach it ++ * if res is inside the relevant bridge window. ++ */ ++ if (pci_upstream_bridge(dev)) ++ return -ENXIO; ++ ++ /* ++ * On the root bus, assume the host bridge will forward ++ * everything. ++ */ + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else diff --git a/queue-5.19/powercap-intel_rapl-use-standard-energy-unit-for-spr-dram-rapl-domain.patch b/queue-5.19/powercap-intel_rapl-use-standard-energy-unit-for-spr-dram-rapl-domain.patch new file mode 100644 index 00000000000..389d1a6d9d3 --- /dev/null +++ b/queue-5.19/powercap-intel_rapl-use-standard-energy-unit-for-spr-dram-rapl-domain.patch @@ -0,0 +1,35 @@ +From 4c081324df5608b73428662ca54d5221ea03a6bd Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Sat, 24 Sep 2022 13:47:36 +0800 +Subject: powercap: intel_rapl: Use standard Energy Unit for SPR Dram RAPL domain + +From: Zhang Rui + +commit 4c081324df5608b73428662ca54d5221ea03a6bd upstream. + +Intel Xeon servers used to use a fixed energy resolution (15.3uj) for +Dram RAPL domain. But on SPR, Dram RAPL domain follows the standard +energy resolution as described in MSR_RAPL_POWER_UNIT. + +Remove the SPR dram_domain_energy_unit quirk. 
+ +Fixes: 2d798d9f5967 ("powercap: intel_rapl: add support for Sapphire Rapids") +Signed-off-by: Zhang Rui +Tested-by: Wang Wendy +Cc: 5.9+ # 5.9+ +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/powercap/intel_rapl_common.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/powercap/intel_rapl_common.c ++++ b/drivers/powercap/intel_rapl_common.c +@@ -1035,7 +1035,6 @@ static const struct rapl_defaults rapl_d + .check_unit = rapl_check_unit_core, + .set_floor_freq = set_floor_freq_default, + .compute_time_window = rapl_compute_time_window_core, +- .dram_domain_energy_unit = 15300, + .psys_domain_energy_unit = 1000000000, + .spr_psys_bits = true, + }; diff --git a/queue-5.19/powerpc-boot-explicitly-disable-usage-of-spe-instructions.patch b/queue-5.19/powerpc-boot-explicitly-disable-usage-of-spe-instructions.patch new file mode 100644 index 00000000000..1489609d3b0 --- /dev/null +++ b/queue-5.19/powerpc-boot-explicitly-disable-usage-of-spe-instructions.patch @@ -0,0 +1,36 @@ +From 110a58b9f91c66f743c01a2c217243d94c899c23 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Pali=20Roh=C3=A1r?= +Date: Sat, 27 Aug 2022 15:44:54 +0200 +Subject: powerpc/boot: Explicitly disable usage of SPE instructions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Pali Rohár + +commit 110a58b9f91c66f743c01a2c217243d94c899c23 upstream. + +uImage boot wrapper should not use SPE instructions, like kernel itself. +Boot wrapper has already disabled Altivec and VSX instructions but not SPE. +Options -mno-spe and -mspe=no already set when compilation of kernel, but +not when compiling uImage wrapper yet. Fix it. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Pali Rohár +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20220827134454.17365-1-pali@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/boot/Makefile | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/powerpc/boot/Makefile ++++ b/arch/powerpc/boot/Makefile +@@ -34,6 +34,7 @@ endif + + BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ + -fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \ ++ $(call cc-option,-mno-spe) $(call cc-option,-mspe=no) \ + -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ + $(LINUXINCLUDE) + diff --git a/queue-5.19/powerpc-kconfig-fix-non-existing-config_ppc_fsl_booke.patch b/queue-5.19/powerpc-kconfig-fix-non-existing-config_ppc_fsl_booke.patch new file mode 100644 index 00000000000..a439305b04e --- /dev/null +++ b/queue-5.19/powerpc-kconfig-fix-non-existing-config_ppc_fsl_booke.patch @@ -0,0 +1,32 @@ +From d1203f32d86987a3ccd7de9ba2448ba12d86d125 Mon Sep 17 00:00:00 2001 +From: Christophe Leroy +Date: Mon, 19 Sep 2022 19:01:25 +0200 +Subject: powerpc/Kconfig: Fix non existing CONFIG_PPC_FSL_BOOKE + +From: Christophe Leroy + +commit d1203f32d86987a3ccd7de9ba2448ba12d86d125 upstream. + +CONFIG_PPC_FSL_BOOKE doesn't exist. Should be CONFIG_FSL_BOOKE. 
+ +Fixes: 49e3d8ea6248 ("powerpc/fsl_booke: Enable STRICT_KERNEL_RWX") +Cc: stable@vger.kernel.org +Signed-off-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/828f6a64eeb51ce9abfa1d4e84c521a02fecebb8.1663606875.git.christophe.leroy@csgroup.eu +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -817,7 +817,7 @@ config DATA_SHIFT + default 24 if STRICT_KERNEL_RWX && PPC64 + range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 + range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx +- range 20 24 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_FSL_BOOKE ++ range 20 24 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && FSL_BOOKE + default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 + default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 + default 23 if STRICT_KERNEL_RWX && PPC_8xx diff --git a/queue-5.19/scsi-lpfc-rework-mib-rx-monitor-debug-info-logic.patch b/queue-5.19/scsi-lpfc-rework-mib-rx-monitor-debug-info-logic.patch new file mode 100644 index 00000000000..a5891adee17 --- /dev/null +++ b/queue-5.19/scsi-lpfc-rework-mib-rx-monitor-debug-info-logic.patch @@ -0,0 +1,529 @@ +From bd269188ea94e40ab002cad7b0df8f12b8f0de54 Mon Sep 17 00:00:00 2001 +From: James Smart +Date: Thu, 18 Aug 2022 18:17:32 -0700 +Subject: scsi: lpfc: Rework MIB Rx Monitor debug info logic + +From: James Smart + +commit bd269188ea94e40ab002cad7b0df8f12b8f0de54 upstream. + +The kernel test robot reported the following sparse warning: + +arch/arm64/include/asm/cmpxchg.h:88:1: sparse: sparse: cast truncates + bits from constant value (369 becomes 69) + +On arm64, atomic_xchg only works on 8-bit byte fields. 
Thus, the macro +usage of LPFC_RXMONITOR_TABLE_IN_USE can be unintentionally truncated +leading to all logic involving the LPFC_RXMONITOR_TABLE_IN_USE macro to not +work properly. + +Replace the Rx Table atomic_t indexing logic with a new +lpfc_rx_info_monitor structure that holds a circular ring buffer. For +locking semantics, a spinlock_t is used. + +Link: https://lore.kernel.org/r/20220819011736.14141-4-jsmart2021@gmail.com +Fixes: 17b27ac59224 ("scsi: lpfc: Add rx monitoring statistics") +Cc: # v5.15+ +Co-developed-by: Justin Tee +Signed-off-by: Justin Tee +Signed-off-by: James Smart +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/lpfc/lpfc.h | 14 +- + drivers/scsi/lpfc/lpfc_crtn.h | 8 + + drivers/scsi/lpfc/lpfc_debugfs.c | 59 ++---------- + drivers/scsi/lpfc/lpfc_debugfs.h | 2 + drivers/scsi/lpfc/lpfc_init.c | 83 ++++------------- + drivers/scsi/lpfc/lpfc_mem.c | 9 + + drivers/scsi/lpfc/lpfc_sli.c | 190 +++++++++++++++++++++++++++++++++++++-- + 7 files changed, 240 insertions(+), 125 deletions(-) + +--- a/drivers/scsi/lpfc/lpfc.h ++++ b/drivers/scsi/lpfc/lpfc.h +@@ -1576,10 +1576,7 @@ struct lpfc_hba { + u32 cgn_acqe_cnt; + + /* RX monitor handling for CMF */ +- struct rxtable_entry *rxtable; /* RX_monitor information */ +- atomic_t rxtable_idx_head; +-#define LPFC_RXMONITOR_TABLE_IN_USE (LPFC_MAX_RXMONITOR_ENTRY + 73) +- atomic_t rxtable_idx_tail; ++ struct lpfc_rx_info_monitor *rx_monitor; + atomic_t rx_max_read_cnt; /* Maximum read bytes */ + uint64_t rx_block_cnt; + +@@ -1628,7 +1625,7 @@ struct lpfc_hba { + + #define LPFC_MAX_RXMONITOR_ENTRY 800 + #define LPFC_MAX_RXMONITOR_DUMP 32 +-struct rxtable_entry { ++struct rx_info_entry { + uint64_t cmf_bytes; /* Total no of read bytes for CMF_SYNC_WQE */ + uint64_t total_bytes; /* Total no of read bytes requested */ + uint64_t rcv_bytes; /* Total no of read bytes completed */ +@@ -1643,6 +1640,13 @@ struct rxtable_entry { + uint32_t timer_interval; + }; + ++struct 
lpfc_rx_info_monitor { ++ struct rx_info_entry *ring; /* info organized in a circular buffer */ ++ u32 head_idx, tail_idx; /* index to head/tail of ring */ ++ spinlock_t lock; /* spinlock for ring */ ++ u32 entries; /* storing number entries/size of ring */ ++}; ++ + static inline struct Scsi_Host * + lpfc_shost_from_vport(struct lpfc_vport *vport) + { +--- a/drivers/scsi/lpfc/lpfc_crtn.h ++++ b/drivers/scsi/lpfc/lpfc_crtn.h +@@ -92,6 +92,14 @@ void lpfc_cgn_dump_rxmonitor(struct lpfc + void lpfc_cgn_update_stat(struct lpfc_hba *phba, uint32_t dtag); + void lpfc_unblock_requests(struct lpfc_hba *phba); + void lpfc_block_requests(struct lpfc_hba *phba); ++int lpfc_rx_monitor_create_ring(struct lpfc_rx_info_monitor *rx_monitor, ++ u32 entries); ++void lpfc_rx_monitor_destroy_ring(struct lpfc_rx_info_monitor *rx_monitor); ++void lpfc_rx_monitor_record(struct lpfc_rx_info_monitor *rx_monitor, ++ struct rx_info_entry *entry); ++u32 lpfc_rx_monitor_report(struct lpfc_hba *phba, ++ struct lpfc_rx_info_monitor *rx_monitor, char *buf, ++ u32 buf_len, u32 max_read_entries); + + void lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *, LPFC_MBOXQ_t *); + void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *); +--- a/drivers/scsi/lpfc/lpfc_debugfs.c ++++ b/drivers/scsi/lpfc/lpfc_debugfs.c +@@ -5531,7 +5531,7 @@ lpfc_rx_monitor_open(struct inode *inode + if (!debug) + goto out; + +- debug->buffer = vmalloc(MAX_DEBUGFS_RX_TABLE_SIZE); ++ debug->buffer = vmalloc(MAX_DEBUGFS_RX_INFO_SIZE); + if (!debug->buffer) { + kfree(debug); + goto out; +@@ -5552,57 +5552,18 @@ lpfc_rx_monitor_read(struct file *file, + struct lpfc_rx_monitor_debug *debug = file->private_data; + struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private; + char *buffer = debug->buffer; +- struct rxtable_entry *entry; +- int i, len = 0, head, tail, last, start; + +- head = atomic_read(&phba->rxtable_idx_head); +- while (head == LPFC_RXMONITOR_TABLE_IN_USE) { +- /* Table is getting updated */ +- 
msleep(20); +- head = atomic_read(&phba->rxtable_idx_head); ++ if (!phba->rx_monitor) { ++ scnprintf(buffer, MAX_DEBUGFS_RX_INFO_SIZE, ++ "Rx Monitor Info is empty.\n"); ++ } else { ++ lpfc_rx_monitor_report(phba, phba->rx_monitor, buffer, ++ MAX_DEBUGFS_RX_INFO_SIZE, ++ LPFC_MAX_RXMONITOR_ENTRY); + } + +- tail = atomic_xchg(&phba->rxtable_idx_tail, head); +- if (!phba->rxtable || head == tail) { +- len += scnprintf(buffer + len, MAX_DEBUGFS_RX_TABLE_SIZE - len, +- "Rxtable is empty\n"); +- goto out; +- } +- last = (head > tail) ? head : LPFC_MAX_RXMONITOR_ENTRY; +- start = tail; +- +- len += scnprintf(buffer + len, MAX_DEBUGFS_RX_TABLE_SIZE - len, +- " MaxBPI Tot_Data_CMF Tot_Data_Cmd " +- "Tot_Data_Cmpl Lat(us) Avg_IO Max_IO " +- "Bsy IO_cnt Info BWutil(ms)\n"); +-get_table: +- for (i = start; i < last; i++) { +- entry = &phba->rxtable[i]; +- len += scnprintf(buffer + len, MAX_DEBUGFS_RX_TABLE_SIZE - len, +- "%3d:%12lld %12lld %12lld %12lld " +- "%7lldus %8lld %7lld " +- "%2d %4d %2d %2d(%2d)\n", +- i, entry->max_bytes_per_interval, +- entry->cmf_bytes, +- entry->total_bytes, +- entry->rcv_bytes, +- entry->avg_io_latency, +- entry->avg_io_size, +- entry->max_read_cnt, +- entry->cmf_busy, +- entry->io_cnt, +- entry->cmf_info, +- entry->timer_utilization, +- entry->timer_interval); +- } +- +- if (head != last) { +- start = 0; +- last = head; +- goto get_table; +- } +-out: +- return simple_read_from_buffer(buf, nbytes, ppos, buffer, len); ++ return simple_read_from_buffer(buf, nbytes, ppos, buffer, ++ strlen(buffer)); + } + + static int +--- a/drivers/scsi/lpfc/lpfc_debugfs.h ++++ b/drivers/scsi/lpfc/lpfc_debugfs.h +@@ -282,7 +282,7 @@ struct lpfc_idiag { + void *ptr_private; + }; + +-#define MAX_DEBUGFS_RX_TABLE_SIZE (128 * LPFC_MAX_RXMONITOR_ENTRY) ++#define MAX_DEBUGFS_RX_INFO_SIZE (128 * LPFC_MAX_RXMONITOR_ENTRY) + struct lpfc_rx_monitor_debug { + char *i_private; + char *buffer; +--- a/drivers/scsi/lpfc/lpfc_init.c ++++ b/drivers/scsi/lpfc/lpfc_init.c +@@ 
-5571,38 +5571,12 @@ lpfc_async_link_speed_to_read_top(struct + void + lpfc_cgn_dump_rxmonitor(struct lpfc_hba *phba) + { +- struct rxtable_entry *entry; +- int cnt = 0, head, tail, last, start; +- +- head = atomic_read(&phba->rxtable_idx_head); +- tail = atomic_read(&phba->rxtable_idx_tail); +- if (!phba->rxtable || head == tail) { +- lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT, +- "4411 Rxtable is empty\n"); +- return; +- } +- last = tail; +- start = head; +- +- /* Display the last LPFC_MAX_RXMONITOR_DUMP entries from the rxtable */ +- while (start != last) { +- if (start) +- start--; +- else +- start = LPFC_MAX_RXMONITOR_ENTRY - 1; +- entry = &phba->rxtable[start]; ++ if (!phba->rx_monitor) { + lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT, +- "4410 %02d: MBPI %lld Xmit %lld Cmpl %lld " +- "Lat %lld ASz %lld Info %02d BWUtil %d " +- "Int %d slot %d\n", +- cnt, entry->max_bytes_per_interval, +- entry->total_bytes, entry->rcv_bytes, +- entry->avg_io_latency, entry->avg_io_size, +- entry->cmf_info, entry->timer_utilization, +- entry->timer_interval, start); +- cnt++; +- if (cnt >= LPFC_MAX_RXMONITOR_DUMP) +- return; ++ "4411 Rx Monitor Info is empty.\n"); ++ } else { ++ lpfc_rx_monitor_report(phba, phba->rx_monitor, NULL, 0, ++ LPFC_MAX_RXMONITOR_DUMP); + } + } + +@@ -6009,9 +5983,8 @@ lpfc_cmf_timer(struct hrtimer *timer) + { + struct lpfc_hba *phba = container_of(timer, struct lpfc_hba, + cmf_timer); +- struct rxtable_entry *entry; ++ struct rx_info_entry entry; + uint32_t io_cnt; +- uint32_t head, tail; + uint32_t busy, max_read; + uint64_t total, rcv, lat, mbpi, extra, cnt; + int timer_interval = LPFC_CMF_INTERVAL; +@@ -6131,40 +6104,30 @@ lpfc_cmf_timer(struct hrtimer *timer) + } + + /* Save rxmonitor information for debug */ +- if (phba->rxtable) { +- head = atomic_xchg(&phba->rxtable_idx_head, +- LPFC_RXMONITOR_TABLE_IN_USE); +- entry = &phba->rxtable[head]; +- entry->total_bytes = total; +- entry->cmf_bytes = total + extra; +- entry->rcv_bytes = rcv; +- 
entry->cmf_busy = busy; +- entry->cmf_info = phba->cmf_active_info; ++ if (phba->rx_monitor) { ++ entry.total_bytes = total; ++ entry.cmf_bytes = total + extra; ++ entry.rcv_bytes = rcv; ++ entry.cmf_busy = busy; ++ entry.cmf_info = phba->cmf_active_info; + if (io_cnt) { +- entry->avg_io_latency = div_u64(lat, io_cnt); +- entry->avg_io_size = div_u64(rcv, io_cnt); ++ entry.avg_io_latency = div_u64(lat, io_cnt); ++ entry.avg_io_size = div_u64(rcv, io_cnt); + } else { +- entry->avg_io_latency = 0; +- entry->avg_io_size = 0; ++ entry.avg_io_latency = 0; ++ entry.avg_io_size = 0; + } +- entry->max_read_cnt = max_read; +- entry->io_cnt = io_cnt; +- entry->max_bytes_per_interval = mbpi; ++ entry.max_read_cnt = max_read; ++ entry.io_cnt = io_cnt; ++ entry.max_bytes_per_interval = mbpi; + if (phba->cmf_active_mode == LPFC_CFG_MANAGED) +- entry->timer_utilization = phba->cmf_last_ts; ++ entry.timer_utilization = phba->cmf_last_ts; + else +- entry->timer_utilization = ms; +- entry->timer_interval = ms; ++ entry.timer_utilization = ms; ++ entry.timer_interval = ms; + phba->cmf_last_ts = 0; + +- /* Increment rxtable index */ +- head = (head + 1) % LPFC_MAX_RXMONITOR_ENTRY; +- tail = atomic_read(&phba->rxtable_idx_tail); +- if (head == tail) { +- tail = (tail + 1) % LPFC_MAX_RXMONITOR_ENTRY; +- atomic_set(&phba->rxtable_idx_tail, tail); +- } +- atomic_set(&phba->rxtable_idx_head, head); ++ lpfc_rx_monitor_record(phba->rx_monitor, &entry); + } + + if (phba->cmf_active_mode == LPFC_CFG_MONITOR) { +--- a/drivers/scsi/lpfc/lpfc_mem.c ++++ b/drivers/scsi/lpfc/lpfc_mem.c +@@ -344,9 +344,12 @@ lpfc_mem_free_all(struct lpfc_hba *phba) + phba->cgn_i = NULL; + } + +- /* Free RX table */ +- kfree(phba->rxtable); +- phba->rxtable = NULL; ++ /* Free RX Monitor */ ++ if (phba->rx_monitor) { ++ lpfc_rx_monitor_destroy_ring(phba->rx_monitor); ++ kfree(phba->rx_monitor); ++ phba->rx_monitor = NULL; ++ } + + /* Free the iocb lookup array */ + kfree(psli->iocbq_lookup); +--- 
a/drivers/scsi/lpfc/lpfc_sli.c ++++ b/drivers/scsi/lpfc/lpfc_sli.c +@@ -7955,6 +7955,172 @@ static void lpfc_sli4_dip(struct lpfc_hb + } + + /** ++ * lpfc_rx_monitor_create_ring - Initialize ring buffer for rx_monitor ++ * @rx_monitor: Pointer to lpfc_rx_info_monitor object ++ * @entries: Number of rx_info_entry objects to allocate in ring ++ * ++ * Return: ++ * 0 - Success ++ * ENOMEM - Failure to kmalloc ++ **/ ++int lpfc_rx_monitor_create_ring(struct lpfc_rx_info_monitor *rx_monitor, ++ u32 entries) ++{ ++ rx_monitor->ring = kmalloc_array(entries, sizeof(struct rx_info_entry), ++ GFP_KERNEL); ++ if (!rx_monitor->ring) ++ return -ENOMEM; ++ ++ rx_monitor->head_idx = 0; ++ rx_monitor->tail_idx = 0; ++ spin_lock_init(&rx_monitor->lock); ++ rx_monitor->entries = entries; ++ ++ return 0; ++} ++ ++/** ++ * lpfc_rx_monitor_destroy_ring - Free ring buffer for rx_monitor ++ * @rx_monitor: Pointer to lpfc_rx_info_monitor object ++ **/ ++void lpfc_rx_monitor_destroy_ring(struct lpfc_rx_info_monitor *rx_monitor) ++{ ++ spin_lock(&rx_monitor->lock); ++ kfree(rx_monitor->ring); ++ rx_monitor->ring = NULL; ++ rx_monitor->entries = 0; ++ rx_monitor->head_idx = 0; ++ rx_monitor->tail_idx = 0; ++ spin_unlock(&rx_monitor->lock); ++} ++ ++/** ++ * lpfc_rx_monitor_record - Insert an entry into rx_monitor's ring ++ * @rx_monitor: Pointer to lpfc_rx_info_monitor object ++ * @entry: Pointer to rx_info_entry ++ * ++ * Used to insert an rx_info_entry into rx_monitor's ring. Note that this is a ++ * deep copy of rx_info_entry not a shallow copy of the rx_info_entry ptr. ++ * ++ * This is called from lpfc_cmf_timer, which is in timer/softirq context. ++ * ++ * In cases of old data overflow, we do a best effort of FIFO order. 
++ **/ ++void lpfc_rx_monitor_record(struct lpfc_rx_info_monitor *rx_monitor, ++ struct rx_info_entry *entry) ++{ ++ struct rx_info_entry *ring = rx_monitor->ring; ++ u32 *head_idx = &rx_monitor->head_idx; ++ u32 *tail_idx = &rx_monitor->tail_idx; ++ spinlock_t *ring_lock = &rx_monitor->lock; ++ u32 ring_size = rx_monitor->entries; ++ ++ spin_lock(ring_lock); ++ memcpy(&ring[*tail_idx], entry, sizeof(*entry)); ++ *tail_idx = (*tail_idx + 1) % ring_size; ++ ++ /* Best effort of FIFO saved data */ ++ if (*tail_idx == *head_idx) ++ *head_idx = (*head_idx + 1) % ring_size; ++ ++ spin_unlock(ring_lock); ++} ++ ++/** ++ * lpfc_rx_monitor_report - Read out rx_monitor's ring ++ * @phba: Pointer to lpfc_hba object ++ * @rx_monitor: Pointer to lpfc_rx_info_monitor object ++ * @buf: Pointer to char buffer that will contain rx monitor info data ++ * @buf_len: Length buf including null char ++ * @max_read_entries: Maximum number of entries to read out of ring ++ * ++ * Used to dump/read what's in rx_monitor's ring buffer. ++ * ++ * If buf is NULL || buf_len == 0, then it is implied that we want to log the ++ * information to kmsg instead of filling out buf. ++ * ++ * Return: ++ * Number of entries read out of the ring ++ **/ ++u32 lpfc_rx_monitor_report(struct lpfc_hba *phba, ++ struct lpfc_rx_info_monitor *rx_monitor, char *buf, ++ u32 buf_len, u32 max_read_entries) ++{ ++ struct rx_info_entry *ring = rx_monitor->ring; ++ struct rx_info_entry *entry; ++ u32 *head_idx = &rx_monitor->head_idx; ++ u32 *tail_idx = &rx_monitor->tail_idx; ++ spinlock_t *ring_lock = &rx_monitor->lock; ++ u32 ring_size = rx_monitor->entries; ++ u32 cnt = 0; ++ char tmp[DBG_LOG_STR_SZ] = {0}; ++ bool log_to_kmsg = (!buf || !buf_len) ? 
true : false; ++ ++ if (!log_to_kmsg) { ++ /* clear the buffer to be sure */ ++ memset(buf, 0, buf_len); ++ ++ scnprintf(buf, buf_len, "\t%-16s%-16s%-16s%-16s%-8s%-8s%-8s" ++ "%-8s%-8s%-8s%-16s\n", ++ "MaxBPI", "Tot_Data_CMF", ++ "Tot_Data_Cmd", "Tot_Data_Cmpl", ++ "Lat(us)", "Avg_IO", "Max_IO", "Bsy", ++ "IO_cnt", "Info", "BWutil(ms)"); ++ } ++ ++ /* Needs to be _bh because record is called from timer interrupt ++ * context ++ */ ++ spin_lock_bh(ring_lock); ++ while (*head_idx != *tail_idx) { ++ entry = &ring[*head_idx]; ++ ++ /* Read out this entry's data. */ ++ if (!log_to_kmsg) { ++ /* If !log_to_kmsg, then store to buf. */ ++ scnprintf(tmp, sizeof(tmp), ++ "%03d:\t%-16llu%-16llu%-16llu%-16llu%-8llu" ++ "%-8llu%-8llu%-8u%-8u%-8u%u(%u)\n", ++ *head_idx, entry->max_bytes_per_interval, ++ entry->cmf_bytes, entry->total_bytes, ++ entry->rcv_bytes, entry->avg_io_latency, ++ entry->avg_io_size, entry->max_read_cnt, ++ entry->cmf_busy, entry->io_cnt, ++ entry->cmf_info, entry->timer_utilization, ++ entry->timer_interval); ++ ++ /* Check for buffer overflow */ ++ if ((strlen(buf) + strlen(tmp)) >= buf_len) ++ break; ++ ++ /* Append entry's data to buffer */ ++ strlcat(buf, tmp, buf_len); ++ } else { ++ lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT, ++ "4410 %02u: MBPI %llu Xmit %llu " ++ "Cmpl %llu Lat %llu ASz %llu Info %02u " ++ "BWUtil %u Int %u slot %u\n", ++ cnt, entry->max_bytes_per_interval, ++ entry->total_bytes, entry->rcv_bytes, ++ entry->avg_io_latency, ++ entry->avg_io_size, entry->cmf_info, ++ entry->timer_utilization, ++ entry->timer_interval, *head_idx); ++ } ++ ++ *head_idx = (*head_idx + 1) % ring_size; ++ ++ /* Don't feed more than max_read_entries */ ++ cnt++; ++ if (cnt >= max_read_entries) ++ break; ++ } ++ spin_unlock_bh(ring_lock); ++ ++ return cnt; ++} ++ ++/** + * lpfc_cmf_setup - Initialize idle_stat tracking + * @phba: Pointer to HBA context object. 
+ * +@@ -8128,19 +8294,29 @@ no_cmf: + phba->cmf_interval_rate = LPFC_CMF_INTERVAL; + + /* Allocate RX Monitor Buffer */ +- if (!phba->rxtable) { +- phba->rxtable = kmalloc_array(LPFC_MAX_RXMONITOR_ENTRY, +- sizeof(struct rxtable_entry), +- GFP_KERNEL); +- if (!phba->rxtable) { ++ if (!phba->rx_monitor) { ++ phba->rx_monitor = kzalloc(sizeof(*phba->rx_monitor), ++ GFP_KERNEL); ++ ++ if (!phba->rx_monitor) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2644 Failed to alloc memory " + "for RX Monitor Buffer\n"); + return -ENOMEM; + } ++ ++ /* Instruct the rx_monitor object to instantiate its ring */ ++ if (lpfc_rx_monitor_create_ring(phba->rx_monitor, ++ LPFC_MAX_RXMONITOR_ENTRY)) { ++ kfree(phba->rx_monitor); ++ phba->rx_monitor = NULL; ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, ++ "2645 Failed to alloc memory " ++ "for RX Monitor's Ring\n"); ++ return -ENOMEM; ++ } + } +- atomic_set(&phba->rxtable_idx_head, 0); +- atomic_set(&phba->rxtable_idx_tail, 0); ++ + return 0; + } + diff --git a/queue-5.19/scsi-qedf-populate-sysfs-attributes-for-vport.patch b/queue-5.19/scsi-qedf-populate-sysfs-attributes-for-vport.patch new file mode 100644 index 00000000000..8b32a62da47 --- /dev/null +++ b/queue-5.19/scsi-qedf-populate-sysfs-attributes-for-vport.patch @@ -0,0 +1,54 @@ +From 592642e6b11e620e4b43189f8072752429fc8dc3 Mon Sep 17 00:00:00 2001 +From: Saurav Kashyap +Date: Mon, 19 Sep 2022 06:44:34 -0700 +Subject: scsi: qedf: Populate sysfs attributes for vport + +From: Saurav Kashyap + +commit 592642e6b11e620e4b43189f8072752429fc8dc3 upstream. + +Few vport parameters were displayed by systool as 'Unknown' or 'NULL'. +Copy speed, supported_speed, frame_size and update port_type for NPIV port. + +Link: https://lore.kernel.org/r/20220919134434.3513-1-njavali@marvell.com +Cc: stable@vger.kernel.org +Tested-by: Guangwu Zhang +Reviewed-by: John Meneghini +Signed-off-by: Saurav Kashyap +Signed-off-by: Nilesh Javali +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qedf/qedf_main.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +--- a/drivers/scsi/qedf/qedf_main.c ++++ b/drivers/scsi/qedf/qedf_main.c +@@ -1921,6 +1921,27 @@ static int qedf_vport_create(struct fc_v + fc_vport_setlink(vn_port); + } + ++ /* Set symbolic node name */ ++ if (base_qedf->pdev->device == QL45xxx) ++ snprintf(fc_host_symbolic_name(vn_port->host), 256, ++ "Marvell FastLinQ 45xxx FCoE v%s", QEDF_VERSION); ++ ++ if (base_qedf->pdev->device == QL41xxx) ++ snprintf(fc_host_symbolic_name(vn_port->host), 256, ++ "Marvell FastLinQ 41xxx FCoE v%s", QEDF_VERSION); ++ ++ /* Set supported speed */ ++ fc_host_supported_speeds(vn_port->host) = n_port->link_supported_speeds; ++ ++ /* Set speed */ ++ vn_port->link_speed = n_port->link_speed; ++ ++ /* Set port type */ ++ fc_host_port_type(vn_port->host) = FC_PORTTYPE_NPIV; ++ ++ /* Set maxframe size */ ++ fc_host_maxframe_size(vn_port->host) = n_port->mfs; ++ + QEDF_INFO(&(base_qedf->dbg_ctx), QEDF_LOG_NPIV, "vn_port=%p.\n", + vn_port); + diff --git a/queue-5.19/serial-8250-let-drivers-request-full-16550a-feature-probing.patch b/queue-5.19/serial-8250-let-drivers-request-full-16550a-feature-probing.patch new file mode 100644 index 00000000000..62a3021b7a3 --- /dev/null +++ b/queue-5.19/serial-8250-let-drivers-request-full-16550a-feature-probing.patch @@ -0,0 +1,62 @@ +From 9906890c89e4dbd900ed87ad3040080339a7f411 Mon Sep 17 00:00:00 2001 +From: "Maciej W. Rozycki" +Date: Wed, 21 Sep 2022 00:35:32 +0100 +Subject: serial: 8250: Let drivers request full 16550A feature probing + +From: Maciej W. Rozycki + +commit 9906890c89e4dbd900ed87ad3040080339a7f411 upstream. + +A SERIAL_8250_16550A_VARIANTS configuration option has been recently +defined that lets one request the 8250 driver not to probe for 16550A +device features so as to reduce the driver's device startup time in +virtual machines. 
+ +Some actual hardware devices require these features to have been fully +determined however for their driver to work correctly, so define a flag +to let drivers request full 16550A feature probing on a device-by-device +basis if required regardless of the SERIAL_8250_16550A_VARIANTS option +setting chosen. + +Fixes: dc56ecb81a0a ("serial: 8250: Support disabling mdelay-filled probes of 16550A variants") +Cc: stable@vger.kernel.org # v5.6+ +Reported-by: Anders Blomdell +Signed-off-by: Maciej W. Rozycki +Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209202357520.41633@angie.orcam.me.uk +Signed-off-by: Greg Kroah-Hartman +--- + drivers/tty/serial/8250/8250_port.c | 3 ++- + include/linux/serial_core.h | 3 ++- + 2 files changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/tty/serial/8250/8250_port.c ++++ b/drivers/tty/serial/8250/8250_port.c +@@ -1029,7 +1029,8 @@ static void autoconfig_16550a(struct uar + up->port.type = PORT_16550A; + up->capabilities |= UART_CAP_FIFO; + +- if (!IS_ENABLED(CONFIG_SERIAL_8250_16550A_VARIANTS)) ++ if (!IS_ENABLED(CONFIG_SERIAL_8250_16550A_VARIANTS) && ++ !(up->port.flags & UPF_FULL_PROBE)) + return; + + /* +--- a/include/linux/serial_core.h ++++ b/include/linux/serial_core.h +@@ -101,7 +101,7 @@ struct uart_icount { + __u32 buf_overrun; + }; + +-typedef unsigned int __bitwise upf_t; ++typedef u64 __bitwise upf_t; + typedef unsigned int __bitwise upstat_t; + + struct uart_port { +@@ -208,6 +208,7 @@ struct uart_port { + #define UPF_FIXED_PORT ((__force upf_t) (1 << 29)) + #define UPF_DEAD ((__force upf_t) (1 << 30)) + #define UPF_IOREMAP ((__force upf_t) (1 << 31)) ++#define UPF_FULL_PROBE ((__force upf_t) (1ULL << 32)) + + #define __UPF_CHANGE_MASK 0x17fff + #define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK) diff --git a/queue-5.19/serial-8250-request-full-16550a-feature-probing-for-oxsemi-pcie-devices.patch b/queue-5.19/serial-8250-request-full-16550a-feature-probing-for-oxsemi-pcie-devices.patch new file mode 100644 
index 00000000000..1a0f8ce6a45 --- /dev/null +++ b/queue-5.19/serial-8250-request-full-16550a-feature-probing-for-oxsemi-pcie-devices.patch @@ -0,0 +1,50 @@ +From 00b7a4d4ee42be1c515e56cb1e8ba0f25e271d8e Mon Sep 17 00:00:00 2001 +From: "Maciej W. Rozycki" +Date: Wed, 21 Sep 2022 00:35:37 +0100 +Subject: serial: 8250: Request full 16550A feature probing for OxSemi PCIe devices + +From: Maciej W. Rozycki + +commit 00b7a4d4ee42be1c515e56cb1e8ba0f25e271d8e upstream. + +Oxford Semiconductor PCIe (Tornado) 950 serial port devices need to +operate in the enhanced mode via the EFR register for the Divide-by-M +N/8 baud rate generator prescaler to be used in their native UART mode. +Otherwise the prescaler is fixed at 1 causing grossly incorrect baud +rates to be programmed. + +Accessing the EFR register requires 16550A features to have been probed +for, so request this to happen regardless of SERIAL_8250_16550A_VARIANTS +by setting UPF_FULL_PROBE in port flags. + +Fixes: 366f6c955d4d ("serial: 8250: Add proper clock handling for OxSemi PCIe devices") +Cc: stable@vger.kernel.org # v5.19+ +Reported-by: Anders Blomdell +Signed-off-by: Maciej W. Rozycki +Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209210005040.41633@angie.orcam.me.uk +Signed-off-by: Greg Kroah-Hartman +--- + drivers/tty/serial/8250/8250_pci.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/tty/serial/8250/8250_pci.c ++++ b/drivers/tty/serial/8250/8250_pci.c +@@ -1232,6 +1232,10 @@ static void pci_oxsemi_tornado_set_mctrl + serial8250_do_set_mctrl(port, mctrl); + } + ++/* ++ * We require EFR features for clock programming, so set UPF_FULL_PROBE ++ * for full probing regardless of CONFIG_SERIAL_8250_16550A_VARIANTS setting. 
++ */ + static int pci_oxsemi_tornado_setup(struct serial_private *priv, + const struct pciserial_board *board, + struct uart_8250_port *up, int idx) +@@ -1239,6 +1243,7 @@ static int pci_oxsemi_tornado_setup(stru + struct pci_dev *dev = priv->dev; + + if (pci_oxsemi_tornado_p(dev)) { ++ up->port.flags |= UPF_FULL_PROBE; + up->port.get_divisor = pci_oxsemi_tornado_get_divisor; + up->port.set_divisor = pci_oxsemi_tornado_set_divisor; + up->port.set_mctrl = pci_oxsemi_tornado_set_mctrl; diff --git a/queue-5.19/serial-cpm_uart-don-t-request-irq-too-early-for-console-port.patch b/queue-5.19/serial-cpm_uart-don-t-request-irq-too-early-for-console-port.patch new file mode 100644 index 00000000000..425ba058061 --- /dev/null +++ b/queue-5.19/serial-cpm_uart-don-t-request-irq-too-early-for-console-port.patch @@ -0,0 +1,84 @@ +From 30963b2f75bfdbbcf1cc5d80bf88fec7aaba808d Mon Sep 17 00:00:00 2001 +From: Christophe Leroy +Date: Fri, 30 Sep 2022 10:33:56 +0200 +Subject: serial: cpm_uart: Don't request IRQ too early for console port + +From: Christophe Leroy + +commit 30963b2f75bfdbbcf1cc5d80bf88fec7aaba808d upstream. + +The following message is seen during boot and the activation of +console port gets delayed until normal serial ports activation. + +[ 0.001346] irq: no irq domain found for pic@930 ! + +The console port doesn't need irq, perform irq reservation later, +during cpm_uart probe. + +While at it, don't use NO_IRQ but 0 which is the value returned +by irq_of_parse_and_map() in case of error. By chance powerpc's +NO_IRQ has value 0 but on some architectures it is -1. 
+ +Fixes: 14d893fc6846 ("powerpc/8xx: Convert CPM1 interrupt controller to platform_device") +Cc: stable@vger.kernel.org +Signed-off-by: Christophe Leroy +Link: https://lore.kernel.org/r/8bed0f30c2e9ef16ae64fb1243a16d54a48eb8da.1664526717.git.christophe.leroy@csgroup.eu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/tty/serial/cpm_uart/cpm_uart_core.c | 22 ++++++++++------------ + 1 file changed, 10 insertions(+), 12 deletions(-) + +--- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c ++++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c +@@ -1214,12 +1214,6 @@ static int cpm_uart_init_port(struct dev + pinfo->port.fifosize = pinfo->tx_nrfifos * pinfo->tx_fifosize; + spin_lock_init(&pinfo->port.lock); + +- pinfo->port.irq = irq_of_parse_and_map(np, 0); +- if (pinfo->port.irq == NO_IRQ) { +- ret = -EINVAL; +- goto out_pram; +- } +- + for (i = 0; i < NUM_GPIOS; i++) { + struct gpio_desc *gpiod; + +@@ -1229,7 +1223,7 @@ static int cpm_uart_init_port(struct dev + + if (IS_ERR(gpiod)) { + ret = PTR_ERR(gpiod); +- goto out_irq; ++ goto out_pram; + } + + if (gpiod) { +@@ -1255,8 +1249,6 @@ static int cpm_uart_init_port(struct dev + + return cpm_uart_request_port(&pinfo->port); + +-out_irq: +- irq_dispose_mapping(pinfo->port.irq); + out_pram: + cpm_uart_unmap_pram(pinfo, pram); + out_mem: +@@ -1436,11 +1428,17 @@ static int cpm_uart_probe(struct platfor + /* initialize the device pointer for the port */ + pinfo->port.dev = &ofdev->dev; + ++ pinfo->port.irq = irq_of_parse_and_map(ofdev->dev.of_node, 0); ++ if (!pinfo->port.irq) ++ return -EINVAL; ++ + ret = cpm_uart_init_port(ofdev->dev.of_node, pinfo); +- if (ret) +- return ret; ++ if (!ret) ++ return uart_add_one_port(&cpm_reg, &pinfo->port); ++ ++ irq_dispose_mapping(pinfo->port.irq); + +- return uart_add_one_port(&cpm_reg, &pinfo->port); ++ return ret; + } + + static int cpm_uart_remove(struct platform_device *ofdev) diff --git a/queue-5.19/serial-stm32-deassert-transmit-enable-on-rs485_config.patch 
b/queue-5.19/serial-stm32-deassert-transmit-enable-on-rs485_config.patch new file mode 100644 index 00000000000..879a38fec5a --- /dev/null +++ b/queue-5.19/serial-stm32-deassert-transmit-enable-on-rs485_config.patch @@ -0,0 +1,166 @@ +From adafbbf6895eb0ce41a313c6ee68870ab9aa93cd Mon Sep 17 00:00:00 2001 +From: Lukas Wunner +Date: Sun, 11 Sep 2022 11:02:03 +0200 +Subject: serial: stm32: Deassert Transmit Enable on ->rs485_config() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Lukas Wunner + +commit adafbbf6895eb0ce41a313c6ee68870ab9aa93cd upstream. + +The STM32 USART can control RS-485 Transmit Enable in hardware. Since +commit 7df5081cbf5e ("serial: stm32: Add RS485 RTS GPIO control"), +it can alternatively be controlled in software. That was done to allow +RS-485 even if the RTS pin is unavailable because it's pinmuxed to a +different function. + +However the commit neglected to deassert Transmit Enable upon invocation +of the ->rs485_config() callback. Fix it. + +Avoid forward declarations by moving stm32_usart_tx_empty(), +stm32_usart_rs485_rts_enable() and stm32_usart_rs485_rts_disable() +further up in the driver. 
+ +Fixes: 7df5081cbf5e ("serial: stm32: Add RS485 RTS GPIO control") +Cc: stable@vger.kernel.org # v5.9+ +Cc: Marek Vasut +Reviewed-by: Ilpo Järvinen +Signed-off-by: Lukas Wunner +Link: https://lore.kernel.org/r/6059eab35dba394468335ef640df8b0050fd9dbd.1662886616.git.lukas@wunner.de +Signed-off-by: Greg Kroah-Hartman +--- + drivers/tty/serial/stm32-usart.c | 100 ++++++++++++++++++++------------------- + 1 file changed, 53 insertions(+), 47 deletions(-) + +--- a/drivers/tty/serial/stm32-usart.c ++++ b/drivers/tty/serial/stm32-usart.c +@@ -62,6 +62,53 @@ static void stm32_usart_clr_bits(struct + writel_relaxed(val, port->membase + reg); + } + ++static unsigned int stm32_usart_tx_empty(struct uart_port *port) ++{ ++ struct stm32_port *stm32_port = to_stm32_port(port); ++ const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; ++ ++ if (readl_relaxed(port->membase + ofs->isr) & USART_SR_TC) ++ return TIOCSER_TEMT; ++ ++ return 0; ++} ++ ++static void stm32_usart_rs485_rts_enable(struct uart_port *port) ++{ ++ struct stm32_port *stm32_port = to_stm32_port(port); ++ struct serial_rs485 *rs485conf = &port->rs485; ++ ++ if (stm32_port->hw_flow_control || ++ !(rs485conf->flags & SER_RS485_ENABLED)) ++ return; ++ ++ if (rs485conf->flags & SER_RS485_RTS_ON_SEND) { ++ mctrl_gpio_set(stm32_port->gpios, ++ stm32_port->port.mctrl | TIOCM_RTS); ++ } else { ++ mctrl_gpio_set(stm32_port->gpios, ++ stm32_port->port.mctrl & ~TIOCM_RTS); ++ } ++} ++ ++static void stm32_usart_rs485_rts_disable(struct uart_port *port) ++{ ++ struct stm32_port *stm32_port = to_stm32_port(port); ++ struct serial_rs485 *rs485conf = &port->rs485; ++ ++ if (stm32_port->hw_flow_control || ++ !(rs485conf->flags & SER_RS485_ENABLED)) ++ return; ++ ++ if (rs485conf->flags & SER_RS485_RTS_ON_SEND) { ++ mctrl_gpio_set(stm32_port->gpios, ++ stm32_port->port.mctrl & ~TIOCM_RTS); ++ } else { ++ mctrl_gpio_set(stm32_port->gpios, ++ stm32_port->port.mctrl | TIOCM_RTS); ++ } ++} ++ + static void 
stm32_usart_config_reg_rs485(u32 *cr1, u32 *cr3, u32 delay_ADE, + u32 delay_DDE, u32 baud) + { +@@ -145,6 +192,12 @@ static int stm32_usart_config_rs485(stru + + stm32_usart_set_bits(port, ofs->cr1, BIT(cfg->uart_enable_bit)); + ++ /* Adjust RTS polarity in case it's driven in software */ ++ if (stm32_usart_tx_empty(port)) ++ stm32_usart_rs485_rts_disable(port); ++ else ++ stm32_usart_rs485_rts_enable(port); ++ + return 0; + } + +@@ -460,42 +513,6 @@ static void stm32_usart_tc_interrupt_dis + stm32_usart_clr_bits(port, ofs->cr1, USART_CR1_TCIE); + } + +-static void stm32_usart_rs485_rts_enable(struct uart_port *port) +-{ +- struct stm32_port *stm32_port = to_stm32_port(port); +- struct serial_rs485 *rs485conf = &port->rs485; +- +- if (stm32_port->hw_flow_control || +- !(rs485conf->flags & SER_RS485_ENABLED)) +- return; +- +- if (rs485conf->flags & SER_RS485_RTS_ON_SEND) { +- mctrl_gpio_set(stm32_port->gpios, +- stm32_port->port.mctrl | TIOCM_RTS); +- } else { +- mctrl_gpio_set(stm32_port->gpios, +- stm32_port->port.mctrl & ~TIOCM_RTS); +- } +-} +- +-static void stm32_usart_rs485_rts_disable(struct uart_port *port) +-{ +- struct stm32_port *stm32_port = to_stm32_port(port); +- struct serial_rs485 *rs485conf = &port->rs485; +- +- if (stm32_port->hw_flow_control || +- !(rs485conf->flags & SER_RS485_ENABLED)) +- return; +- +- if (rs485conf->flags & SER_RS485_RTS_ON_SEND) { +- mctrl_gpio_set(stm32_port->gpios, +- stm32_port->port.mctrl & ~TIOCM_RTS); +- } else { +- mctrl_gpio_set(stm32_port->gpios, +- stm32_port->port.mctrl | TIOCM_RTS); +- } +-} +- + static void stm32_usart_transmit_chars_pio(struct uart_port *port) + { + struct stm32_port *stm32_port = to_stm32_port(port); +@@ -738,17 +755,6 @@ static irqreturn_t stm32_usart_threaded_ + return IRQ_HANDLED; + } + +-static unsigned int stm32_usart_tx_empty(struct uart_port *port) +-{ +- struct stm32_port *stm32_port = to_stm32_port(port); +- const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; +- +- if 
(readl_relaxed(port->membase + ofs->isr) & USART_SR_TC) +- return TIOCSER_TEMT; +- +- return 0; +-} +- + static void stm32_usart_set_mctrl(struct uart_port *port, unsigned int mctrl) + { + struct stm32_port *stm32_port = to_stm32_port(port); diff --git a/queue-5.19/series b/queue-5.19/series index 00641bd0fe9..6d9897fb771 100644 --- a/queue-5.19/series +++ b/queue-5.19/series @@ -72,3 +72,25 @@ drm-udl-restore-display-mode-on-resume.patch arm64-mte-move-register-initialization-to-c.patch arm64-errata-add-cortex-a55-to-the-repeat-tlbi-list.patch clocksource-drivers-arm_arch_timer-fix-cntpct_lo-and-cntvct_lo-value.patch +mm-hugetlb-fix-races-when-looking-up-a-cont-pte-pmd-size-hugetlb-page.patch +mm-damon-validate-if-the-pmd-entry-is-present-before-accessing.patch +mm-uffd-fix-warning-without-pte_marker_uffd_wp-compiled-in.patch +mm-mmap-undo-mmap-when-arch_validate_flags-fails.patch +xen-gntdev-prevent-leaking-grants.patch +xen-gntdev-accommodate-vma-splitting.patch +pci-sanitise-firmware-bar-assignments-behind-a-pci-pci-bridge.patch +serial-cpm_uart-don-t-request-irq-too-early-for-console-port.patch +serial-stm32-deassert-transmit-enable-on-rs485_config.patch +serial-8250-let-drivers-request-full-16550a-feature-probing.patch +serial-8250-request-full-16550a-feature-probing-for-oxsemi-pcie-devices.patch +nfsd-protect-against-send-buffer-overflow-in-nfsv3-readdir.patch +nfsd-protect-against-send-buffer-overflow-in-nfsv2-read.patch +nfsd-protect-against-send-buffer-overflow-in-nfsv3-read.patch +cpufreq-qcom-cpufreq-hw-fix-uninitialized-throttled_freq-warning.patch +powercap-intel_rapl-use-standard-energy-unit-for-spr-dram-rapl-domain.patch +powerpc-kconfig-fix-non-existing-config_ppc_fsl_booke.patch +powerpc-boot-explicitly-disable-usage-of-spe-instructions.patch +slimbus-qcom-ngd-use-correct-error-in-message-of-pdr_add_lookup-failure.patch +slimbus-qcom-ngd-cleanup-in-probe-error-path.patch +scsi-lpfc-rework-mib-rx-monitor-debug-info-logic.patch 
+scsi-qedf-populate-sysfs-attributes-for-vport.patch diff --git a/queue-5.19/slimbus-qcom-ngd-cleanup-in-probe-error-path.patch b/queue-5.19/slimbus-qcom-ngd-cleanup-in-probe-error-path.patch new file mode 100644 index 00000000000..610e1586bf8 --- /dev/null +++ b/queue-5.19/slimbus-qcom-ngd-cleanup-in-probe-error-path.patch @@ -0,0 +1,78 @@ +From 16f14551d0df9e7cd283545d7d748829594d912f Mon Sep 17 00:00:00 2001 +From: Krzysztof Kozlowski +Date: Fri, 16 Sep 2022 13:29:08 +0100 +Subject: slimbus: qcom-ngd: cleanup in probe error path + +From: Krzysztof Kozlowski + +commit 16f14551d0df9e7cd283545d7d748829594d912f upstream. + +Add proper error path in probe() to cleanup resources previously +acquired/allocated to fix warnings visible during probe deferral: + + notifier callback qcom_slim_ngd_ssr_notify already registered + WARNING: CPU: 6 PID: 70 at kernel/notifier.c:28 notifier_chain_register+0x5c/0x90 + Modules linked in: + CPU: 6 PID: 70 Comm: kworker/u16:1 Not tainted 6.0.0-rc3-next-20220830 #380 + Call trace: + notifier_chain_register+0x5c/0x90 + srcu_notifier_chain_register+0x44/0x90 + qcom_register_ssr_notifier+0x38/0x4c + qcom_slim_ngd_ctrl_probe+0xd8/0x400 + platform_probe+0x6c/0xe0 + really_probe+0xbc/0x2d4 + __driver_probe_device+0x78/0xe0 + driver_probe_device+0x3c/0x12c + __device_attach_driver+0xb8/0x120 + bus_for_each_drv+0x78/0xd0 + __device_attach+0xa8/0x1c0 + device_initial_probe+0x18/0x24 + bus_probe_device+0xa0/0xac + deferred_probe_work_func+0x88/0xc0 + process_one_work+0x1d4/0x320 + worker_thread+0x2cc/0x44c + kthread+0x110/0x114 + ret_from_fork+0x10/0x20 + +Fixes: e1ae85e1830e ("slimbus: qcom-ngd-ctrl: add Protection Domain Restart Support") +Cc: +Signed-off-by: Krzysztof Kozlowski +Signed-off-by: Srinivas Kandagatla +Link: https://lore.kernel.org/r/20220916122910.170730-3-srinivas.kandagatla@linaro.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/slimbus/qcom-ngd-ctrl.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + 
+--- a/drivers/slimbus/qcom-ngd-ctrl.c ++++ b/drivers/slimbus/qcom-ngd-ctrl.c +@@ -1576,18 +1576,27 @@ static int qcom_slim_ngd_ctrl_probe(stru + ctrl->pdr = pdr_handle_alloc(slim_pd_status, ctrl); + if (IS_ERR(ctrl->pdr)) { + dev_err(dev, "Failed to init PDR handle\n"); +- return PTR_ERR(ctrl->pdr); ++ ret = PTR_ERR(ctrl->pdr); ++ goto err_pdr_alloc; + } + + pds = pdr_add_lookup(ctrl->pdr, "avs/audio", "msm/adsp/audio_pd"); + if (IS_ERR(pds) && PTR_ERR(pds) != -EALREADY) { + ret = PTR_ERR(pds); + dev_err(dev, "pdr add lookup failed: %d\n", ret); +- return ret; ++ goto err_pdr_lookup; + } + + platform_driver_register(&qcom_slim_ngd_driver); + return of_qcom_slim_ngd_register(dev, ctrl); ++ ++err_pdr_alloc: ++ qcom_unregister_ssr_notifier(ctrl->notifier, &ctrl->nb); ++ ++err_pdr_lookup: ++ pdr_handle_release(ctrl->pdr); ++ ++ return ret; + } + + static int qcom_slim_ngd_ctrl_remove(struct platform_device *pdev) diff --git a/queue-5.19/slimbus-qcom-ngd-use-correct-error-in-message-of-pdr_add_lookup-failure.patch b/queue-5.19/slimbus-qcom-ngd-use-correct-error-in-message-of-pdr_add_lookup-failure.patch new file mode 100644 index 00000000000..b5e4784f671 --- /dev/null +++ b/queue-5.19/slimbus-qcom-ngd-use-correct-error-in-message-of-pdr_add_lookup-failure.patch @@ -0,0 +1,35 @@ +From 5038d21dde818fe74ba1fcb6f2cee35b8c2ebbf2 Mon Sep 17 00:00:00 2001 +From: Krzysztof Kozlowski +Date: Fri, 16 Sep 2022 13:29:07 +0100 +Subject: slimbus: qcom-ngd: use correct error in message of pdr_add_lookup() failure + +From: Krzysztof Kozlowski + +commit 5038d21dde818fe74ba1fcb6f2cee35b8c2ebbf2 upstream. + +Use correct error code, instead of previous 'ret' value, when printing +error from pdr_add_lookup() failure. 
+ +Fixes: e1ae85e1830e ("slimbus: qcom-ngd-ctrl: add Protection Domain Restart Support") +Cc: +Signed-off-by: Krzysztof Kozlowski +Signed-off-by: Srinivas Kandagatla +Link: https://lore.kernel.org/r/20220916122910.170730-2-srinivas.kandagatla@linaro.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/slimbus/qcom-ngd-ctrl.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/slimbus/qcom-ngd-ctrl.c ++++ b/drivers/slimbus/qcom-ngd-ctrl.c +@@ -1581,8 +1581,9 @@ static int qcom_slim_ngd_ctrl_probe(stru + + pds = pdr_add_lookup(ctrl->pdr, "avs/audio", "msm/adsp/audio_pd"); + if (IS_ERR(pds) && PTR_ERR(pds) != -EALREADY) { ++ ret = PTR_ERR(pds); + dev_err(dev, "pdr add lookup failed: %d\n", ret); +- return PTR_ERR(pds); ++ return ret; + } + + platform_driver_register(&qcom_slim_ngd_driver); diff --git a/queue-5.19/xen-gntdev-accommodate-vma-splitting.patch b/queue-5.19/xen-gntdev-accommodate-vma-splitting.patch new file mode 100644 index 00000000000..65184a01d08 --- /dev/null +++ b/queue-5.19/xen-gntdev-accommodate-vma-splitting.patch @@ -0,0 +1,263 @@ +From 5c13a4a0291b30191eff9ead8d010e1ca43a4d0c Mon Sep 17 00:00:00 2001 +From: "M. Vefa Bicakci" +Date: Sun, 2 Oct 2022 18:20:06 -0400 +Subject: xen/gntdev: Accommodate VMA splitting + +From: M. Vefa Bicakci + +commit 5c13a4a0291b30191eff9ead8d010e1ca43a4d0c upstream. + +Prior to this commit, the gntdev driver code did not handle the +following scenario correctly with paravirtualized (PV) Xen domains: + +* User process sets up a gntdev mapping composed of two grant mappings + (i.e., two pages shared by another Xen domain). +* User process munmap()s one of the pages. +* User process munmap()s the remaining page. +* User process exits. + +In the scenario above, the user process would cause the kernel to log +the following messages in dmesg for the first munmap(), and the second +munmap() call would result in similar log messages: + + BUG: Bad page map in process doublemap.test pte:... pmd:... 
+ page:0000000057c97bff refcount:1 mapcount:-1 \ + mapping:0000000000000000 index:0x0 pfn:... + ... + page dumped because: bad pte + ... + file:gntdev fault:0x0 mmap:gntdev_mmap [xen_gntdev] readpage:0x0 + ... + Call Trace: + + dump_stack_lvl+0x46/0x5e + print_bad_pte.cold+0x66/0xb6 + unmap_page_range+0x7e5/0xdc0 + unmap_vmas+0x78/0xf0 + unmap_region+0xa8/0x110 + __do_munmap+0x1ea/0x4e0 + __vm_munmap+0x75/0x120 + __x64_sys_munmap+0x28/0x40 + do_syscall_64+0x38/0x90 + entry_SYSCALL_64_after_hwframe+0x61/0xcb + ... + +For each munmap() call, the Xen hypervisor (if built with CONFIG_DEBUG) +would print out the following and trigger a general protection fault in +the affected Xen PV domain: + + (XEN) d0v... Attempt to implicitly unmap d0's grant PTE ... + (XEN) d0v... Attempt to implicitly unmap d0's grant PTE ... + +As of this writing, gntdev_grant_map structure's vma field (referred to +as map->vma below) is mainly used for checking the start and end +addresses of mappings. However, with split VMAs, these may change, and +there could be more than one VMA associated with a gntdev mapping. +Hence, remove the use of map->vma and rely on map->pages_vm_start for +the original start address and on (map->count << PAGE_SHIFT) for the +original mapping size. Let the invalidate() and find_special_page() +hooks use these. + +Also, given that there can be multiple VMAs associated with a gntdev +mapping, move the "mmu_interval_notifier_remove(&map->notifier)" call to +the end of gntdev_put_map, so that the MMU notifier is only removed +after the closing of the last remaining VMA. + +Finally, use an atomic to prevent inadvertent gntdev mapping re-use, +instead of using the map->live_grants atomic counter and/or the map->vma +pointer (the latter of which is now removed). This prevents the +userspace from mmap()'ing (with MAP_FIXED) a gntdev mapping over the +same address range as a previously set up gntdev mapping. 
This scenario +can be summarized with the following call-trace, which was valid prior +to this commit: + + mmap + gntdev_mmap + mmap (repeat mmap with MAP_FIXED over the same address range) + gntdev_invalidate + unmap_grant_pages (sets 'being_removed' entries to true) + gnttab_unmap_refs_async + unmap_single_vma + gntdev_mmap (maps the shared pages again) + munmap + gntdev_invalidate + unmap_grant_pages + (no-op because 'being_removed' entries are true) + unmap_single_vma (For PV domains, Xen reports that a granted page + is being unmapped and triggers a general protection fault in the + affected domain, if Xen was built with CONFIG_DEBUG) + +The fix for this last scenario could be worth its own commit, but we +opted for a single commit, because removing the gntdev_grant_map +structure's vma field requires guarding the entry to gntdev_mmap(), and +the live_grants atomic counter is not sufficient on its own to prevent +the mmap() over a pre-existing mapping. + +Link: https://github.com/QubesOS/qubes-issues/issues/7631 +Fixes: ab31523c2fca ("xen/gntdev: allow usermode to map granted pages") +Cc: stable@vger.kernel.org +Signed-off-by: M. 
Vefa Bicakci +Reviewed-by: Juergen Gross +Link: https://lore.kernel.org/r/20221002222006.2077-3-m.v.b@runbox.com +Signed-off-by: Juergen Gross +Signed-off-by: Greg Kroah-Hartman +--- + drivers/xen/gntdev-common.h | 3 +- + drivers/xen/gntdev.c | 58 ++++++++++++++++++-------------------------- + 2 files changed, 27 insertions(+), 34 deletions(-) + +--- a/drivers/xen/gntdev-common.h ++++ b/drivers/xen/gntdev-common.h +@@ -44,9 +44,10 @@ struct gntdev_unmap_notify { + }; + + struct gntdev_grant_map { ++ atomic_t in_use; + struct mmu_interval_notifier notifier; ++ bool notifier_init; + struct list_head next; +- struct vm_area_struct *vma; + int index; + int count; + int flags; +--- a/drivers/xen/gntdev.c ++++ b/drivers/xen/gntdev.c +@@ -286,6 +286,9 @@ void gntdev_put_map(struct gntdev_priv * + */ + } + ++ if (use_ptemod && map->notifier_init) ++ mmu_interval_notifier_remove(&map->notifier); ++ + if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { + notify_remote_via_evtchn(map->notify.event); + evtchn_put(map->notify.event); +@@ -298,7 +301,7 @@ void gntdev_put_map(struct gntdev_priv * + static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) + { + struct gntdev_grant_map *map = data; +- unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; ++ unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT; + int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte | + (1 << _GNTMAP_guest_avail0); + u64 pte_maddr; +@@ -508,11 +511,7 @@ static void gntdev_vma_close(struct vm_a + struct gntdev_priv *priv = file->private_data; + + pr_debug("gntdev_vma_close %p\n", vma); +- if (use_ptemod) { +- WARN_ON(map->vma != vma); +- mmu_interval_notifier_remove(&map->notifier); +- map->vma = NULL; +- } ++ + vma->vm_private_data = NULL; + gntdev_put_map(priv, map); + } +@@ -540,29 +539,30 @@ static bool gntdev_invalidate(struct mmu + struct gntdev_grant_map *map = + container_of(mn, struct gntdev_grant_map, notifier); + unsigned long mstart, mend; 
++ unsigned long map_start, map_end; + + if (!mmu_notifier_range_blockable(range)) + return false; + ++ map_start = map->pages_vm_start; ++ map_end = map->pages_vm_start + (map->count << PAGE_SHIFT); ++ + /* + * If the VMA is split or otherwise changed the notifier is not + * updated, but we don't want to process VA's outside the modified + * VMA. FIXME: It would be much more understandable to just prevent + * modifying the VMA in the first place. + */ +- if (map->vma->vm_start >= range->end || +- map->vma->vm_end <= range->start) ++ if (map_start >= range->end || map_end <= range->start) + return true; + +- mstart = max(range->start, map->vma->vm_start); +- mend = min(range->end, map->vma->vm_end); ++ mstart = max(range->start, map_start); ++ mend = min(range->end, map_end); + pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n", +- map->index, map->count, +- map->vma->vm_start, map->vma->vm_end, +- range->start, range->end, mstart, mend); +- unmap_grant_pages(map, +- (mstart - map->vma->vm_start) >> PAGE_SHIFT, +- (mend - mstart) >> PAGE_SHIFT); ++ map->index, map->count, map_start, map_end, ++ range->start, range->end, mstart, mend); ++ unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT, ++ (mend - mstart) >> PAGE_SHIFT); + + return true; + } +@@ -1042,18 +1042,15 @@ static int gntdev_mmap(struct file *flip + return -EINVAL; + + pr_debug("map %d+%d at %lx (pgoff %lx)\n", +- index, count, vma->vm_start, vma->vm_pgoff); ++ index, count, vma->vm_start, vma->vm_pgoff); + + mutex_lock(&priv->lock); + map = gntdev_find_map_index(priv, index, count); + if (!map) + goto unlock_out; +- if (use_ptemod && map->vma) +- goto unlock_out; +- if (atomic_read(&map->live_grants)) { +- err = -EAGAIN; ++ if (!atomic_add_unless(&map->in_use, 1, 1)) + goto unlock_out; +- } ++ + refcount_inc(&map->users); + + vma->vm_ops = &gntdev_vmops; +@@ -1074,15 +1071,16 @@ static int gntdev_mmap(struct file *flip + map->flags |= GNTMAP_readonly; + } + ++ map->pages_vm_start = 
vma->vm_start; ++ + if (use_ptemod) { +- map->vma = vma; + err = mmu_interval_notifier_insert_locked( + &map->notifier, vma->vm_mm, vma->vm_start, + vma->vm_end - vma->vm_start, &gntdev_mmu_ops); +- if (err) { +- map->vma = NULL; ++ if (err) + goto out_unlock_put; +- } ++ ++ map->notifier_init = true; + } + mutex_unlock(&priv->lock); + +@@ -1099,7 +1097,6 @@ static int gntdev_mmap(struct file *flip + */ + mmu_interval_read_begin(&map->notifier); + +- map->pages_vm_start = vma->vm_start; + err = apply_to_page_range(vma->vm_mm, vma->vm_start, + vma->vm_end - vma->vm_start, + find_grant_ptes, map); +@@ -1128,13 +1125,8 @@ unlock_out: + out_unlock_put: + mutex_unlock(&priv->lock); + out_put_map: +- if (use_ptemod) { ++ if (use_ptemod) + unmap_grant_pages(map, 0, map->count); +- if (map->vma) { +- mmu_interval_notifier_remove(&map->notifier); +- map->vma = NULL; +- } +- } + gntdev_put_map(priv, map); + return err; + } diff --git a/queue-5.19/xen-gntdev-prevent-leaking-grants.patch b/queue-5.19/xen-gntdev-prevent-leaking-grants.patch new file mode 100644 index 00000000000..04dc2ce3854 --- /dev/null +++ b/queue-5.19/xen-gntdev-prevent-leaking-grants.patch @@ -0,0 +1,154 @@ +From 0991028cd49567d7016d1b224fe0117c35059f86 Mon Sep 17 00:00:00 2001 +From: "M. Vefa Bicakci" +Date: Sun, 2 Oct 2022 18:20:05 -0400 +Subject: xen/gntdev: Prevent leaking grants + +From: M. Vefa Bicakci + +commit 0991028cd49567d7016d1b224fe0117c35059f86 upstream. + +Prior to this commit, if a grant mapping operation failed partially, +some of the entries in the map_ops array would be invalid, whereas all +of the entries in the kmap_ops array would be valid. 
This in turn would +cause the following logic in gntdev_map_grant_pages to become invalid: + + for (i = 0; i < map->count; i++) { + if (map->map_ops[i].status == GNTST_okay) { + map->unmap_ops[i].handle = map->map_ops[i].handle; + if (!use_ptemod) + alloced++; + } + if (use_ptemod) { + if (map->kmap_ops[i].status == GNTST_okay) { + if (map->map_ops[i].status == GNTST_okay) + alloced++; + map->kunmap_ops[i].handle = map->kmap_ops[i].handle; + } + } + } + ... + atomic_add(alloced, &map->live_grants); + +Assume that use_ptemod is true (i.e., the domain mapping the granted +pages is a paravirtualized domain). In the code excerpt above, note that +the "alloced" variable is only incremented when both kmap_ops[i].status +and map_ops[i].status are set to GNTST_okay (i.e., both mapping +operations are successful). However, as also noted above, there are +cases where a grant mapping operation fails partially, breaking the +assumption of the code excerpt above. + +The aforementioned causes map->live_grants to be incorrectly set. In +some cases, all of the map_ops mappings fail, but all of the kmap_ops +mappings succeed, meaning that live_grants may remain zero. This in turn +makes it impossible to unmap the successfully grant-mapped pages pointed +to by kmap_ops, because unmap_grant_pages has the following snippet of +code at its beginning: + + if (atomic_read(&map->live_grants) == 0) + return; /* Nothing to do */ + +In other cases where only some of the map_ops mappings fail but all +kmap_ops mappings succeed, live_grants is made positive, but when the +user requests unmapping the grant-mapped pages, __unmap_grant_pages_done +will then make map->live_grants negative, because the latter function +does not check if all of the pages that were requested to be unmapped +were actually unmapped, and the same function unconditionally subtracts +"data->count" (i.e., a value that can be greater than map->live_grants) +from map->live_grants. 
The side effects of a negative live_grants value +have not been studied. + +The net effect of all of this is that grant references are leaked in one +of the above conditions. In Qubes OS v4.1 (which uses Xen's grant +mechanism extensively for X11 GUI isolation), this issue manifests +itself with warning messages like the following to be printed out by the +Linux kernel in the VM that had granted pages (that contain X11 GUI +window data) to dom0: "g.e. 0x1234 still pending", especially after the +user rapidly resizes GUI VM windows (causing some grant-mapping +operations to partially or completely fail, due to the fact that the VM +unshares some of the pages as part of the window resizing, making the +pages impossible to grant-map from dom0). + +The fix for this issue involves counting all successful map_ops and +kmap_ops mappings separately, and then adding the sum to live_grants. +During unmapping, only the number of successfully unmapped grants is +subtracted from live_grants. The code is also modified to check for +negative live_grants values after the subtraction and warn the user. + +Link: https://github.com/QubesOS/qubes-issues/issues/7631 +Fixes: dbe97cff7dd9 ("xen/gntdev: Avoid blocking in unmap_grant_pages()") +Cc: stable@vger.kernel.org +Signed-off-by: M. 
Vefa Bicakci +Acked-by: Demi Marie Obenour +Reviewed-by: Juergen Gross +Link: https://lore.kernel.org/r/20221002222006.2077-2-m.v.b@runbox.com +Signed-off-by: Juergen Gross +Signed-off-by: Greg Kroah-Hartman +--- + drivers/xen/gntdev.c | 22 +++++++++++++++++----- + 1 file changed, 17 insertions(+), 5 deletions(-) + +--- a/drivers/xen/gntdev.c ++++ b/drivers/xen/gntdev.c +@@ -367,8 +367,7 @@ int gntdev_map_grant_pages(struct gntdev + for (i = 0; i < map->count; i++) { + if (map->map_ops[i].status == GNTST_okay) { + map->unmap_ops[i].handle = map->map_ops[i].handle; +- if (!use_ptemod) +- alloced++; ++ alloced++; + } else if (!err) + err = -EINVAL; + +@@ -377,8 +376,7 @@ int gntdev_map_grant_pages(struct gntdev + + if (use_ptemod) { + if (map->kmap_ops[i].status == GNTST_okay) { +- if (map->map_ops[i].status == GNTST_okay) +- alloced++; ++ alloced++; + map->kunmap_ops[i].handle = map->kmap_ops[i].handle; + } else if (!err) + err = -EINVAL; +@@ -394,8 +392,14 @@ static void __unmap_grant_pages_done(int + unsigned int i; + struct gntdev_grant_map *map = data->data; + unsigned int offset = data->unmap_ops - map->unmap_ops; ++ int successful_unmaps = 0; ++ int live_grants; + + for (i = 0; i < data->count; i++) { ++ if (map->unmap_ops[offset + i].status == GNTST_okay && ++ map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE) ++ successful_unmaps++; ++ + WARN_ON(map->unmap_ops[offset + i].status != GNTST_okay && + map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE); + pr_debug("unmap handle=%d st=%d\n", +@@ -403,6 +407,10 @@ static void __unmap_grant_pages_done(int + map->unmap_ops[offset+i].status); + map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; + if (use_ptemod) { ++ if (map->kunmap_ops[offset + i].status == GNTST_okay && ++ map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE) ++ successful_unmaps++; ++ + WARN_ON(map->kunmap_ops[offset + i].status != GNTST_okay && + map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE); + 
pr_debug("kunmap handle=%u st=%d\n", +@@ -411,11 +419,15 @@ static void __unmap_grant_pages_done(int + map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; + } + } ++ + /* + * Decrease the live-grant counter. This must happen after the loop to + * prevent premature reuse of the grants by gnttab_mmap(). + */ +- atomic_sub(data->count, &map->live_grants); ++ live_grants = atomic_sub_return(successful_unmaps, &map->live_grants); ++ if (WARN_ON(live_grants < 0)) ++ pr_err("%s: live_grants became negative (%d) after unmapping %d pages!\n", ++ __func__, live_grants, successful_unmaps); + + /* Release reference taken by __unmap_grant_pages */ + gntdev_put_map(NULL, map);