From: Greg Kroah-Hartman Date: Thu, 15 Aug 2024 08:40:13 +0000 (+0200) Subject: 6.6-stable patches X-Git-Tag: v4.19.320~35 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=74a41753288a6ff1a2abfe9193b0b804addcfa35;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: genirq-cpuhotplug-retry-with-cpu_online_mask-when-migration-fails.patch genirq-cpuhotplug-skip-suspended-interrupts-when-restoring-affinity.patch loongarch-define-__arch_want_new_stat-in-unistd.h.patch mm-gup-stop-abusing-try_grab_folio.patch nfsd-expose-proc-net-sunrpc-nfsd-in-net-namespaces.patch nfsd-fix-frame-size-warning-in-svc_export_parse.patch nfsd-make-all-of-the-nfsd-stats-per-network-namespace.patch nfsd-make-svc_stat-per-network-namespace-instead-of-global.patch nfsd-remove-nfsd_stats-make-th_cnt-a-global-counter.patch nfsd-rename-nfsd_net_-to-nfsd_stats_.patch nfsd-rewrite-synopsis-of-nfsd_percpu_counters_init.patch nfsd-stop-setting-pg_stats-for-unused-stats.patch nvme-pci-add-apst-quirk-for-lenovo-n60z-laptop.patch sunrpc-don-t-change-sv_stats-if-it-doesn-t-exist.patch sunrpc-pass-in-the-sv_stats-struct-through-svc_create_pooled.patch sunrpc-remove-pg_stats-from-svc_program.patch sunrpc-use-the-struct-net-as-the-svc-proc-private.patch --- diff --git a/queue-6.6/genirq-cpuhotplug-retry-with-cpu_online_mask-when-migration-fails.patch b/queue-6.6/genirq-cpuhotplug-retry-with-cpu_online_mask-when-migration-fails.patch new file mode 100644 index 00000000000..61ef6cacbec --- /dev/null +++ b/queue-6.6/genirq-cpuhotplug-retry-with-cpu_online_mask-when-migration-fails.patch @@ -0,0 +1,66 @@ +From 88d724e2301a69c1ab805cd74fc27aa36ae529e0 Mon Sep 17 00:00:00 2001 +From: Dongli Zhang +Date: Tue, 23 Apr 2024 00:34:13 -0700 +Subject: genirq/cpuhotplug: Retry with cpu_online_mask when migration fails + +From: Dongli Zhang + +commit 88d724e2301a69c1ab805cd74fc27aa36ae529e0 upstream. + +When a CPU goes offline, the interrupts affine to that CPU are +re-configured. 
+ +Managed interrupts undergo either migration to other CPUs or shutdown if +all CPUs listed in the affinity are offline. The migration of managed +interrupts is guaranteed on x86 because there are interrupt vectors +reserved. + +Regular interrupts are migrated to a still online CPU in the affinity mask +or if there is no online CPU to any online CPU. + +This works as long as the still online CPUs in the affinity mask have +interrupt vectors available, but in case that none of those CPUs has a +vector available the migration fails and the device interrupt becomes +stale. + +This is not any different from the case where the affinity mask does not +contain any online CPU, but there is no fallback operation for this. + +Instead of giving up, retry the migration attempt with the online CPU mask +if the interrupt is not managed, as managed interrupts cannot be affected +by this problem. + +Signed-off-by: Dongli Zhang +Signed-off-by: Thomas Gleixner +Link: https://lore.kernel.org/r/20240423073413.79625-1-dongli.zhang@oracle.com +Cc: Bart Van Assche +Signed-off-by: Greg Kroah-Hartman +--- + kernel/irq/cpuhotplug.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +--- a/kernel/irq/cpuhotplug.c ++++ b/kernel/irq/cpuhotplug.c +@@ -130,6 +130,22 @@ static bool migrate_one_irq(struct irq_d + * CPU. + */ + err = irq_do_set_affinity(d, affinity, false); ++ ++ /* ++ * If there are online CPUs in the affinity mask, but they have no ++ * vectors left to make the migration work, try to break the ++ * affinity by migrating to any online CPU. 
++ */ ++ if (err == -ENOSPC && !irqd_affinity_is_managed(d) && affinity != cpu_online_mask) { ++ pr_debug("IRQ%u: set affinity failed for %*pbl, re-try with online CPUs\n", ++ d->irq, cpumask_pr_args(affinity)); ++ ++ affinity = cpu_online_mask; ++ brokeaff = true; ++ ++ err = irq_do_set_affinity(d, affinity, false); ++ } ++ + if (err) { + pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n", + d->irq, err); diff --git a/queue-6.6/genirq-cpuhotplug-skip-suspended-interrupts-when-restoring-affinity.patch b/queue-6.6/genirq-cpuhotplug-skip-suspended-interrupts-when-restoring-affinity.patch new file mode 100644 index 00000000000..8a480e3f683 --- /dev/null +++ b/queue-6.6/genirq-cpuhotplug-skip-suspended-interrupts-when-restoring-affinity.patch @@ -0,0 +1,71 @@ +From a60dd06af674d3bb76b40da5d722e4a0ecefe650 Mon Sep 17 00:00:00 2001 +From: David Stevens +Date: Wed, 24 Apr 2024 18:03:41 +0900 +Subject: genirq/cpuhotplug: Skip suspended interrupts when restoring affinity + +From: David Stevens + +commit a60dd06af674d3bb76b40da5d722e4a0ecefe650 upstream. + +irq_restore_affinity_of_irq() restarts managed interrupts unconditionally +when the first CPU in the affinity mask comes online. That's correct during +normal hotplug operations, but not when resuming from S3 because the +drivers are not resumed yet and interrupt delivery is not expected by them. + +Skip the startup of suspended interrupts and let resume_device_irqs() deal +with restoring them. This ensures that irqs are not delivered to drivers +during the noirq phase of resuming from S3, after non-boot CPUs are brought +back online. 
+ +Signed-off-by: David Stevens +Signed-off-by: Thomas Gleixner +Link: https://lore.kernel.org/r/20240424090341.72236-1-stevensd@chromium.org +Cc: Bart Van Assche +Signed-off-by: Greg Kroah-Hartman +--- + kernel/irq/cpuhotplug.c | 11 ++++++++--- + kernel/irq/manage.c | 12 ++++++++---- + 2 files changed, 16 insertions(+), 7 deletions(-) + +--- a/kernel/irq/cpuhotplug.c ++++ b/kernel/irq/cpuhotplug.c +@@ -195,10 +195,15 @@ static void irq_restore_affinity_of_irq( + !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity)) + return; + +- if (irqd_is_managed_and_shutdown(data)) { +- irq_startup(desc, IRQ_RESEND, IRQ_START_COND); ++ /* ++ * Don't restore suspended interrupts here when a system comes back ++ * from S3. They are reenabled via resume_device_irqs(). ++ */ ++ if (desc->istate & IRQS_SUSPENDED) + return; +- } ++ ++ if (irqd_is_managed_and_shutdown(data)) ++ irq_startup(desc, IRQ_RESEND, IRQ_START_COND); + + /* + * If the interrupt can only be directed to a single target +--- a/kernel/irq/manage.c ++++ b/kernel/irq/manage.c +@@ -796,10 +796,14 @@ void __enable_irq(struct irq_desc *desc) + irq_settings_set_noprobe(desc); + /* + * Call irq_startup() not irq_enable() here because the +- * interrupt might be marked NOAUTOEN. So irq_startup() +- * needs to be invoked when it gets enabled the first +- * time. If it was already started up, then irq_startup() +- * will invoke irq_enable() under the hood. ++ * interrupt might be marked NOAUTOEN so irq_startup() ++ * needs to be invoked when it gets enabled the first time. ++ * This is also required when __enable_irq() is invoked for ++ * a managed and shutdown interrupt from the S3 resume ++ * path. ++ * ++ * If it was already started up, then irq_startup() will ++ * invoke irq_enable() under the hood. 
+ */ + irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE); + break; diff --git a/queue-6.6/loongarch-define-__arch_want_new_stat-in-unistd.h.patch b/queue-6.6/loongarch-define-__arch_want_new_stat-in-unistd.h.patch new file mode 100644 index 00000000000..7a2a98fe72b --- /dev/null +++ b/queue-6.6/loongarch-define-__arch_want_new_stat-in-unistd.h.patch @@ -0,0 +1,63 @@ +From 7697a0fe0154468f5df35c23ebd7aa48994c2cdc Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Sat, 20 Jul 2024 22:40:58 +0800 +Subject: LoongArch: Define __ARCH_WANT_NEW_STAT in unistd.h + +From: Huacai Chen + +commit 7697a0fe0154468f5df35c23ebd7aa48994c2cdc upstream. + +Chromium sandbox apparently wants to deny statx [1] so it could properly +inspect arguments after the sandboxed process later falls back to fstat. +Because there's currently not a "fd-only" version of statx, so that the +sandbox has no way to ensure the path argument is empty without being +able to peek into the sandboxed process's memory. For architectures able +to do newfstatat though, glibc falls back to newfstatat after getting +-ENOSYS for statx, then the respective SIGSYS handler [2] takes care of +inspecting the path argument, transforming allowed newfstatat's into +fstat instead which is allowed and has the same type of return value. + +But, as LoongArch is the first architecture to not have fstat nor +newfstatat, the LoongArch glibc does not attempt falling back at all +when it gets -ENOSYS for statx -- and you see the problem there! + +Actually, back when the LoongArch port was under review, people were +aware of the same problem with sandboxing clone3 [3], so clone was +eventually kept. Unfortunately it seemed at that time no one had noticed +statx, so besides restoring fstat/newfstatat to LoongArch uapi (and +postponing the problem further), it seems inevitable that we would need +to tackle seccomp deep argument inspection. 
+ +However, this is obviously a decision that shouldn't be taken lightly, +so we just restore fstat/newfstatat by defining __ARCH_WANT_NEW_STAT +in unistd.h. This is the simplest solution for now, and so we hope the +community will tackle the long-standing problem of seccomp deep argument +inspection in the future [4][5]. + +Also add "newstat" to syscall_abis_64 in Makefile.syscalls due to +upstream asm-generic changes. + +For more information, please read this thread [6]. + +[1] https://chromium-review.googlesource.com/c/chromium/src/+/2823150 +[2] https://chromium.googlesource.com/chromium/src/sandbox/+/c085b51940bd/linux/seccomp-bpf-helpers/sigsys_handlers.cc#355 +[3] https://lore.kernel.org/linux-arch/20220511211231.GG7074@brightrain.aerifal.cx/ +[4] https://lwn.net/Articles/799557/ +[5] https://lpc.events/event/4/contributions/560/attachments/397/640/deep-arg-inspection.pdf +[6] https://lore.kernel.org/loongarch/20240226-granit-seilschaft-eccc2433014d@brauner/T/#t + +Cc: stable@vger.kernel.org +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/include/uapi/asm/unistd.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/loongarch/include/uapi/asm/unistd.h ++++ b/arch/loongarch/include/uapi/asm/unistd.h +@@ -1,4 +1,5 @@ + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#define __ARCH_WANT_NEW_STAT + #define __ARCH_WANT_SYS_CLONE + #define __ARCH_WANT_SYS_CLONE3 + diff --git a/queue-6.6/mm-gup-stop-abusing-try_grab_folio.patch b/queue-6.6/mm-gup-stop-abusing-try_grab_folio.patch new file mode 100644 index 00000000000..74a3cb26790 --- /dev/null +++ b/queue-6.6/mm-gup-stop-abusing-try_grab_folio.patch @@ -0,0 +1,524 @@ +From f442fa6141379a20b48ae3efabee827a3d260787 Mon Sep 17 00:00:00 2001 +From: Yang Shi +Date: Fri, 28 Jun 2024 12:14:58 -0700 +Subject: mm: gup: stop abusing try_grab_folio + +From: Yang Shi + +commit f442fa6141379a20b48ae3efabee827a3d260787 upstream. 
+ +A kernel warning was reported when pinning folio in CMA memory when +launching SEV virtual machine. The splat looks like: + +[ 464.325306] WARNING: CPU: 13 PID: 6734 at mm/gup.c:1313 __get_user_pages+0x423/0x520 +[ 464.325464] CPU: 13 PID: 6734 Comm: qemu-kvm Kdump: loaded Not tainted 6.6.33+ #6 +[ 464.325477] RIP: 0010:__get_user_pages+0x423/0x520 +[ 464.325515] Call Trace: +[ 464.325520] +[ 464.325523] ? __get_user_pages+0x423/0x520 +[ 464.325528] ? __warn+0x81/0x130 +[ 464.325536] ? __get_user_pages+0x423/0x520 +[ 464.325541] ? report_bug+0x171/0x1a0 +[ 464.325549] ? handle_bug+0x3c/0x70 +[ 464.325554] ? exc_invalid_op+0x17/0x70 +[ 464.325558] ? asm_exc_invalid_op+0x1a/0x20 +[ 464.325567] ? __get_user_pages+0x423/0x520 +[ 464.325575] __gup_longterm_locked+0x212/0x7a0 +[ 464.325583] internal_get_user_pages_fast+0xfb/0x190 +[ 464.325590] pin_user_pages_fast+0x47/0x60 +[ 464.325598] sev_pin_memory+0xca/0x170 [kvm_amd] +[ 464.325616] sev_mem_enc_register_region+0x81/0x130 [kvm_amd] + +Per the analysis done by yangge, when starting the SEV virtual machine, it +will call pin_user_pages_fast(..., FOLL_LONGTERM, ...) to pin the memory. +But the page is in CMA area, so fast GUP will fail then fall back to the +slow path due to the longterm pinnable check in try_grab_folio(). + +The slow path will try to pin the pages then migrate them out of CMA area. +But the slow path also uses try_grab_folio() to pin the page, it will +also fail due to the same check then the above warning is triggered. + +In addition, the try_grab_folio() is supposed to be used in fast path and +it elevates folio refcount by using add ref unless zero. We are guaranteed +to have at least one stable reference in slow path, so the simple atomic add +could be used. The performance difference should be trivial, but the +misuse may be confusing and misleading. + +Redefined try_grab_folio() to try_grab_folio_fast(), and try_grab_page() +to try_grab_folio(), and use them in the proper paths. 
This solves both +the abuse and the kernel warning. + +The proper naming makes their usecase more clear and should prevent from +abusing in the future. + +peterx said: + +: The user will see the pin fails, for gpu-slow it further triggers the WARN +: right below that failure (as in the original report): +: +: folio = try_grab_folio(page, page_increm - 1, +: foll_flags); +: if (WARN_ON_ONCE(!folio)) { <------------------------ here +: /* +: * Release the 1st page ref if the +: * folio is problematic, fail hard. +: */ +: gup_put_folio(page_folio(page), 1, +: foll_flags); +: ret = -EFAULT; +: goto out; +: } + +[1] https://lore.kernel.org/linux-mm/1719478388-31917-1-git-send-email-yangge1116@126.com/ + +[shy828301@gmail.com: fix implicit declaration of function try_grab_folio_fast] + Link: https://lkml.kernel.org/r/CAHbLzkowMSso-4Nufc9hcMehQsK9PNz3OSu-+eniU-2Mm-xjhA@mail.gmail.com +Link: https://lkml.kernel.org/r/20240628191458.2605553-1-yang@os.amperecomputing.com +Fixes: 57edfcfd3419 ("mm/gup: accelerate thp gup even for "pages != NULL"") +Signed-off-by: Yang Shi +Reported-by: yangge +Cc: Christoph Hellwig +Cc: David Hildenbrand +Cc: Peter Xu +Cc: [6.6+] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/gup.c | 251 ++++++++++++++++++++++++++++--------------------------- + mm/huge_memory.c | 6 - + mm/hugetlb.c | 2 + mm/internal.h | 4 + 4 files changed, 135 insertions(+), 128 deletions(-) + +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -97,95 +97,6 @@ retry: + return folio; + } + +-/** +- * try_grab_folio() - Attempt to get or pin a folio. +- * @page: pointer to page to be grabbed +- * @refs: the value to (effectively) add to the folio's refcount +- * @flags: gup flags: these are the FOLL_* flag values. +- * +- * "grab" names in this file mean, "look at flags to decide whether to use +- * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. 
+- * +- * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the +- * same time. (That's true throughout the get_user_pages*() and +- * pin_user_pages*() APIs.) Cases: +- * +- * FOLL_GET: folio's refcount will be incremented by @refs. +- * +- * FOLL_PIN on large folios: folio's refcount will be incremented by +- * @refs, and its pincount will be incremented by @refs. +- * +- * FOLL_PIN on single-page folios: folio's refcount will be incremented by +- * @refs * GUP_PIN_COUNTING_BIAS. +- * +- * Return: The folio containing @page (with refcount appropriately +- * incremented) for success, or NULL upon failure. If neither FOLL_GET +- * nor FOLL_PIN was set, that's considered failure, and furthermore, +- * a likely bug in the caller, so a warning is also emitted. +- */ +-struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) +-{ +- struct folio *folio; +- +- if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0)) +- return NULL; +- +- if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) +- return NULL; +- +- if (flags & FOLL_GET) +- return try_get_folio(page, refs); +- +- /* FOLL_PIN is set */ +- +- /* +- * Don't take a pin on the zero page - it's not going anywhere +- * and it is used in a *lot* of places. +- */ +- if (is_zero_page(page)) +- return page_folio(page); +- +- folio = try_get_folio(page, refs); +- if (!folio) +- return NULL; +- +- /* +- * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a +- * right zone, so fail and let the caller fall back to the slow +- * path. +- */ +- if (unlikely((flags & FOLL_LONGTERM) && +- !folio_is_longterm_pinnable(folio))) { +- if (!put_devmap_managed_page_refs(&folio->page, refs)) +- folio_put_refs(folio, refs); +- return NULL; +- } +- +- /* +- * When pinning a large folio, use an exact count to track it. +- * +- * However, be sure to *also* increment the normal folio +- * refcount field at least once, so that the folio really +- * is pinned. 
That's why the refcount from the earlier +- * try_get_folio() is left intact. +- */ +- if (folio_test_large(folio)) +- atomic_add(refs, &folio->_pincount); +- else +- folio_ref_add(folio, +- refs * (GUP_PIN_COUNTING_BIAS - 1)); +- /* +- * Adjust the pincount before re-checking the PTE for changes. +- * This is essentially a smp_mb() and is paired with a memory +- * barrier in page_try_share_anon_rmap(). +- */ +- smp_mb__after_atomic(); +- +- node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); +- +- return folio; +-} +- + static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) + { + if (flags & FOLL_PIN) { +@@ -203,58 +114,59 @@ static void gup_put_folio(struct folio * + } + + /** +- * try_grab_page() - elevate a page's refcount by a flag-dependent amount +- * @page: pointer to page to be grabbed +- * @flags: gup flags: these are the FOLL_* flag values. ++ * try_grab_folio() - add a folio's refcount by a flag-dependent amount ++ * @folio: pointer to folio to be grabbed ++ * @refs: the value to (effectively) add to the folio's refcount ++ * @flags: gup flags: these are the FOLL_* flag values + * + * This might not do anything at all, depending on the flags argument. + * + * "grab" names in this file mean, "look at flags to decide whether to use +- * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount. ++ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. + * + * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same +- * time. Cases: please see the try_grab_folio() documentation, with +- * "refs=1". ++ * time. + * + * Return: 0 for success, or if no action was required (if neither FOLL_PIN + * nor FOLL_GET was set, nothing is done). A negative error code for failure: + * +- * -ENOMEM FOLL_GET or FOLL_PIN was set, but the page could not ++ * -ENOMEM FOLL_GET or FOLL_PIN was set, but the folio could not + * be grabbed. 
++ * ++ * It is called when we have a stable reference for the folio, typically in ++ * GUP slow path. + */ +-int __must_check try_grab_page(struct page *page, unsigned int flags) ++int __must_check try_grab_folio(struct folio *folio, int refs, ++ unsigned int flags) + { +- struct folio *folio = page_folio(page); +- + if (WARN_ON_ONCE(folio_ref_count(folio) <= 0)) + return -ENOMEM; + +- if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) ++ if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(&folio->page))) + return -EREMOTEIO; + + if (flags & FOLL_GET) +- folio_ref_inc(folio); ++ folio_ref_add(folio, refs); + else if (flags & FOLL_PIN) { + /* + * Don't take a pin on the zero page - it's not going anywhere + * and it is used in a *lot* of places. + */ +- if (is_zero_page(page)) ++ if (is_zero_folio(folio)) + return 0; + + /* +- * Similar to try_grab_folio(): be sure to *also* +- * increment the normal page refcount field at least once, ++ * Increment the normal page refcount field at least once, + * so that the page really is pinned. + */ + if (folio_test_large(folio)) { +- folio_ref_add(folio, 1); +- atomic_add(1, &folio->_pincount); ++ folio_ref_add(folio, refs); ++ atomic_add(refs, &folio->_pincount); + } else { +- folio_ref_add(folio, GUP_PIN_COUNTING_BIAS); ++ folio_ref_add(folio, refs * GUP_PIN_COUNTING_BIAS); + } + +- node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1); ++ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); + } + + return 0; +@@ -647,8 +559,8 @@ static struct page *follow_page_pte(stru + VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) && + !PageAnonExclusive(page), page); + +- /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */ +- ret = try_grab_page(page, flags); ++ /* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. 
*/ ++ ret = try_grab_folio(page_folio(page), 1, flags); + if (unlikely(ret)) { + page = ERR_PTR(ret); + goto out; +@@ -899,7 +811,7 @@ static int get_gate_page(struct mm_struc + goto unmap; + *page = pte_page(entry); + } +- ret = try_grab_page(*page, gup_flags); ++ ret = try_grab_folio(page_folio(*page), 1, gup_flags); + if (unlikely(ret)) + goto unmap; + out: +@@ -1302,20 +1214,19 @@ next_page: + * pages. + */ + if (page_increm > 1) { +- struct folio *folio; ++ struct folio *folio = page_folio(page); + + /* + * Since we already hold refcount on the + * large folio, this should never fail. + */ +- folio = try_grab_folio(page, page_increm - 1, +- foll_flags); +- if (WARN_ON_ONCE(!folio)) { ++ if (try_grab_folio(folio, page_increm - 1, ++ foll_flags)) { + /* + * Release the 1st page ref if the + * folio is problematic, fail hard. + */ +- gup_put_folio(page_folio(page), 1, ++ gup_put_folio(folio, 1, + foll_flags); + ret = -EFAULT; + goto out; +@@ -2541,6 +2452,102 @@ static void __maybe_unused undo_dev_page + } + } + ++/** ++ * try_grab_folio_fast() - Attempt to get or pin a folio in fast path. ++ * @page: pointer to page to be grabbed ++ * @refs: the value to (effectively) add to the folio's refcount ++ * @flags: gup flags: these are the FOLL_* flag values. ++ * ++ * "grab" names in this file mean, "look at flags to decide whether to use ++ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. ++ * ++ * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the ++ * same time. (That's true throughout the get_user_pages*() and ++ * pin_user_pages*() APIs.) Cases: ++ * ++ * FOLL_GET: folio's refcount will be incremented by @refs. ++ * ++ * FOLL_PIN on large folios: folio's refcount will be incremented by ++ * @refs, and its pincount will be incremented by @refs. ++ * ++ * FOLL_PIN on single-page folios: folio's refcount will be incremented by ++ * @refs * GUP_PIN_COUNTING_BIAS. 
++ * ++ * Return: The folio containing @page (with refcount appropriately ++ * incremented) for success, or NULL upon failure. If neither FOLL_GET ++ * nor FOLL_PIN was set, that's considered failure, and furthermore, ++ * a likely bug in the caller, so a warning is also emitted. ++ * ++ * It uses add ref unless zero to elevate the folio refcount and must be called ++ * in fast path only. ++ */ ++static struct folio *try_grab_folio_fast(struct page *page, int refs, ++ unsigned int flags) ++{ ++ struct folio *folio; ++ ++ /* Raise warn if it is not called in fast GUP */ ++ VM_WARN_ON_ONCE(!irqs_disabled()); ++ ++ if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0)) ++ return NULL; ++ ++ if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) ++ return NULL; ++ ++ if (flags & FOLL_GET) ++ return try_get_folio(page, refs); ++ ++ /* FOLL_PIN is set */ ++ ++ /* ++ * Don't take a pin on the zero page - it's not going anywhere ++ * and it is used in a *lot* of places. ++ */ ++ if (is_zero_page(page)) ++ return page_folio(page); ++ ++ folio = try_get_folio(page, refs); ++ if (!folio) ++ return NULL; ++ ++ /* ++ * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a ++ * right zone, so fail and let the caller fall back to the slow ++ * path. ++ */ ++ if (unlikely((flags & FOLL_LONGTERM) && ++ !folio_is_longterm_pinnable(folio))) { ++ if (!put_devmap_managed_page_refs(&folio->page, refs)) ++ folio_put_refs(folio, refs); ++ return NULL; ++ } ++ ++ /* ++ * When pinning a large folio, use an exact count to track it. ++ * ++ * However, be sure to *also* increment the normal folio ++ * refcount field at least once, so that the folio really ++ * is pinned. That's why the refcount from the earlier ++ * try_get_folio() is left intact. ++ */ ++ if (folio_test_large(folio)) ++ atomic_add(refs, &folio->_pincount); ++ else ++ folio_ref_add(folio, ++ refs * (GUP_PIN_COUNTING_BIAS - 1)); ++ /* ++ * Adjust the pincount before re-checking the PTE for changes. 
++ * This is essentially a smp_mb() and is paired with a memory ++ * barrier in folio_try_share_anon_rmap_*(). ++ */ ++ smp_mb__after_atomic(); ++ ++ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); ++ ++ return folio; ++} ++ + #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL + /* + * Fast-gup relies on pte change detection to avoid concurrent pgtable +@@ -2605,7 +2612,7 @@ static int gup_pte_range(pmd_t pmd, pmd_ + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); + +- folio = try_grab_folio(page, 1, flags); ++ folio = try_grab_folio_fast(page, 1, flags); + if (!folio) + goto pte_unmap; + +@@ -2699,7 +2706,7 @@ static int __gup_device_huge(unsigned lo + + SetPageReferenced(page); + pages[*nr] = page; +- if (unlikely(try_grab_page(page, flags))) { ++ if (unlikely(try_grab_folio(page_folio(page), 1, flags))) { + undo_dev_pagemap(nr, nr_start, flags, pages); + break; + } +@@ -2808,7 +2815,7 @@ static int gup_hugepte(pte_t *ptep, unsi + page = nth_page(pte_page(pte), (addr & (sz - 1)) >> PAGE_SHIFT); + refs = record_subpages(page, addr, end, pages + *nr); + +- folio = try_grab_folio(page, refs, flags); ++ folio = try_grab_folio_fast(page, refs, flags); + if (!folio) + return 0; + +@@ -2879,7 +2886,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_ + page = nth_page(pmd_page(orig), (addr & ~PMD_MASK) >> PAGE_SHIFT); + refs = record_subpages(page, addr, end, pages + *nr); + +- folio = try_grab_folio(page, refs, flags); ++ folio = try_grab_folio_fast(page, refs, flags); + if (!folio) + return 0; + +@@ -2923,7 +2930,7 @@ static int gup_huge_pud(pud_t orig, pud_ + page = nth_page(pud_page(orig), (addr & ~PUD_MASK) >> PAGE_SHIFT); + refs = record_subpages(page, addr, end, pages + *nr); + +- folio = try_grab_folio(page, refs, flags); ++ folio = try_grab_folio_fast(page, refs, flags); + if (!folio) + return 0; + +@@ -2963,7 +2970,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_ + page = nth_page(pgd_page(orig), (addr & ~PGDIR_MASK) >> PAGE_SHIFT); + refs = record_subpages(page, 
addr, end, pages + *nr); + +- folio = try_grab_folio(page, refs, flags); ++ folio = try_grab_folio_fast(page, refs, flags); + if (!folio) + return 0; + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -1056,7 +1056,7 @@ struct page *follow_devmap_pmd(struct vm + if (!*pgmap) + return ERR_PTR(-EFAULT); + page = pfn_to_page(pfn); +- ret = try_grab_page(page, flags); ++ ret = try_grab_folio(page_folio(page), 1, flags); + if (ret) + page = ERR_PTR(ret); + +@@ -1214,7 +1214,7 @@ struct page *follow_devmap_pud(struct vm + return ERR_PTR(-EFAULT); + page = pfn_to_page(pfn); + +- ret = try_grab_page(page, flags); ++ ret = try_grab_folio(page_folio(page), 1, flags); + if (ret) + page = ERR_PTR(ret); + +@@ -1475,7 +1475,7 @@ struct page *follow_trans_huge_pmd(struc + VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) && + !PageAnonExclusive(page), page); + +- ret = try_grab_page(page, flags); ++ ret = try_grab_folio(page_folio(page), 1, flags); + if (ret) + return ERR_PTR(ret); + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -6532,7 +6532,7 @@ struct page *hugetlb_follow_page_mask(st + * try_grab_page() should always be able to get the page here, + * because we hold the ptl lock and have verified pte_present(). 
+ */ +- ret = try_grab_page(page, flags); ++ ret = try_grab_folio(page_folio(page), 1, flags); + + if (WARN_ON_ONCE(ret)) { + page = ERR_PTR(ret); +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -938,8 +938,8 @@ int migrate_device_coherent_page(struct + /* + * mm/gup.c + */ +-struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags); +-int __must_check try_grab_page(struct page *page, unsigned int flags); ++int __must_check try_grab_folio(struct folio *folio, int refs, ++ unsigned int flags); + + /* + * mm/huge_memory.c diff --git a/queue-6.6/nfsd-expose-proc-net-sunrpc-nfsd-in-net-namespaces.patch b/queue-6.6/nfsd-expose-proc-net-sunrpc-nfsd-in-net-namespaces.patch new file mode 100644 index 00000000000..3a857cfd3a5 --- /dev/null +++ b/queue-6.6/nfsd-expose-proc-net-sunrpc-nfsd-in-net-namespaces.patch @@ -0,0 +1,136 @@ +From stable+bounces-67401-greg=kroah.com@vger.kernel.org Tue Aug 13 00:38:33 2024 +From: cel@kernel.org +Date: Mon, 12 Aug 2024 18:36:01 -0400 +Subject: nfsd: expose /proc/net/sunrpc/nfsd in net namespaces +To: +Cc: , pvorel@suse.cz, sherry.yang@oracle.com, calum.mackay@oracle.com, kernel-team@fb.com, Josef Bacik , Jeff Layton +Message-ID: <20240812223604.32592-10-cel@kernel.org> + +From: Josef Bacik + +[ Upstream commit 93483ac5fec62cc1de166051b219d953bb5e4ef4 ] + +We are running nfsd servers inside of containers with their own network +namespace, and we want to monitor these services using the stats found +in /proc. However these are not exposed in the proc inside of the +container, so we have to bind mount the host /proc into our containers +to get at this information. + +Separate out the stat counters init and the proc registration, and move +the proc registration into the pernet operations entry and exit points +so that these stats can be exposed inside of network namespaces. + +This is an intermediate step, this just exposes the global counters in +the network namespace. 
Subsequent patches will move these counters into +the per-network namespace container. + +Signed-off-by: Josef Bacik +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfsctl.c | 8 +++++--- + fs/nfsd/stats.c | 21 ++++++--------------- + fs/nfsd/stats.h | 6 ++++-- + 3 files changed, 15 insertions(+), 20 deletions(-) + +--- a/fs/nfsd/nfsctl.c ++++ b/fs/nfsd/nfsctl.c +@@ -1532,6 +1532,7 @@ static __net_init int nfsd_net_init(stru + nfsd4_init_leases_net(nn); + get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); + seqlock_init(&nn->writeverf_lock); ++ nfsd_proc_stat_init(net); + + return 0; + +@@ -1552,6 +1553,7 @@ static __net_exit void nfsd_net_exit(str + { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + ++ nfsd_proc_stat_shutdown(net); + nfsd_net_reply_cache_destroy(nn); + nfsd_idmap_shutdown(net); + nfsd_export_shutdown(net); +@@ -1575,7 +1577,7 @@ static int __init init_nfsd(void) + retval = nfsd4_init_pnfs(); + if (retval) + goto out_free_slabs; +- retval = nfsd_stat_init(); /* Statistics */ ++ retval = nfsd_stat_counters_init(); /* Statistics */ + if (retval) + goto out_free_pnfs; + retval = nfsd_drc_slab_create(); +@@ -1611,7 +1613,7 @@ out_free_lockd: + nfsd_lockd_shutdown(); + nfsd_drc_slab_free(); + out_free_stat: +- nfsd_stat_shutdown(); ++ nfsd_stat_counters_destroy(); + out_free_pnfs: + nfsd4_exit_pnfs(); + out_free_slabs: +@@ -1628,7 +1630,7 @@ static void __exit exit_nfsd(void) + nfsd_drc_slab_free(); + remove_proc_entry("fs/nfs/exports", NULL); + remove_proc_entry("fs/nfs", NULL); +- nfsd_stat_shutdown(); ++ nfsd_stat_counters_destroy(); + nfsd_lockd_shutdown(); + nfsd4_free_slabs(); + nfsd4_exit_pnfs(); +--- a/fs/nfsd/stats.c ++++ b/fs/nfsd/stats.c +@@ -108,31 +108,22 @@ void nfsd_percpu_counters_destroy(struct + percpu_counter_destroy(&counters[i]); + } + +-static int nfsd_stat_counters_init(void) ++int nfsd_stat_counters_init(void) + { + return 
nfsd_percpu_counters_init(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM); + } + +-static void nfsd_stat_counters_destroy(void) ++void nfsd_stat_counters_destroy(void) + { + nfsd_percpu_counters_destroy(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM); + } + +-int nfsd_stat_init(void) ++void nfsd_proc_stat_init(struct net *net) + { +- int err; +- +- err = nfsd_stat_counters_init(); +- if (err) +- return err; +- +- svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops); +- +- return 0; ++ svc_proc_register(net, &nfsd_svcstats, &nfsd_proc_ops); + } + +-void nfsd_stat_shutdown(void) ++void nfsd_proc_stat_shutdown(struct net *net) + { +- nfsd_stat_counters_destroy(); +- svc_proc_unregister(&init_net, "nfsd"); ++ svc_proc_unregister(net, "nfsd"); + } +--- a/fs/nfsd/stats.h ++++ b/fs/nfsd/stats.h +@@ -40,8 +40,10 @@ extern struct svc_stat nfsd_svcstats; + int nfsd_percpu_counters_init(struct percpu_counter *counters, int num); + void nfsd_percpu_counters_reset(struct percpu_counter *counters, int num); + void nfsd_percpu_counters_destroy(struct percpu_counter *counters, int num); +-int nfsd_stat_init(void); +-void nfsd_stat_shutdown(void); ++int nfsd_stat_counters_init(void); ++void nfsd_stat_counters_destroy(void); ++void nfsd_proc_stat_init(struct net *net); ++void nfsd_proc_stat_shutdown(struct net *net); + + static inline void nfsd_stats_rc_hits_inc(void) + { diff --git a/queue-6.6/nfsd-fix-frame-size-warning-in-svc_export_parse.patch b/queue-6.6/nfsd-fix-frame-size-warning-in-svc_export_parse.patch new file mode 100644 index 00000000000..723c0aa93e7 --- /dev/null +++ b/queue-6.6/nfsd-fix-frame-size-warning-in-svc_export_parse.patch @@ -0,0 +1,167 @@ +From stable+bounces-67394-greg=kroah.com@vger.kernel.org Tue Aug 13 00:36:40 2024 +From: cel@kernel.org +Date: Mon, 12 Aug 2024 18:35:54 -0400 +Subject: NFSD: Fix frame size warning in svc_export_parse() +To: +Cc: , pvorel@suse.cz, sherry.yang@oracle.com, calum.mackay@oracle.com, kernel-team@fb.com, Chuck Lever , kernel 
test robot , Amir Goldstein , Jeff Layton +Message-ID: <20240812223604.32592-3-cel@kernel.org> + +From: Chuck Lever + +[ Upstream commit 6939ace1f22681fface7841cdbf34d3204cc94b5 ] + +fs/nfsd/export.c: In function 'svc_export_parse': +fs/nfsd/export.c:737:1: warning: the frame size of 1040 bytes is larger than 1024 bytes [-Wframe-larger-than=] + 737 | } + +On my systems, svc_export_parse() has a stack frame of over 800 +bytes, not 1040, but nonetheless, it could do with some reduction. + +When a struct svc_export is on the stack, it's a temporary structure +used as an argument, and not visible as an actual exported FS. No +need to reserve space for export_stats in such cases. + +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202310012359.YEw5IrK6-lkp@intel.com/ +Cc: Amir Goldstein +Reviewed-by: Jeff Layton +Stable-dep-of: 4b14885411f7 ("nfsd: make all of the nfsd stats per-network namespace") +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/export.c | 32 +++++++++++++++++++++++--------- + fs/nfsd/export.h | 4 ++-- + fs/nfsd/stats.h | 12 ++++++------ + 3 files changed, 31 insertions(+), 17 deletions(-) + +--- a/fs/nfsd/export.c ++++ b/fs/nfsd/export.c +@@ -339,12 +339,16 @@ static int export_stats_init(struct expo + + static void export_stats_reset(struct export_stats *stats) + { +- nfsd_percpu_counters_reset(stats->counter, EXP_STATS_COUNTERS_NUM); ++ if (stats) ++ nfsd_percpu_counters_reset(stats->counter, ++ EXP_STATS_COUNTERS_NUM); + } + + static void export_stats_destroy(struct export_stats *stats) + { +- nfsd_percpu_counters_destroy(stats->counter, EXP_STATS_COUNTERS_NUM); ++ if (stats) ++ nfsd_percpu_counters_destroy(stats->counter, ++ EXP_STATS_COUNTERS_NUM); + } + + static void svc_export_put(struct kref *ref) +@@ -353,7 +357,8 @@ static void svc_export_put(struct kref * + path_put(&exp->ex_path); + auth_domain_put(exp->ex_client); + nfsd4_fslocs_free(&exp->ex_fslocs); +- 
export_stats_destroy(&exp->ex_stats); ++ export_stats_destroy(exp->ex_stats); ++ kfree(exp->ex_stats); + kfree(exp->ex_uuid); + kfree_rcu(exp, ex_rcu); + } +@@ -767,13 +772,15 @@ static int svc_export_show(struct seq_fi + seq_putc(m, '\t'); + seq_escape(m, exp->ex_client->name, " \t\n\\"); + if (export_stats) { +- seq_printf(m, "\t%lld\n", exp->ex_stats.start_time); ++ struct percpu_counter *counter = exp->ex_stats->counter; ++ ++ seq_printf(m, "\t%lld\n", exp->ex_stats->start_time); + seq_printf(m, "\tfh_stale: %lld\n", +- percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_FH_STALE])); ++ percpu_counter_sum_positive(&counter[EXP_STATS_FH_STALE])); + seq_printf(m, "\tio_read: %lld\n", +- percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_IO_READ])); ++ percpu_counter_sum_positive(&counter[EXP_STATS_IO_READ])); + seq_printf(m, "\tio_write: %lld\n", +- percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_IO_WRITE])); ++ percpu_counter_sum_positive(&counter[EXP_STATS_IO_WRITE])); + seq_putc(m, '\n'); + return 0; + } +@@ -819,7 +826,7 @@ static void svc_export_init(struct cache + new->ex_layout_types = 0; + new->ex_uuid = NULL; + new->cd = item->cd; +- export_stats_reset(&new->ex_stats); ++ export_stats_reset(new->ex_stats); + } + + static void export_update(struct cache_head *cnew, struct cache_head *citem) +@@ -856,7 +863,14 @@ static struct cache_head *svc_export_all + if (!i) + return NULL; + +- if (export_stats_init(&i->ex_stats)) { ++ i->ex_stats = kmalloc(sizeof(*(i->ex_stats)), GFP_KERNEL); ++ if (!i->ex_stats) { ++ kfree(i); ++ return NULL; ++ } ++ ++ if (export_stats_init(i->ex_stats)) { ++ kfree(i->ex_stats); + kfree(i); + return NULL; + } +--- a/fs/nfsd/export.h ++++ b/fs/nfsd/export.h +@@ -64,10 +64,10 @@ struct svc_export { + struct cache_head h; + struct auth_domain * ex_client; + int ex_flags; ++ int ex_fsid; + struct path ex_path; + kuid_t ex_anon_uid; + kgid_t ex_anon_gid; +- int ex_fsid; + unsigned char * ex_uuid; /* 16 
byte fsid */ + struct nfsd4_fs_locations ex_fslocs; + uint32_t ex_nflavors; +@@ -76,8 +76,8 @@ struct svc_export { + struct nfsd4_deviceid_map *ex_devid_map; + struct cache_detail *cd; + struct rcu_head ex_rcu; +- struct export_stats ex_stats; + unsigned long ex_xprtsec_modes; ++ struct export_stats *ex_stats; + }; + + /* an "export key" (expkey) maps a filehandlefragement to an +--- a/fs/nfsd/stats.h ++++ b/fs/nfsd/stats.h +@@ -61,22 +61,22 @@ static inline void nfsd_stats_rc_nocache + static inline void nfsd_stats_fh_stale_inc(struct svc_export *exp) + { + percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_FH_STALE]); +- if (exp) +- percpu_counter_inc(&exp->ex_stats.counter[EXP_STATS_FH_STALE]); ++ if (exp && exp->ex_stats) ++ percpu_counter_inc(&exp->ex_stats->counter[EXP_STATS_FH_STALE]); + } + + static inline void nfsd_stats_io_read_add(struct svc_export *exp, s64 amount) + { + percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_READ], amount); +- if (exp) +- percpu_counter_add(&exp->ex_stats.counter[EXP_STATS_IO_READ], amount); ++ if (exp && exp->ex_stats) ++ percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_READ], amount); + } + + static inline void nfsd_stats_io_write_add(struct svc_export *exp, s64 amount) + { + percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_WRITE], amount); +- if (exp) +- percpu_counter_add(&exp->ex_stats.counter[EXP_STATS_IO_WRITE], amount); ++ if (exp && exp->ex_stats) ++ percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_WRITE], amount); + } + + static inline void nfsd_stats_payload_misses_inc(struct nfsd_net *nn) diff --git a/queue-6.6/nfsd-make-all-of-the-nfsd-stats-per-network-namespace.patch b/queue-6.6/nfsd-make-all-of-the-nfsd-stats-per-network-namespace.patch new file mode 100644 index 00000000000..0bcbc717fa8 --- /dev/null +++ b/queue-6.6/nfsd-make-all-of-the-nfsd-stats-per-network-namespace.patch @@ -0,0 +1,472 @@ +From stable+bounces-67402-greg=kroah.com@vger.kernel.org Tue Aug 13 00:38:33 2024 +From: 
cel@kernel.org +Date: Mon, 12 Aug 2024 18:36:02 -0400 +Subject: nfsd: make all of the nfsd stats per-network namespace +To: +Cc: , pvorel@suse.cz, sherry.yang@oracle.com, calum.mackay@oracle.com, kernel-team@fb.com, Josef Bacik , Jeff Layton +Message-ID: <20240812223604.32592-11-cel@kernel.org> + +From: Josef Bacik + +[ Upstream commit 4b14885411f74b2b0ce0eb2b39d0fffe54e5ca0d ] + +We have a global set of counters that we modify for all of the nfsd +operations, but now that we're exposing these stats across all network +namespaces we need to make the stats also be per-network namespace. We +already have some caching stats that are per-network namespace, so move +these definitions into the same counter and then adjust all the helpers +and users of these stats to provide the appropriate nfsd_net struct so +that the stats are maintained for the per-network namespace objects. + +Signed-off-by: Josef Bacik +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/cache.h | 2 - + fs/nfsd/netns.h | 17 ++++++++++++++-- + fs/nfsd/nfs4proc.c | 6 ++--- + fs/nfsd/nfs4state.c | 3 +- + fs/nfsd/nfscache.c | 36 ++++++---------------------------- + fs/nfsd/nfsctl.c | 12 ++--------- + fs/nfsd/nfsfh.c | 3 +- + fs/nfsd/stats.c | 26 +++++++++++++------------ + fs/nfsd/stats.h | 54 ++++++++++++++++++---------------------------------- + fs/nfsd/vfs.c | 6 +++-- + 10 files changed, 69 insertions(+), 96 deletions(-) + +--- a/fs/nfsd/cache.h ++++ b/fs/nfsd/cache.h +@@ -80,8 +80,6 @@ enum { + + int nfsd_drc_slab_create(void); + void nfsd_drc_slab_free(void); +-int nfsd_net_reply_cache_init(struct nfsd_net *nn); +-void nfsd_net_reply_cache_destroy(struct nfsd_net *nn); + int nfsd_reply_cache_init(struct nfsd_net *); + void nfsd_reply_cache_shutdown(struct nfsd_net *); + int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, +--- a/fs/nfsd/netns.h ++++ b/fs/nfsd/netns.h +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + 
#include + #include + +@@ -29,7 +30,19 @@ enum { + NFSD_STATS_PAYLOAD_MISSES, + /* amount of memory (in bytes) currently consumed by the DRC */ + NFSD_STATS_DRC_MEM_USAGE, +- NFSD_NET_COUNTERS_NUM ++ NFSD_STATS_RC_HITS, /* repcache hits */ ++ NFSD_STATS_RC_MISSES, /* repcache misses */ ++ NFSD_STATS_RC_NOCACHE, /* uncached reqs */ ++ NFSD_STATS_FH_STALE, /* FH stale error */ ++ NFSD_STATS_IO_READ, /* bytes returned to read requests */ ++ NFSD_STATS_IO_WRITE, /* bytes passed in write requests */ ++#ifdef CONFIG_NFSD_V4 ++ NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */ ++ NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP, ++#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op)) ++ NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */ ++#endif ++ NFSD_STATS_COUNTERS_NUM + }; + + /* +@@ -169,7 +182,7 @@ struct nfsd_net { + atomic_t num_drc_entries; + + /* Per-netns stats counters */ +- struct percpu_counter counter[NFSD_NET_COUNTERS_NUM]; ++ struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM]; + + /* longest hash chain seen */ + unsigned int longest_chain; +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -2478,10 +2478,10 @@ nfsd4_proc_null(struct svc_rqst *rqstp) + return rpc_success; + } + +-static inline void nfsd4_increment_op_stats(u32 opnum) ++static inline void nfsd4_increment_op_stats(struct nfsd_net *nn, u32 opnum) + { + if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP) +- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_NFS4_OP(opnum)]); ++ percpu_counter_inc(&nn->counter[NFSD_STATS_NFS4_OP(opnum)]); + } + + static const struct nfsd4_operation nfsd4_ops[]; +@@ -2756,7 +2756,7 @@ encode_op: + status, nfsd4_op_name(op->opnum)); + + nfsd4_cstate_clear_replay(cstate); +- nfsd4_increment_op_stats(op->opnum); ++ nfsd4_increment_op_stats(nn, op->opnum); + } + + fh_put(current_fh); +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -8422,6 +8422,7 @@ __be32 + 
nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode) + { + __be32 status; ++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct file_lock_context *ctx; + struct file_lock *fl; + struct nfs4_delegation *dp; +@@ -8451,7 +8452,7 @@ nfsd4_deleg_getattr_conflict(struct svc_ + } + break_lease: + spin_unlock(&ctx->flc_lock); +- nfsd_stats_wdeleg_getattr_inc(); ++ nfsd_stats_wdeleg_getattr_inc(nn); + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); + if (status != nfserr_jukebox || + !nfsd_wait_for_delegreturn(rqstp, inode)) +--- a/fs/nfsd/nfscache.c ++++ b/fs/nfsd/nfscache.c +@@ -176,27 +176,6 @@ void nfsd_drc_slab_free(void) + kmem_cache_destroy(drc_slab); + } + +-/** +- * nfsd_net_reply_cache_init - per net namespace reply cache set-up +- * @nn: nfsd_net being initialized +- * +- * Returns zero on succes; otherwise a negative errno is returned. +- */ +-int nfsd_net_reply_cache_init(struct nfsd_net *nn) +-{ +- return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM); +-} +- +-/** +- * nfsd_net_reply_cache_destroy - per net namespace reply cache tear-down +- * @nn: nfsd_net being freed +- * +- */ +-void nfsd_net_reply_cache_destroy(struct nfsd_net *nn) +-{ +- nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM); +-} +- + int nfsd_reply_cache_init(struct nfsd_net *nn) + { + unsigned int hashsize; +@@ -502,7 +481,7 @@ out: + int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, + unsigned int len, struct nfsd_cacherep **cacherep) + { +- struct nfsd_net *nn; ++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfsd_cacherep *rp, *found; + __wsum csum; + struct nfsd_drc_bucket *b; +@@ -512,7 +491,7 @@ int nfsd_cache_lookup(struct svc_rqst *r + int rtn = RC_DOIT; + + if (type == RC_NOCACHE) { +- nfsd_stats_rc_nocache_inc(); ++ nfsd_stats_rc_nocache_inc(nn); + goto out; + } + +@@ -522,7 +501,6 @@ int nfsd_cache_lookup(struct svc_rqst *r + * Since the common case 
is a cache miss followed by an insert, + * preallocate an entry. + */ +- nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + rp = nfsd_cacherep_alloc(rqstp, csum, nn); + if (!rp) + goto out; +@@ -540,7 +518,7 @@ int nfsd_cache_lookup(struct svc_rqst *r + freed = nfsd_cacherep_dispose(&dispose); + trace_nfsd_drc_gc(nn, freed); + +- nfsd_stats_rc_misses_inc(); ++ nfsd_stats_rc_misses_inc(nn); + atomic_inc(&nn->num_drc_entries); + nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp)); + goto out; +@@ -548,7 +526,7 @@ int nfsd_cache_lookup(struct svc_rqst *r + found_entry: + /* We found a matching entry which is either in progress or done. */ + nfsd_reply_cache_free_locked(NULL, rp, nn); +- nfsd_stats_rc_hits_inc(); ++ nfsd_stats_rc_hits_inc(nn); + rtn = RC_DROPIT; + rp = found; + +@@ -692,11 +670,11 @@ int nfsd_reply_cache_stats_show(struct s + seq_printf(m, "mem usage: %lld\n", + percpu_counter_sum_positive(&nn->counter[NFSD_STATS_DRC_MEM_USAGE])); + seq_printf(m, "cache hits: %lld\n", +- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS])); ++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS])); + seq_printf(m, "cache misses: %lld\n", +- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES])); ++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES])); + seq_printf(m, "not cached: %lld\n", +- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE])); ++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE])); + seq_printf(m, "payload misses: %lld\n", + percpu_counter_sum_positive(&nn->counter[NFSD_STATS_PAYLOAD_MISSES])); + seq_printf(m, "longest chain len: %u\n", nn->longest_chain); +--- a/fs/nfsd/nfsctl.c ++++ b/fs/nfsd/nfsctl.c +@@ -1524,7 +1524,7 @@ static __net_init int nfsd_net_init(stru + retval = nfsd_idmap_init(net); + if (retval) + goto out_idmap_error; +- retval = nfsd_net_reply_cache_init(nn); ++ retval = nfsd_stat_counters_init(nn); + if (retval) + goto out_repcache_error; + 
nn->nfsd_versions = NULL; +@@ -1554,7 +1554,7 @@ static __net_exit void nfsd_net_exit(str + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfsd_proc_stat_shutdown(net); +- nfsd_net_reply_cache_destroy(nn); ++ nfsd_stat_counters_destroy(nn); + nfsd_idmap_shutdown(net); + nfsd_export_shutdown(net); + nfsd_netns_free_versions(nn); +@@ -1577,12 +1577,9 @@ static int __init init_nfsd(void) + retval = nfsd4_init_pnfs(); + if (retval) + goto out_free_slabs; +- retval = nfsd_stat_counters_init(); /* Statistics */ +- if (retval) +- goto out_free_pnfs; + retval = nfsd_drc_slab_create(); + if (retval) +- goto out_free_stat; ++ goto out_free_pnfs; + nfsd_lockd_init(); /* lockd->nfsd callbacks */ + retval = create_proc_exports_entry(); + if (retval) +@@ -1612,8 +1609,6 @@ out_free_exports: + out_free_lockd: + nfsd_lockd_shutdown(); + nfsd_drc_slab_free(); +-out_free_stat: +- nfsd_stat_counters_destroy(); + out_free_pnfs: + nfsd4_exit_pnfs(); + out_free_slabs: +@@ -1630,7 +1625,6 @@ static void __exit exit_nfsd(void) + nfsd_drc_slab_free(); + remove_proc_entry("fs/nfs/exports", NULL); + remove_proc_entry("fs/nfs", NULL); +- nfsd_stat_counters_destroy(); + nfsd_lockd_shutdown(); + nfsd4_free_slabs(); + nfsd4_exit_pnfs(); +--- a/fs/nfsd/nfsfh.c ++++ b/fs/nfsd/nfsfh.c +@@ -327,6 +327,7 @@ out: + __be32 + fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) + { ++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct svc_export *exp = NULL; + struct dentry *dentry; + __be32 error; +@@ -395,7 +396,7 @@ skip_pseudoflavor_check: + out: + trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error); + if (error == nfserr_stale) +- nfsd_stats_fh_stale_inc(exp); ++ nfsd_stats_fh_stale_inc(nn, exp); + return error; + } + +--- a/fs/nfsd/stats.c ++++ b/fs/nfsd/stats.c +@@ -34,15 +34,17 @@ struct svc_stat nfsd_svcstats = { + + static int nfsd_show(struct seq_file *seq, void *v) + { ++ struct net *net = pde_data(file_inode(seq->file)); ++ 
struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int i; + + seq_printf(seq, "rc %lld %lld %lld\nfh %lld 0 0 0 0\nio %lld %lld\n", +- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]), +- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]), +- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]), +- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_FH_STALE]), +- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_READ]), +- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE])); ++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]), ++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]), ++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]), ++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_FH_STALE]), ++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_READ]), ++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_WRITE])); + + /* thread usage: */ + seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt)); +@@ -63,10 +65,10 @@ static int nfsd_show(struct seq_file *se + seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1); + for (i = 0; i <= LAST_NFS4_OP; i++) { + seq_printf(seq, " %lld", +- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)])); ++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_NFS4_OP(i)])); + } + seq_printf(seq, "\nwdeleg_getattr %lld", +- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR])); ++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_WDELEG_GETATTR])); + + seq_putc(seq, '\n'); + #endif +@@ -108,14 +110,14 @@ void nfsd_percpu_counters_destroy(struct + percpu_counter_destroy(&counters[i]); + } + +-int nfsd_stat_counters_init(void) ++int nfsd_stat_counters_init(struct nfsd_net *nn) + { +- return nfsd_percpu_counters_init(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM); ++ return nfsd_percpu_counters_init(nn->counter, NFSD_STATS_COUNTERS_NUM); + } + +-void 
nfsd_stat_counters_destroy(void) ++void nfsd_stat_counters_destroy(struct nfsd_net *nn) + { +- nfsd_percpu_counters_destroy(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM); ++ nfsd_percpu_counters_destroy(nn->counter, NFSD_STATS_COUNTERS_NUM); + } + + void nfsd_proc_stat_init(struct net *net) +--- a/fs/nfsd/stats.h ++++ b/fs/nfsd/stats.h +@@ -10,26 +10,7 @@ + #include + #include + +- +-enum { +- NFSD_STATS_RC_HITS, /* repcache hits */ +- NFSD_STATS_RC_MISSES, /* repcache misses */ +- NFSD_STATS_RC_NOCACHE, /* uncached reqs */ +- NFSD_STATS_FH_STALE, /* FH stale error */ +- NFSD_STATS_IO_READ, /* bytes returned to read requests */ +- NFSD_STATS_IO_WRITE, /* bytes passed in write requests */ +-#ifdef CONFIG_NFSD_V4 +- NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */ +- NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP, +-#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op)) +- NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */ +-#endif +- NFSD_STATS_COUNTERS_NUM +-}; +- + struct nfsd_stats { +- struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM]; +- + atomic_t th_cnt; /* number of available threads */ + }; + +@@ -40,43 +21,46 @@ extern struct svc_stat nfsd_svcstats; + int nfsd_percpu_counters_init(struct percpu_counter *counters, int num); + void nfsd_percpu_counters_reset(struct percpu_counter *counters, int num); + void nfsd_percpu_counters_destroy(struct percpu_counter *counters, int num); +-int nfsd_stat_counters_init(void); +-void nfsd_stat_counters_destroy(void); ++int nfsd_stat_counters_init(struct nfsd_net *nn); ++void nfsd_stat_counters_destroy(struct nfsd_net *nn); + void nfsd_proc_stat_init(struct net *net); + void nfsd_proc_stat_shutdown(struct net *net); + +-static inline void nfsd_stats_rc_hits_inc(void) ++static inline void nfsd_stats_rc_hits_inc(struct nfsd_net *nn) + { +- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_HITS]); ++ 
percpu_counter_inc(&nn->counter[NFSD_STATS_RC_HITS]); + } + +-static inline void nfsd_stats_rc_misses_inc(void) ++static inline void nfsd_stats_rc_misses_inc(struct nfsd_net *nn) + { +- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_MISSES]); ++ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_MISSES]); + } + +-static inline void nfsd_stats_rc_nocache_inc(void) ++static inline void nfsd_stats_rc_nocache_inc(struct nfsd_net *nn) + { +- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]); ++ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_NOCACHE]); + } + +-static inline void nfsd_stats_fh_stale_inc(struct svc_export *exp) ++static inline void nfsd_stats_fh_stale_inc(struct nfsd_net *nn, ++ struct svc_export *exp) + { +- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_FH_STALE]); ++ percpu_counter_inc(&nn->counter[NFSD_STATS_FH_STALE]); + if (exp && exp->ex_stats) + percpu_counter_inc(&exp->ex_stats->counter[EXP_STATS_FH_STALE]); + } + +-static inline void nfsd_stats_io_read_add(struct svc_export *exp, s64 amount) ++static inline void nfsd_stats_io_read_add(struct nfsd_net *nn, ++ struct svc_export *exp, s64 amount) + { +- percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_READ], amount); ++ percpu_counter_add(&nn->counter[NFSD_STATS_IO_READ], amount); + if (exp && exp->ex_stats) + percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_READ], amount); + } + +-static inline void nfsd_stats_io_write_add(struct svc_export *exp, s64 amount) ++static inline void nfsd_stats_io_write_add(struct nfsd_net *nn, ++ struct svc_export *exp, s64 amount) + { +- percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_WRITE], amount); ++ percpu_counter_add(&nn->counter[NFSD_STATS_IO_WRITE], amount); + if (exp && exp->ex_stats) + percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_WRITE], amount); + } +@@ -97,9 +81,9 @@ static inline void nfsd_stats_drc_mem_us + } + + #ifdef CONFIG_NFSD_V4 +-static inline void nfsd_stats_wdeleg_getattr_inc(void) ++static inline void 
nfsd_stats_wdeleg_getattr_inc(struct nfsd_net *nn) + { +- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]); ++ percpu_counter_inc(&nn->counter[NFSD_STATS_WDELEG_GETATTR]); + } + #endif + #endif /* _NFSD_STATS_H */ +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -985,7 +985,9 @@ static __be32 nfsd_finish_read(struct sv + unsigned long *count, u32 *eof, ssize_t host_err) + { + if (host_err >= 0) { +- nfsd_stats_io_read_add(fhp->fh_export, host_err); ++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); ++ ++ nfsd_stats_io_read_add(nn, fhp->fh_export, host_err); + *eof = nfsd_eof_on_read(file, offset, host_err, *count); + *count = host_err; + fsnotify_access(file); +@@ -1168,7 +1170,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, s + goto out_nfserr; + } + *cnt = host_err; +- nfsd_stats_io_write_add(exp, *cnt); ++ nfsd_stats_io_write_add(nn, exp, *cnt); + fsnotify_modify(file); + host_err = filemap_check_wb_err(file->f_mapping, since); + if (host_err < 0) diff --git a/queue-6.6/nfsd-make-svc_stat-per-network-namespace-instead-of-global.patch b/queue-6.6/nfsd-make-svc_stat-per-network-namespace-instead-of-global.patch new file mode 100644 index 00000000000..a00ed6098f8 --- /dev/null +++ b/queue-6.6/nfsd-make-svc_stat-per-network-namespace-instead-of-global.patch @@ -0,0 +1,114 @@ +From stable+bounces-67404-greg=kroah.com@vger.kernel.org Tue Aug 13 00:38:38 2024 +From: cel@kernel.org +Date: Mon, 12 Aug 2024 18:36:04 -0400 +Subject: nfsd: make svc_stat per-network namespace instead of global +To: +Cc: , pvorel@suse.cz, sherry.yang@oracle.com, calum.mackay@oracle.com, kernel-team@fb.com, Josef Bacik , Jeff Layton +Message-ID: <20240812223604.32592-13-cel@kernel.org> + +From: Josef Bacik + +[ Upstream commit 16fb9808ab2c99979f081987752abcbc5b092eac ] + +The final bit of stats that is global is the rpc svc_stat. Move this +into the nfsd_net struct and use that everywhere instead of the global +struct. Remove the unused global struct. 
+ +Signed-off-by: Josef Bacik +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/netns.h | 4 ++++ + fs/nfsd/nfsctl.c | 2 ++ + fs/nfsd/nfssvc.c | 2 +- + fs/nfsd/stats.c | 10 ++++------ + fs/nfsd/stats.h | 2 -- + 5 files changed, 11 insertions(+), 9 deletions(-) + +--- a/fs/nfsd/netns.h ++++ b/fs/nfsd/netns.h +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + /* Hash tables for nfs4_clientid state */ + #define CLIENT_HASH_BITS 4 +@@ -184,6 +185,9 @@ struct nfsd_net { + /* Per-netns stats counters */ + struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM]; + ++ /* sunrpc svc stats */ ++ struct svc_stat nfsd_svcstats; ++ + /* longest hash chain seen */ + unsigned int longest_chain; + +--- a/fs/nfsd/nfsctl.c ++++ b/fs/nfsd/nfsctl.c +@@ -1527,6 +1527,8 @@ static __net_init int nfsd_net_init(stru + retval = nfsd_stat_counters_init(nn); + if (retval) + goto out_repcache_error; ++ memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats)); ++ nn->nfsd_svcstats.program = &nfsd_program; + nn->nfsd_versions = NULL; + nn->nfsd4_minorversions = NULL; + nfsd4_init_leases_net(nn); +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -670,7 +670,7 @@ int nfsd_create_serv(struct net *net) + if (nfsd_max_blksize == 0) + nfsd_max_blksize = nfsd_get_default_max_blksize(); + nfsd_reset_versions(nn); +- serv = svc_create_pooled(&nfsd_program, &nfsd_svcstats, ++ serv = svc_create_pooled(&nfsd_program, &nn->nfsd_svcstats, + nfsd_max_blksize, nfsd); + if (serv == NULL) + return -ENOMEM; +--- a/fs/nfsd/stats.c ++++ b/fs/nfsd/stats.c +@@ -27,10 +27,6 @@ + + #include "nfsd.h" + +-struct svc_stat nfsd_svcstats = { +- .program = &nfsd_program, +-}; +- + static int nfsd_show(struct seq_file *seq, void *v) + { + struct net *net = pde_data(file_inode(seq->file)); +@@ -56,7 +52,7 @@ static int nfsd_show(struct seq_file *se + seq_puts(seq, "\nra 0 0 0 0 0 0 0 0 0 0 0 0\n"); + + /* show my rpc info */ +- svc_seq_show(seq, 
&nfsd_svcstats); ++ svc_seq_show(seq, &nn->nfsd_svcstats); + + #ifdef CONFIG_NFSD_V4 + /* Show count for individual nfsv4 operations */ +@@ -121,7 +117,9 @@ void nfsd_stat_counters_destroy(struct n + + void nfsd_proc_stat_init(struct net *net) + { +- svc_proc_register(net, &nfsd_svcstats, &nfsd_proc_ops); ++ struct nfsd_net *nn = net_generic(net, nfsd_net_id); ++ ++ svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops); + } + + void nfsd_proc_stat_shutdown(struct net *net) +--- a/fs/nfsd/stats.h ++++ b/fs/nfsd/stats.h +@@ -10,8 +10,6 @@ + #include + #include + +-extern struct svc_stat nfsd_svcstats; +- + int nfsd_percpu_counters_init(struct percpu_counter *counters, int num); + void nfsd_percpu_counters_reset(struct percpu_counter *counters, int num); + void nfsd_percpu_counters_destroy(struct percpu_counter *counters, int num); diff --git a/queue-6.6/nfsd-remove-nfsd_stats-make-th_cnt-a-global-counter.patch b/queue-6.6/nfsd-remove-nfsd_stats-make-th_cnt-a-global-counter.patch new file mode 100644 index 00000000000..939725615bc --- /dev/null +++ b/queue-6.6/nfsd-remove-nfsd_stats-make-th_cnt-a-global-counter.patch @@ -0,0 +1,98 @@ +From stable+bounces-67403-greg=kroah.com@vger.kernel.org Tue Aug 13 00:38:35 2024 +From: cel@kernel.org +Date: Mon, 12 Aug 2024 18:36:03 -0400 +Subject: nfsd: remove nfsd_stats, make th_cnt a global counter +To: +Cc: , pvorel@suse.cz, sherry.yang@oracle.com, calum.mackay@oracle.com, kernel-team@fb.com, Josef Bacik , Jeff Layton +Message-ID: <20240812223604.32592-12-cel@kernel.org> + +From: Josef Bacik + +[ Upstream commit e41ee44cc6a473b1f414031782c3b4283d7f3e5f ] + +This is the last global stat, take it out of the nfsd_stats struct and +make it a global part of nfsd, report it the same as always. 
+ +Signed-off-by: Josef Bacik +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfsd.h | 1 + + fs/nfsd/nfssvc.c | 5 +++-- + fs/nfsd/stats.c | 3 +-- + fs/nfsd/stats.h | 6 ------ + 4 files changed, 5 insertions(+), 10 deletions(-) + +--- a/fs/nfsd/nfsd.h ++++ b/fs/nfsd/nfsd.h +@@ -69,6 +69,7 @@ extern struct mutex nfsd_mutex; + extern spinlock_t nfsd_drc_lock; + extern unsigned long nfsd_drc_max_mem; + extern unsigned long nfsd_drc_mem_used; ++extern atomic_t nfsd_th_cnt; /* number of available threads */ + + extern const struct seq_operations nfs_exports_op; + +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -34,6 +34,7 @@ + + #define NFSDDBG_FACILITY NFSDDBG_SVC + ++atomic_t nfsd_th_cnt = ATOMIC_INIT(0); + extern struct svc_program nfsd_program; + static int nfsd(void *vrqstp); + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +@@ -945,7 +946,7 @@ nfsd(void *vrqstp) + + current->fs->umask = 0; + +- atomic_inc(&nfsdstats.th_cnt); ++ atomic_inc(&nfsd_th_cnt); + + set_freezable(); + +@@ -959,7 +960,7 @@ nfsd(void *vrqstp) + svc_recv(rqstp); + } + +- atomic_dec(&nfsdstats.th_cnt); ++ atomic_dec(&nfsd_th_cnt); + + out: + /* Release the thread */ +--- a/fs/nfsd/stats.c ++++ b/fs/nfsd/stats.c +@@ -27,7 +27,6 @@ + + #include "nfsd.h" + +-struct nfsd_stats nfsdstats; + struct svc_stat nfsd_svcstats = { + .program = &nfsd_program, + }; +@@ -47,7 +46,7 @@ static int nfsd_show(struct seq_file *se + percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_WRITE])); + + /* thread usage: */ +- seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt)); ++ seq_printf(seq, "th %u 0", atomic_read(&nfsd_th_cnt)); + + /* deprecated thread usage histogram stats */ + for (i = 0; i < 10; i++) +--- a/fs/nfsd/stats.h ++++ b/fs/nfsd/stats.h +@@ -10,12 +10,6 @@ + #include + #include + +-struct nfsd_stats { +- atomic_t th_cnt; /* number of available threads */ +-}; +- +-extern struct nfsd_stats nfsdstats; +- + extern struct 
svc_stat nfsd_svcstats; + + int nfsd_percpu_counters_init(struct percpu_counter *counters, int num); diff --git a/queue-6.6/nfsd-rename-nfsd_net_-to-nfsd_stats_.patch b/queue-6.6/nfsd-rename-nfsd_net_-to-nfsd_stats_.patch new file mode 100644 index 00000000000..b2fbc93f2bf --- /dev/null +++ b/queue-6.6/nfsd-rename-nfsd_net_-to-nfsd_stats_.patch @@ -0,0 +1,83 @@ +From stable+bounces-67400-greg=kroah.com@vger.kernel.org Tue Aug 13 00:38:28 2024 +From: cel@kernel.org +Date: Mon, 12 Aug 2024 18:36:00 -0400 +Subject: nfsd: rename NFSD_NET_* to NFSD_STATS_* +To: +Cc: , pvorel@suse.cz, sherry.yang@oracle.com, calum.mackay@oracle.com, kernel-team@fb.com, Josef Bacik , Jeff Layton +Message-ID: <20240812223604.32592-9-cel@kernel.org> + +From: Josef Bacik + +[ Upstream commit d98416cc2154053950610bb6880911e3dcbdf8c5 ] + +We're going to merge the stats all into per network namespace in +subsequent patches, rename these nn counters to be consistent with the +rest of the stats. + +Signed-off-by: Josef Bacik +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/netns.h | 4 ++-- + fs/nfsd/nfscache.c | 4 ++-- + fs/nfsd/stats.h | 6 +++--- + 3 files changed, 7 insertions(+), 7 deletions(-) + +--- a/fs/nfsd/netns.h ++++ b/fs/nfsd/netns.h +@@ -26,9 +26,9 @@ struct nfsd4_client_tracking_ops; + + enum { + /* cache misses due only to checksum comparison failures */ +- NFSD_NET_PAYLOAD_MISSES, ++ NFSD_STATS_PAYLOAD_MISSES, + /* amount of memory (in bytes) currently consumed by the DRC */ +- NFSD_NET_DRC_MEM_USAGE, ++ NFSD_STATS_DRC_MEM_USAGE, + NFSD_NET_COUNTERS_NUM + }; + +--- a/fs/nfsd/nfscache.c ++++ b/fs/nfsd/nfscache.c +@@ -690,7 +690,7 @@ int nfsd_reply_cache_stats_show(struct s + atomic_read(&nn->num_drc_entries)); + seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits); + seq_printf(m, "mem usage: %lld\n", +- percpu_counter_sum_positive(&nn->counter[NFSD_NET_DRC_MEM_USAGE])); ++ 
percpu_counter_sum_positive(&nn->counter[NFSD_STATS_DRC_MEM_USAGE])); + seq_printf(m, "cache hits: %lld\n", + percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS])); + seq_printf(m, "cache misses: %lld\n", +@@ -698,7 +698,7 @@ int nfsd_reply_cache_stats_show(struct s + seq_printf(m, "not cached: %lld\n", + percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE])); + seq_printf(m, "payload misses: %lld\n", +- percpu_counter_sum_positive(&nn->counter[NFSD_NET_PAYLOAD_MISSES])); ++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_PAYLOAD_MISSES])); + seq_printf(m, "longest chain len: %u\n", nn->longest_chain); + seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize); + return 0; +--- a/fs/nfsd/stats.h ++++ b/fs/nfsd/stats.h +@@ -81,17 +81,17 @@ static inline void nfsd_stats_io_write_a + + static inline void nfsd_stats_payload_misses_inc(struct nfsd_net *nn) + { +- percpu_counter_inc(&nn->counter[NFSD_NET_PAYLOAD_MISSES]); ++ percpu_counter_inc(&nn->counter[NFSD_STATS_PAYLOAD_MISSES]); + } + + static inline void nfsd_stats_drc_mem_usage_add(struct nfsd_net *nn, s64 amount) + { +- percpu_counter_add(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount); ++ percpu_counter_add(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount); + } + + static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount) + { +- percpu_counter_sub(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount); ++ percpu_counter_sub(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount); + } + + #ifdef CONFIG_NFSD_V4 diff --git a/queue-6.6/nfsd-rewrite-synopsis-of-nfsd_percpu_counters_init.patch b/queue-6.6/nfsd-rewrite-synopsis-of-nfsd_percpu_counters_init.patch new file mode 100644 index 00000000000..490e1c63aae --- /dev/null +++ b/queue-6.6/nfsd-rewrite-synopsis-of-nfsd_percpu_counters_init.patch @@ -0,0 +1,59 @@ +From stable+bounces-67393-greg=kroah.com@vger.kernel.org Tue Aug 13 00:36:34 2024 +From: cel@kernel.org +Date: Mon, 12 Aug 2024 18:35:53 -0400 
+Subject: NFSD: Rewrite synopsis of nfsd_percpu_counters_init() +To: +Cc: , pvorel@suse.cz, sherry.yang@oracle.com, calum.mackay@oracle.com, kernel-team@fb.com, Chuck Lever , Amir Goldstein , Jeff Layton +Message-ID: <20240812223604.32592-2-cel@kernel.org> + +From: Chuck Lever + +[ Upstream commit 5ec39944f874e1ecc09f624a70dfaa8ac3bf9d08 ] + +In function ‘export_stats_init’, + inlined from ‘svc_export_alloc’ at fs/nfsd/export.c:866:6: +fs/nfsd/export.c:337:16: warning: ‘nfsd_percpu_counters_init’ accessing 40 bytes in a region of size 0 [-Wstringop-overflow=] + 337 | return nfsd_percpu_counters_init(&stats->counter, EXP_STATS_COUNTERS_NUM); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +fs/nfsd/export.c:337:16: note: referencing argument 1 of type ‘struct percpu_counter[0]’ +fs/nfsd/stats.h: In function ‘svc_export_alloc’: +fs/nfsd/stats.h:40:5: note: in a call to function ‘nfsd_percpu_counters_init’ + 40 | int nfsd_percpu_counters_init(struct percpu_counter counters[], int num); + | ^~~~~~~~~~~~~~~~~~~~~~~~~ + +Cc: Amir Goldstein +Reviewed-by: Jeff Layton +Stable-dep-of: 93483ac5fec6 ("nfsd: expose /proc/net/sunrpc/nfsd in net namespaces") +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/stats.c | 2 +- + fs/nfsd/stats.h | 6 +++--- + 2 files changed, 4 insertions(+), 4 deletions(-) + +--- a/fs/nfsd/stats.c ++++ b/fs/nfsd/stats.c +@@ -76,7 +76,7 @@ static int nfsd_show(struct seq_file *se + + DEFINE_PROC_SHOW_ATTRIBUTE(nfsd); + +-int nfsd_percpu_counters_init(struct percpu_counter counters[], int num) ++int nfsd_percpu_counters_init(struct percpu_counter *counters, int num) + { + int i, err = 0; + +--- a/fs/nfsd/stats.h ++++ b/fs/nfsd/stats.h +@@ -37,9 +37,9 @@ extern struct nfsd_stats nfsdstats; + + extern struct svc_stat nfsd_svcstats; + +-int nfsd_percpu_counters_init(struct percpu_counter counters[], int num); +-void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num); +-void 
nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num); ++int nfsd_percpu_counters_init(struct percpu_counter *counters, int num); ++void nfsd_percpu_counters_reset(struct percpu_counter *counters, int num); ++void nfsd_percpu_counters_destroy(struct percpu_counter *counters, int num); + int nfsd_stat_init(void); + void nfsd_stat_shutdown(void); + diff --git a/queue-6.6/nfsd-stop-setting-pg_stats-for-unused-stats.patch b/queue-6.6/nfsd-stop-setting-pg_stats-for-unused-stats.patch new file mode 100644 index 00000000000..7a0d8bf42f0 --- /dev/null +++ b/queue-6.6/nfsd-stop-setting-pg_stats-for-unused-stats.patch @@ -0,0 +1,89 @@ +From stable+bounces-67396-greg=kroah.com@vger.kernel.org Tue Aug 13 00:37:08 2024 +From: cel@kernel.org +Date: Mon, 12 Aug 2024 18:35:56 -0400 +Subject: nfsd: stop setting ->pg_stats for unused stats +To: +Cc: , pvorel@suse.cz, sherry.yang@oracle.com, calum.mackay@oracle.com, kernel-team@fb.com, Josef Bacik , Jeff Layton +Message-ID: <20240812223604.32592-5-cel@kernel.org> + +From: Josef Bacik + +[ Upstream commit a2214ed588fb3c5b9824a21cff870482510372bb ] + +A lot of places are setting a blank svc_stats in ->pg_stats and never +utilizing these stats. Remove all of these extra structs as we're not +reporting these stats anywhere. 
+ +Signed-off-by: Josef Bacik +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/lockd/svc.c | 3 --- + fs/nfs/callback.c | 3 --- + fs/nfsd/nfssvc.c | 5 ----- + 3 files changed, 11 deletions(-) + +--- a/fs/lockd/svc.c ++++ b/fs/lockd/svc.c +@@ -712,8 +712,6 @@ static const struct svc_version *nlmsvc_ + #endif + }; + +-static struct svc_stat nlmsvc_stats; +- + #define NLM_NRVERS ARRAY_SIZE(nlmsvc_version) + static struct svc_program nlmsvc_program = { + .pg_prog = NLM_PROGRAM, /* program number */ +@@ -721,7 +719,6 @@ static struct svc_program nlmsvc_program + .pg_vers = nlmsvc_version, /* version table */ + .pg_name = "lockd", /* service name */ + .pg_class = "nfsd", /* share authentication with nfsd */ +- .pg_stats = &nlmsvc_stats, /* stats table */ + .pg_authenticate = &lockd_authenticate, /* export authentication */ + .pg_init_request = svc_generic_init_request, + .pg_rpcbind_set = svc_generic_rpcbind_set, +--- a/fs/nfs/callback.c ++++ b/fs/nfs/callback.c +@@ -399,15 +399,12 @@ static const struct svc_version *nfs4_ca + [4] = &nfs4_callback_version4, + }; + +-static struct svc_stat nfs4_callback_stats; +- + static struct svc_program nfs4_callback_program = { + .pg_prog = NFS4_CALLBACK, /* RPC service number */ + .pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */ + .pg_vers = nfs4_callback_version, /* version table */ + .pg_name = "NFSv4 callback", /* service name */ + .pg_class = "nfs", /* authentication class */ +- .pg_stats = &nfs4_callback_stats, + .pg_authenticate = nfs_callback_authenticate, + .pg_init_request = svc_generic_init_request, + .pg_rpcbind_set = svc_generic_rpcbind_set, +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -89,7 +89,6 @@ unsigned long nfsd_drc_max_mem; + unsigned long nfsd_drc_mem_used; + + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +-static struct svc_stat nfsd_acl_svcstats; + static const struct svc_version *nfsd_acl_version[] = { + # if 
defined(CONFIG_NFSD_V2_ACL) + [2] = &nfsd_acl_version2, +@@ -108,15 +107,11 @@ static struct svc_program nfsd_acl_progr + .pg_vers = nfsd_acl_version, + .pg_name = "nfsacl", + .pg_class = "nfsd", +- .pg_stats = &nfsd_acl_svcstats, + .pg_authenticate = &svc_set_client, + .pg_init_request = nfsd_acl_init_request, + .pg_rpcbind_set = nfsd_acl_rpcbind_set, + }; + +-static struct svc_stat nfsd_acl_svcstats = { +- .program = &nfsd_acl_program, +-}; + #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ + + static const struct svc_version *nfsd_version[] = { diff --git a/queue-6.6/nvme-pci-add-apst-quirk-for-lenovo-n60z-laptop.patch b/queue-6.6/nvme-pci-add-apst-quirk-for-lenovo-n60z-laptop.patch new file mode 100644 index 00000000000..f974ceab266 --- /dev/null +++ b/queue-6.6/nvme-pci-add-apst-quirk-for-lenovo-n60z-laptop.patch @@ -0,0 +1,39 @@ +From ab091ec536cb7b271983c0c063b17f62f3591583 Mon Sep 17 00:00:00 2001 +From: WangYuli +Date: Mon, 15 Jul 2024 17:31:44 +0800 +Subject: nvme/pci: Add APST quirk for Lenovo N60z laptop + +From: WangYuli + +commit ab091ec536cb7b271983c0c063b17f62f3591583 upstream. + +There is a hardware power-saving problem with the Lenovo N60z +board. When turn it on and leave it for 10 hours, there is a +20% chance that a nvme disk will not wake up until reboot. + +Link: https://lore.kernel.org/all/2B5581C46AC6E335+9c7a81f1-05fb-4fd0-9fbb-108757c21628@uniontech.com +Signed-off-by: hmy +Signed-off-by: Wentao Guan +Signed-off-by: WangYuli +Signed-off-by: Keith Busch +Signed-off-by: Greg Kroah-Hartman +--- + drivers/nvme/host/pci.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -2931,6 +2931,13 @@ static unsigned long check_vendor_combin + return NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND; + } + ++ /* ++ * NVMe SSD drops off the PCIe bus after system idle ++ * for 10 hours on a Lenovo N60z board. 
++ */ ++ if (dmi_match(DMI_BOARD_NAME, "LXKT-ZXEG-N6")) ++ return NVME_QUIRK_NO_APST; ++ + return 0; + } + diff --git a/queue-6.6/series b/queue-6.6/series index f1c51061243..ed9afca4c0c 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -1,3 +1,20 @@ exec-fix-toctou-between-perm-check-and-set-uid-gid-usage.patch asoc-topology-clean-up-route-loading.patch asoc-topology-fix-route-memory-corruption.patch +loongarch-define-__arch_want_new_stat-in-unistd.h.patch +nfsd-rewrite-synopsis-of-nfsd_percpu_counters_init.patch +nfsd-fix-frame-size-warning-in-svc_export_parse.patch +sunrpc-don-t-change-sv_stats-if-it-doesn-t-exist.patch +nfsd-stop-setting-pg_stats-for-unused-stats.patch +sunrpc-pass-in-the-sv_stats-struct-through-svc_create_pooled.patch +sunrpc-remove-pg_stats-from-svc_program.patch +sunrpc-use-the-struct-net-as-the-svc-proc-private.patch +nfsd-rename-nfsd_net_-to-nfsd_stats_.patch +nfsd-expose-proc-net-sunrpc-nfsd-in-net-namespaces.patch +nfsd-make-all-of-the-nfsd-stats-per-network-namespace.patch +nfsd-remove-nfsd_stats-make-th_cnt-a-global-counter.patch +nfsd-make-svc_stat-per-network-namespace-instead-of-global.patch +mm-gup-stop-abusing-try_grab_folio.patch +nvme-pci-add-apst-quirk-for-lenovo-n60z-laptop.patch +genirq-cpuhotplug-skip-suspended-interrupts-when-restoring-affinity.patch +genirq-cpuhotplug-retry-with-cpu_online_mask-when-migration-fails.patch diff --git a/queue-6.6/sunrpc-don-t-change-sv_stats-if-it-doesn-t-exist.patch b/queue-6.6/sunrpc-don-t-change-sv_stats-if-it-doesn-t-exist.patch new file mode 100644 index 00000000000..ee5f43595ba --- /dev/null +++ b/queue-6.6/sunrpc-don-t-change-sv_stats-if-it-doesn-t-exist.patch @@ -0,0 +1,115 @@ +From stable+bounces-67395-greg=kroah.com@vger.kernel.org Tue Aug 13 00:36:48 2024 +From: cel@kernel.org +Date: Mon, 12 Aug 2024 18:35:55 -0400 +Subject: sunrpc: don't change ->sv_stats if it doesn't exist +To: +Cc: , pvorel@suse.cz, sherry.yang@oracle.com, calum.mackay@oracle.com, kernel-team@fb.com, 
Josef Bacik , Jeff Layton +Message-ID: <20240812223604.32592-4-cel@kernel.org> + +From: Josef Bacik + +[ Upstream commit ab42f4d9a26f1723dcfd6c93fcf768032b2bb5e7 ] + +We check for the existence of ->sv_stats elsewhere except in the core +processing code. It appears that only nfsd actual exports these values +anywhere, everybody else just has a write only copy of sv_stats in their +svc_program. Add a check for ->sv_stats before every adjustment to +allow us to eliminate the stats struct from all the users who don't +report the stats. + +Signed-off-by: Josef Bacik +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + net/sunrpc/svc.c | 27 ++++++++++++++++++--------- + 1 file changed, 18 insertions(+), 9 deletions(-) + +--- a/net/sunrpc/svc.c ++++ b/net/sunrpc/svc.c +@@ -1377,7 +1377,8 @@ svc_process_common(struct svc_rqst *rqst + goto err_bad_proc; + + /* Syntactic check complete */ +- serv->sv_stats->rpccnt++; ++ if (serv->sv_stats) ++ serv->sv_stats->rpccnt++; + trace_svc_process(rqstp, progp->pg_name); + + aoffset = xdr_stream_pos(xdr); +@@ -1429,7 +1430,8 @@ err_short_len: + goto close_xprt; + + err_bad_rpc: +- serv->sv_stats->rpcbadfmt++; ++ if (serv->sv_stats) ++ serv->sv_stats->rpcbadfmt++; + xdr_stream_encode_u32(xdr, RPC_MSG_DENIED); + xdr_stream_encode_u32(xdr, RPC_MISMATCH); + /* Only RPCv2 supported */ +@@ -1440,7 +1442,8 @@ err_bad_rpc: + err_bad_auth: + dprintk("svc: authentication failed (%d)\n", + be32_to_cpu(rqstp->rq_auth_stat)); +- serv->sv_stats->rpcbadauth++; ++ if (serv->sv_stats) ++ serv->sv_stats->rpcbadauth++; + /* Restore write pointer to location of reply status: */ + xdr_truncate_encode(xdr, XDR_UNIT * 2); + xdr_stream_encode_u32(xdr, RPC_MSG_DENIED); +@@ -1450,7 +1453,8 @@ err_bad_auth: + + err_bad_prog: + dprintk("svc: unknown program %d\n", rqstp->rq_prog); +- serv->sv_stats->rpcbadfmt++; ++ if (serv->sv_stats) ++ serv->sv_stats->rpcbadfmt++; + *rqstp->rq_accept_statp = rpc_prog_unavail; + goto 
sendit; + +@@ -1458,7 +1462,8 @@ err_bad_vers: + svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n", + rqstp->rq_vers, rqstp->rq_prog, progp->pg_name); + +- serv->sv_stats->rpcbadfmt++; ++ if (serv->sv_stats) ++ serv->sv_stats->rpcbadfmt++; + *rqstp->rq_accept_statp = rpc_prog_mismatch; + + /* +@@ -1472,19 +1477,22 @@ err_bad_vers: + err_bad_proc: + svc_printk(rqstp, "unknown procedure (%d)\n", rqstp->rq_proc); + +- serv->sv_stats->rpcbadfmt++; ++ if (serv->sv_stats) ++ serv->sv_stats->rpcbadfmt++; + *rqstp->rq_accept_statp = rpc_proc_unavail; + goto sendit; + + err_garbage_args: + svc_printk(rqstp, "failed to decode RPC header\n"); + +- serv->sv_stats->rpcbadfmt++; ++ if (serv->sv_stats) ++ serv->sv_stats->rpcbadfmt++; + *rqstp->rq_accept_statp = rpc_garbage_args; + goto sendit; + + err_system_err: +- serv->sv_stats->rpcbadfmt++; ++ if (serv->sv_stats) ++ serv->sv_stats->rpcbadfmt++; + *rqstp->rq_accept_statp = rpc_system_err; + goto sendit; + } +@@ -1536,7 +1544,8 @@ void svc_process(struct svc_rqst *rqstp) + out_baddir: + svc_printk(rqstp, "bad direction 0x%08x, dropping request\n", + be32_to_cpu(*p)); +- rqstp->rq_server->sv_stats->rpcbadfmt++; ++ if (rqstp->rq_server->sv_stats) ++ rqstp->rq_server->sv_stats->rpcbadfmt++; + out_drop: + svc_drop(rqstp); + } diff --git a/queue-6.6/sunrpc-pass-in-the-sv_stats-struct-through-svc_create_pooled.patch b/queue-6.6/sunrpc-pass-in-the-sv_stats-struct-through-svc_create_pooled.patch new file mode 100644 index 00000000000..3d631cd8ade --- /dev/null +++ b/queue-6.6/sunrpc-pass-in-the-sv_stats-struct-through-svc_create_pooled.patch @@ -0,0 +1,107 @@ +From stable+bounces-67397-greg=kroah.com@vger.kernel.org Tue Aug 13 00:37:12 2024 +From: cel@kernel.org +Date: Mon, 12 Aug 2024 18:35:57 -0400 +Subject: sunrpc: pass in the sv_stats struct through svc_create_pooled +To: +Cc: , pvorel@suse.cz, sherry.yang@oracle.com, calum.mackay@oracle.com, kernel-team@fb.com, Josef Bacik , Jeff Layton +Message-ID: 
<20240812223604.32592-6-cel@kernel.org> + +From: Josef Bacik + +[ Upstream commit f094323867668d50124886ad884b665de7319537 ] + +Since only one service actually reports the rpc stats there's not much +of a reason to have a pointer to it in the svc_program struct. Adjust +the svc_create_pooled function to take the sv_stats as an argument and +pass the struct through there as desired instead of getting it from the +svc_program->pg_stats. + +Signed-off-by: Josef Bacik +Reviewed-by: Jeff Layton +[ cel: adjusted to apply to v6.6.y ] +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfssvc.c | 3 ++- + include/linux/sunrpc/svc.h | 4 +++- + net/sunrpc/svc.c | 12 +++++++----- + 3 files changed, 12 insertions(+), 7 deletions(-) + +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -670,7 +670,8 @@ int nfsd_create_serv(struct net *net) + if (nfsd_max_blksize == 0) + nfsd_max_blksize = nfsd_get_default_max_blksize(); + nfsd_reset_versions(nn); +- serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd); ++ serv = svc_create_pooled(&nfsd_program, &nfsd_svcstats, ++ nfsd_max_blksize, nfsd); + if (serv == NULL) + return -ENOMEM; + +--- a/include/linux/sunrpc/svc.h ++++ b/include/linux/sunrpc/svc.h +@@ -408,7 +408,9 @@ bool svc_rqst_replace_page(struct sv + void svc_rqst_release_pages(struct svc_rqst *rqstp); + void svc_rqst_free(struct svc_rqst *); + void svc_exit_thread(struct svc_rqst *); +-struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, ++struct svc_serv * svc_create_pooled(struct svc_program *prog, ++ struct svc_stat *stats, ++ unsigned int bufsize, + int (*threadfn)(void *data)); + int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); + int svc_pool_stats_open(struct svc_serv *serv, struct file *file); +--- a/net/sunrpc/svc.c ++++ b/net/sunrpc/svc.c +@@ -453,8 +453,8 @@ __svc_init_bc(struct svc_serv *serv) + * Create an RPC service + */ + static struct svc_serv * +-__svc_create(struct svc_program *prog, 
unsigned int bufsize, int npools, +- int (*threadfn)(void *data)) ++__svc_create(struct svc_program *prog, struct svc_stat *stats, ++ unsigned int bufsize, int npools, int (*threadfn)(void *data)) + { + struct svc_serv *serv; + unsigned int vers; +@@ -466,7 +466,7 @@ __svc_create(struct svc_program *prog, u + serv->sv_name = prog->pg_name; + serv->sv_program = prog; + kref_init(&serv->sv_refcnt); +- serv->sv_stats = prog->pg_stats; ++ serv->sv_stats = stats; + if (bufsize > RPCSVC_MAXPAYLOAD) + bufsize = RPCSVC_MAXPAYLOAD; + serv->sv_max_payload = bufsize? bufsize : 4096; +@@ -532,26 +532,28 @@ __svc_create(struct svc_program *prog, u + struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize, + int (*threadfn)(void *data)) + { +- return __svc_create(prog, bufsize, 1, threadfn); ++ return __svc_create(prog, NULL, bufsize, 1, threadfn); + } + EXPORT_SYMBOL_GPL(svc_create); + + /** + * svc_create_pooled - Create an RPC service with pooled threads + * @prog: the RPC program the new service will handle ++ * @stats: the stats struct if desired + * @bufsize: maximum message size for @prog + * @threadfn: a function to service RPC requests for @prog + * + * Returns an instantiated struct svc_serv object or NULL. 
+ */ + struct svc_serv *svc_create_pooled(struct svc_program *prog, ++ struct svc_stat *stats, + unsigned int bufsize, + int (*threadfn)(void *data)) + { + struct svc_serv *serv; + unsigned int npools = svc_pool_map_get(); + +- serv = __svc_create(prog, bufsize, npools, threadfn); ++ serv = __svc_create(prog, stats, bufsize, npools, threadfn); + if (!serv) + goto out_err; + return serv; diff --git a/queue-6.6/sunrpc-remove-pg_stats-from-svc_program.patch b/queue-6.6/sunrpc-remove-pg_stats-from-svc_program.patch new file mode 100644 index 00000000000..84a12b2361b --- /dev/null +++ b/queue-6.6/sunrpc-remove-pg_stats-from-svc_program.patch @@ -0,0 +1,43 @@ +From stable+bounces-67398-greg=kroah.com@vger.kernel.org Tue Aug 13 00:37:59 2024 +From: cel@kernel.org +Date: Mon, 12 Aug 2024 18:35:58 -0400 +Subject: sunrpc: remove ->pg_stats from svc_program +To: +Cc: , pvorel@suse.cz, sherry.yang@oracle.com, calum.mackay@oracle.com, kernel-team@fb.com, Josef Bacik , Jeff Layton +Message-ID: <20240812223604.32592-7-cel@kernel.org> + +From: Josef Bacik + +[ Upstream commit 3f6ef182f144dcc9a4d942f97b6a8ed969f13c95 ] + +Now that this isn't used anywhere, remove it. 
+ +Signed-off-by: Josef Bacik +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfssvc.c | 1 - + include/linux/sunrpc/svc.h | 1 - + 2 files changed, 2 deletions(-) + +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -136,7 +136,6 @@ struct svc_program nfsd_program = { + .pg_vers = nfsd_version, /* version table */ + .pg_name = "nfsd", /* program name */ + .pg_class = "nfsd", /* authentication class */ +- .pg_stats = &nfsd_svcstats, /* version table */ + .pg_authenticate = &svc_set_client, /* export authentication */ + .pg_init_request = nfsd_init_request, + .pg_rpcbind_set = nfsd_rpcbind_set, +--- a/include/linux/sunrpc/svc.h ++++ b/include/linux/sunrpc/svc.h +@@ -336,7 +336,6 @@ struct svc_program { + const struct svc_version **pg_vers; /* version array */ + char * pg_name; /* service name */ + char * pg_class; /* class name: services sharing authentication */ +- struct svc_stat * pg_stats; /* rpc statistics */ + enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp); + __be32 (*pg_init_request)(struct svc_rqst *, + const struct svc_program *, diff --git a/queue-6.6/sunrpc-use-the-struct-net-as-the-svc-proc-private.patch b/queue-6.6/sunrpc-use-the-struct-net-as-the-svc-proc-private.patch new file mode 100644 index 00000000000..f19f77b7c46 --- /dev/null +++ b/queue-6.6/sunrpc-use-the-struct-net-as-the-svc-proc-private.patch @@ -0,0 +1,36 @@ +From stable+bounces-67399-greg=kroah.com@vger.kernel.org Tue Aug 13 00:38:26 2024 +From: cel@kernel.org +Date: Mon, 12 Aug 2024 18:35:59 -0400 +Subject: sunrpc: use the struct net as the svc proc private +To: +Cc: , pvorel@suse.cz, sherry.yang@oracle.com, calum.mackay@oracle.com, kernel-team@fb.com, Josef Bacik , Jeff Layton +Message-ID: <20240812223604.32592-8-cel@kernel.org> + +From: Josef Bacik + +[ Upstream commit 418b9687dece5bd763c09b5c27a801a7e3387be9 ] + +nfsd is the only thing using this helper, and it doesn't use the private +currently. 
When we switch to per-network namespace stats we will need +the struct net * in order to get to the nfsd_net. Use the net as the +proc private so we can utilize this when we make the switch over. + +Signed-off-by: Josef Bacik +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + net/sunrpc/stats.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sunrpc/stats.c ++++ b/net/sunrpc/stats.c +@@ -314,7 +314,7 @@ EXPORT_SYMBOL_GPL(rpc_proc_unregister); + struct proc_dir_entry * + svc_proc_register(struct net *net, struct svc_stat *statp, const struct proc_ops *proc_ops) + { +- return do_register(net, statp->program->pg_name, statp, proc_ops); ++ return do_register(net, statp->program->pg_name, net, proc_ops); + } + EXPORT_SYMBOL_GPL(svc_proc_register); +