From: Greg Kroah-Hartman
Date: Tue, 27 Aug 2024 13:17:26 +0000 (+0200)
Subject: 6.1-stable patches
X-Git-Tag: v6.1.107~21
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=dd14205812ee1de2823c2c927cf1c8b9ea8bb97e;p=thirdparty%2Fkernel%2Fstable-queue.git

6.1-stable patches

added patches:
    alsa-timer-relax-start-tick-time-check-for-slave-timer-elements.patch
    bluetooth-hci_ldisc-check-hci_uart_proto_ready-flag-in-hciuartgetproto.patch
    drm-amdgpu-vcn-identify-unified-queue-in-sw-init.patch
    drm-amdgpu-vcn-not-pause-dpg-for-unified-queue.patch
    gso-fix-dodgy-bit-handling-for-gso_udp_l4.patch
    kvm-x86-fire-timer-when-it-is-migrated-and-expired-and-in-oneshot-mode.patch
    mm-numa-no-task_numa_fault-call-if-pmd-is-changed.patch
    mm-numa-no-task_numa_fault-call-if-pte-is-changed.patch
    mm-vmalloc-fix-page-mapping-if-vm_area_alloc_pages-with-high-order-fallback-to-order-0.patch
    net-drop-bad-gso-csum_start-and-offset-in-virtio_net_hdr.patch
    net-more-strict-virtio_net_hdr_gso_udp_l4-validation.patch
    nfsd-call-nfsd_last_thread-before-final-nfsd_put.patch
    nfsd-don-t-call-locks_release_private-twice-concurrently.patch
    nfsd-drop-the-nfsd_put-helper.patch
    nfsd-fix-a-regression-in-nfsd_setattr.patch
    nfsd-separate-nfsd_last_thread-from-nfsd_put.patch
    nfsd-simplify-code-around-svc_exit_thread-call-in-nfsd.patch
    nfsd-simplify-error-paths-in-nfsd_svc.patch
    revert-s390-dasd-establish-dma-alignment.patch
    udp-allow-header-check-for-dodgy-gso_udp_l4-packets.patch
---

diff --git a/queue-6.1/alsa-timer-relax-start-tick-time-check-for-slave-timer-elements.patch b/queue-6.1/alsa-timer-relax-start-tick-time-check-for-slave-timer-elements.patch
new file mode 100644
index 00000000000..8ca96780a47
--- /dev/null
+++ b/queue-6.1/alsa-timer-relax-start-tick-time-check-for-slave-timer-elements.patch
@@ -0,0 +1,38 @@
+From ccbfcac05866ebe6eb3bc6d07b51d4ed4fcde436 Mon Sep 17 00:00:00 2001
+From: Takashi Iwai
+Date: Sat, 10 Aug 2024 10:48:32 +0200
+Subject: ALSA: timer: Relax start tick time check for slave timer elements
+
+From: Takashi Iwai
+
+commit ccbfcac05866ebe6eb3bc6d07b51d4ed4fcde436 upstream.
+
+The recent addition of a sanity check for a too low start tick time
+seems to break some applications that use aloop with a certain slave
+timer setup. They may have an initial resolution of 0, hence it's
+treated as if it were a too low value.
+
+Relax and skip the check for slave timer instances to address the
+regression.
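+
+A slave timer element has no hardware resolution of its own, which is
+why snd_timer_hw_resolution() can return 0 for it. With a zero
+resolution, the product in the check below is always under the limit,
+whatever tick count is requested (illustrative arithmetic, not part of
+the upstream commit):
+
+	/* slave element: resolution == 0 */
+	(u64)snd_timer_hw_resolution(timer) * ticks == 0 < 100000
+	/* -> start was refused with -EINVAL before this patch */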
+
+Fixes: 4a63bd179fa8 ("ALSA: timer: Set lower bound of start tick time")
+Cc:
+Link: https://github.com/raspberrypi/linux/issues/6294
+Link: https://patch.msgid.link/20240810084833.10939-1-tiwai@suse.de
+Signed-off-by: Takashi Iwai
+Signed-off-by: Greg Kroah-Hartman
+---
+ sound/core/timer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/sound/core/timer.c
++++ b/sound/core/timer.c
+@@ -556,7 +556,7 @@ static int snd_timer_start1(struct snd_t
+ /* check the actual time for the start tick;
+ * bail out as error if it's way too low (< 100us)
+ */
+- if (start) {
++ if (start && !(timer->hw.flags & SNDRV_TIMER_HW_SLAVE)) {
+ if ((u64)snd_timer_hw_resolution(timer) * ticks < 100000) {
+ result = -EINVAL;
+ goto unlock;
diff --git a/queue-6.1/bluetooth-hci_ldisc-check-hci_uart_proto_ready-flag-in-hciuartgetproto.patch b/queue-6.1/bluetooth-hci_ldisc-check-hci_uart_proto_ready-flag-in-hciuartgetproto.patch
new file mode 100644
index 00000000000..ef5c312102f
--- /dev/null
+++ b/queue-6.1/bluetooth-hci_ldisc-check-hci_uart_proto_ready-flag-in-hciuartgetproto.patch
@@ -0,0 +1,42 @@
+From 9c33663af9ad115f90c076a1828129a3fbadea98 Mon Sep 17 00:00:00 2001
+From: "Lee, Chun-Yi"
+Date: Mon, 10 Jul 2023 23:17:23 +0800
+Subject: Bluetooth: hci_ldisc: check HCI_UART_PROTO_READY flag in HCIUARTGETPROTO
+
+From: Lee, Chun-Yi
+
+commit 9c33663af9ad115f90c076a1828129a3fbadea98 upstream.
+
+This patch adds code to check the HCI_UART_PROTO_READY flag before
+accessing hci_uart->proto. It fixes the race condition in
+hci_uart_tty_ioctl() between HCIUARTSETPROTO and HCIUARTGETPROTO.
+This issue was found by Yu Hao and Weiteng Chen:
+
+BUG: general protection fault in hci_uart_tty_ioctl [1]
+
+The C reproducer information can also be found at link [2].
+
+Reported-by: Yu Hao
+Closes: https://lore.kernel.org/all/CA+UBctC3p49aTgzbVgkSZ2+TQcqq4fPDO7yZitFT5uBPDeCO2g@mail.gmail.com/ [1]
+Reported-by: Weiteng Chen
+Closes: https://lore.kernel.org/lkml/CA+UBctDPEvHdkHMwD340=n02rh+jNRJNNQ5LBZNA+Wm4Keh2ow@mail.gmail.com/T/ [2]
+Signed-off-by: "Lee, Chun-Yi"
+Signed-off-by: Luiz Augusto von Dentz
+Signed-off-by: Harshit Mogalapalli
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/bluetooth/hci_ldisc.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/bluetooth/hci_ldisc.c
++++ b/drivers/bluetooth/hci_ldisc.c
+@@ -770,7 +770,8 @@ static int hci_uart_tty_ioctl(struct tty
+ break;
+
+ case HCIUARTGETPROTO:
+- if (test_bit(HCI_UART_PROTO_SET, &hu->flags))
++ if (test_bit(HCI_UART_PROTO_SET, &hu->flags) &&
++ test_bit(HCI_UART_PROTO_READY, &hu->flags))
+ err = hu->proto->id;
+ else
+ err = -EUNATCH;
diff --git a/queue-6.1/drm-amdgpu-vcn-identify-unified-queue-in-sw-init.patch b/queue-6.1/drm-amdgpu-vcn-identify-unified-queue-in-sw-init.patch
new file mode 100644
index 00000000000..7bdf1f56b35
--- /dev/null
+++ b/queue-6.1/drm-amdgpu-vcn-identify-unified-queue-in-sw-init.patch
@@ -0,0 +1,171 @@
+From ecfa23c8df7ef3ea2a429dfe039341bf792e95b4 Mon Sep 17 00:00:00 2001
+From: Boyuan Zhang
+Date: Thu, 11 Jul 2024 16:19:54 -0400
+Subject: drm/amdgpu/vcn: identify unified queue in sw init
+
+From: Boyuan Zhang
+
+commit ecfa23c8df7ef3ea2a429dfe039341bf792e95b4 upstream.
+
+Determine whether VCN is using a unified queue in sw_init, instead of
+calling functions later on.
+ +v2: fix coding style + +Signed-off-by: Boyuan Zhang +Acked-by: Alex Deucher +Reviewed-by: Ruijing Dong +Signed-off-by: Alex Deucher +Signed-off-by: Mario Limonciello +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 39 ++++++++++++-------------------- + drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 1 + 2 files changed, 16 insertions(+), 24 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +@@ -239,6 +239,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_dev + return r; + } + ++ /* from vcn4 and above, only unified queue is used */ ++ adev->vcn.using_unified_queue = ++ adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0); ++ + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); + +@@ -357,18 +361,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_dev + return 0; + } + +-/* from vcn4 and above, only unified queue is used */ +-static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring) +-{ +- struct amdgpu_device *adev = ring->adev; +- bool ret = false; +- +- if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)) +- ret = true; +- +- return ret; +-} +- + bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) + { + bool ret = false; +@@ -806,12 +798,11 @@ static int amdgpu_vcn_dec_sw_send_msg(st + struct amdgpu_job *job; + struct amdgpu_ib *ib; + uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); +- bool sq = amdgpu_vcn_using_unified_queue(ring); + uint32_t *ib_checksum; + uint32_t ib_pack_in_dw; + int i, r; + +- if (sq) ++ if (adev->vcn.using_unified_queue) + ib_size_dw += 8; + + r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4, +@@ -823,7 +814,7 @@ static int amdgpu_vcn_dec_sw_send_msg(st + ib->length_dw = 0; + + /* single queue headers */ +- if (sq) { ++ if (adev->vcn.using_unified_queue) { + ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t) + + 4 + 2; /* engine info + decoding ib in dw */ + ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false); +@@ -842,7 +833,7 @@ static int amdgpu_vcn_dec_sw_send_msg(st + for (i = ib->length_dw; i < ib_size_dw; ++i) + ib->ptr[i] = 0x0; + +- if (sq) ++ if (adev->vcn.using_unified_queue) + amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw); + + r = amdgpu_job_submit_direct(job, ring, &f); +@@ -932,15 +923,15 @@ static int amdgpu_vcn_enc_get_create_msg + struct dma_fence **fence) + { + unsigned int ib_size_dw = 16; ++ struct amdgpu_device *adev = ring->adev; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + uint32_t *ib_checksum = NULL; + uint64_t addr; +- bool sq = amdgpu_vcn_using_unified_queue(ring); + int i, r; + +- if (sq) ++ if (adev->vcn.using_unified_queue) + ib_size_dw += 8; + + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, +@@ -953,7 +944,7 @@ static int amdgpu_vcn_enc_get_create_msg + + ib->length_dw = 0; + +- if (sq) ++ if (adev->vcn.using_unified_queue) + ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); + + ib->ptr[ib->length_dw++] = 0x00000018; +@@ -975,7 +966,7 @@ static int amdgpu_vcn_enc_get_create_msg + for (i = ib->length_dw; i < ib_size_dw; ++i) + ib->ptr[i] = 0x0; + +- if (sq) ++ if (adev->vcn.using_unified_queue) + amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); + + r = amdgpu_job_submit_direct(job, ring, &f); +@@ -998,15 +989,15 @@ static int amdgpu_vcn_enc_get_destroy_ms + struct dma_fence 
**fence) + { + unsigned int ib_size_dw = 16; ++ struct amdgpu_device *adev = ring->adev; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + uint32_t *ib_checksum = NULL; + uint64_t addr; +- bool sq = amdgpu_vcn_using_unified_queue(ring); + int i, r; + +- if (sq) ++ if (adev->vcn.using_unified_queue) + ib_size_dw += 8; + + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, +@@ -1019,7 +1010,7 @@ static int amdgpu_vcn_enc_get_destroy_ms + + ib->length_dw = 0; + +- if (sq) ++ if (adev->vcn.using_unified_queue) + ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); + + ib->ptr[ib->length_dw++] = 0x00000018; +@@ -1041,7 +1032,7 @@ static int amdgpu_vcn_enc_get_destroy_ms + for (i = ib->length_dw; i < ib_size_dw; ++i) + ib->ptr[i] = 0x0; + +- if (sq) ++ if (adev->vcn.using_unified_queue) + amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); + + r = amdgpu_job_submit_direct(job, ring, &f); +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +@@ -271,6 +271,7 @@ struct amdgpu_vcn { + + struct ras_common_if *ras_if; + struct amdgpu_vcn_ras *ras; ++ bool using_unified_queue; + }; + + struct amdgpu_fw_shared_rb_ptrs_struct { diff --git a/queue-6.1/drm-amdgpu-vcn-not-pause-dpg-for-unified-queue.patch b/queue-6.1/drm-amdgpu-vcn-not-pause-dpg-for-unified-queue.patch new file mode 100644 index 00000000000..379d8c810aa --- /dev/null +++ b/queue-6.1/drm-amdgpu-vcn-not-pause-dpg-for-unified-queue.patch @@ -0,0 +1,65 @@ +From 7d75ef3736a025db441be652c8cc8e84044a215f Mon Sep 17 00:00:00 2001 +From: Boyuan Zhang +Date: Wed, 10 Jul 2024 16:17:12 -0400 +Subject: drm/amdgpu/vcn: not pause dpg for unified queue + +From: Boyuan Zhang + +commit 7d75ef3736a025db441be652c8cc8e84044a215f upstream. + +For unified queue, DPG pause for encoding is done inside VCN firmware, +so there is no need to pause dpg based on ring type in kernel. + +For VCN3 and below, pausing DPG for encoding in kernel is still needed. 
+ +v2: add more comments +v3: update commit message + +Signed-off-by: Boyuan Zhang +Acked-by: Alex Deucher +Reviewed-by: Ruijing Dong +Signed-off-by: Alex Deucher +Signed-off-by: Mario Limonciello +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +@@ -472,7 +472,9 @@ static void amdgpu_vcn_idle_work_handler + fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); + } + +- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { ++ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ ++ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && ++ !adev->vcn.using_unified_queue) { + struct dpg_pause_state new_state; + + if (fence[j] || +@@ -518,7 +520,9 @@ void amdgpu_vcn_ring_begin_use(struct am + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); + +- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { ++ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ ++ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && ++ !adev->vcn.using_unified_queue) { + struct dpg_pause_state new_state; + + if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) { +@@ -544,8 +548,12 @@ void amdgpu_vcn_ring_begin_use(struct am + + void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) + { ++ struct amdgpu_device *adev = ring->adev; ++ ++ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && +- ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) ++ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC && ++ !adev->vcn.using_unified_queue) + atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt); + + atomic_dec(&ring->adev->vcn.total_submission_cnt); diff --git a/queue-6.1/gso-fix-dodgy-bit-handling-for-gso_udp_l4.patch b/queue-6.1/gso-fix-dodgy-bit-handling-for-gso_udp_l4.patch new file mode 100644 index 00000000000..0b6f628b473 --- /dev/null +++ b/queue-6.1/gso-fix-dodgy-bit-handling-for-gso_udp_l4.patch @@ -0,0 +1,78 @@ +From 9840036786d90cea11a90d1f30b6dc003b34ee67 Mon Sep 17 00:00:00 2001 +From: Yan Zhai +Date: Thu, 13 Jul 2023 10:28:00 -0700 +Subject: gso: fix dodgy bit handling for GSO_UDP_L4 + +From: Yan Zhai + +commit 9840036786d90cea11a90d1f30b6dc003b34ee67 upstream. + +Commit 1fd54773c267 ("udp: allow header check for dodgy GSO_UDP_L4 +packets.") checks DODGY bit for UDP, but for packets that can be fed +directly to the device after gso_segs reset, it actually falls through +to fragmentation: + +https://lore.kernel.org/all/CAJPywTKDdjtwkLVUW6LRA2FU912qcDmQOQGt2WaDo28KzYDg+A@mail.gmail.com/ + +This change restores the expected behavior of GSO_UDP_L4 packets. + +Fixes: 1fd54773c267 ("udp: allow header check for dodgy GSO_UDP_L4 packets.") +Suggested-by: Willem de Bruijn +Signed-off-by: Yan Zhai +Reviewed-by: Willem de Bruijn +Acked-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/udp_offload.c | 16 +++++++++++----- + net/ipv6/udp_offload.c | 3 +-- + 2 files changed, 12 insertions(+), 7 deletions(-) + +--- a/net/ipv4/udp_offload.c ++++ b/net/ipv4/udp_offload.c +@@ -273,13 +273,20 @@ struct sk_buff *__udp_gso_segment(struct + __sum16 check; + __be16 newlen; + +- if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) +- return __udp_gso_segment_list(gso_skb, features, is_ipv6); +- + mss = skb_shinfo(gso_skb)->gso_size; + if (gso_skb->len <= sizeof(*uh) + mss) + return ERR_PTR(-EINVAL); + ++ if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) { ++ /* Packet is from an untrusted source, reset gso_segs. */ ++ skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh), ++ mss); ++ return NULL; ++ } ++ ++ if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) ++ return __udp_gso_segment_list(gso_skb, features, is_ipv6); ++ + skb_pull(gso_skb, sizeof(*uh)); + + /* clear destructor to avoid skb_segment assigning it to tail */ +@@ -387,8 +394,7 @@ static struct sk_buff *udp4_ufo_fragment + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto out; + +- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && +- !skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) ++ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) + return __udp_gso_segment(skb, features, false); + + mss = skb_shinfo(skb)->gso_size; +--- a/net/ipv6/udp_offload.c ++++ b/net/ipv6/udp_offload.c +@@ -42,8 +42,7 @@ static struct sk_buff *udp6_ufo_fragment + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto out; + +- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && +- !skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) ++ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) + return __udp_gso_segment(skb, features, true); + + mss = skb_shinfo(skb)->gso_size; diff --git a/queue-6.1/kvm-x86-fire-timer-when-it-is-migrated-and-expired-and-in-oneshot-mode.patch b/queue-6.1/kvm-x86-fire-timer-when-it-is-migrated-and-expired-and-in-oneshot-mode.patch new file mode 100644 index 00000000000..f920fc05b64 --- /dev/null +++ b/queue-6.1/kvm-x86-fire-timer-when-it-is-migrated-and-expired-and-in-oneshot-mode.patch @@ -0,0 +1,44 @@ +From 8e6ed96cdd5001c55fccc80a17f651741c1ca7d2 Mon Sep 17 00:00:00 2001 +From: Li RongQing +Date: Fri, 6 Jan 2023 12:06:25 +0800 +Subject: KVM: x86: fire timer when it is migrated and expired, and in oneshot mode + +From: Li RongQing + +commit 8e6ed96cdd5001c55fccc80a17f651741c1ca7d2 upstream. 
+
+When a vCPU is migrated, if its timer has expired, KVM _should_ fire
+the timer ASAP; zeroing the deadline here will cause the timer to
+fire immediately on the destination.
+
+Cc: Sean Christopherson
+Cc: Peter Shier
+Cc: Jim Mattson
+Cc: Wanpeng Li
+Cc: Paolo Bonzini
+Signed-off-by: Li RongQing
+Link: https://lore.kernel.org/r/20230106040625.8404-1-lirongqing@baidu.com
+Signed-off-by: Sean Christopherson
+Signed-off-by: David Hunter
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kvm/lapic.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -1843,8 +1843,12 @@ static bool set_target_expiration(struct
+ if (unlikely(count_reg != APIC_TMICT)) {
+ deadline = tmict_to_ns(apic,
+ kvm_lapic_get_reg(apic, count_reg));
+- if (unlikely(deadline <= 0))
+- deadline = apic->lapic_timer.period;
++ if (unlikely(deadline <= 0)) {
++ if (apic_lvtt_period(apic))
++ deadline = apic->lapic_timer.period;
++ else
++ deadline = 0;
++ }
+ else if (unlikely(deadline > apic->lapic_timer.period)) {
+ pr_info_ratelimited(
+ "kvm: vcpu %i: requested lapic timer restore with "
diff --git a/queue-6.1/mm-numa-no-task_numa_fault-call-if-pmd-is-changed.patch b/queue-6.1/mm-numa-no-task_numa_fault-call-if-pmd-is-changed.patch
new file mode 100644
index 00000000000..7f9dabe9fc2
--- /dev/null
+++ b/queue-6.1/mm-numa-no-task_numa_fault-call-if-pmd-is-changed.patch
@@ -0,0 +1,94 @@
+From fd8c35a92910f4829b7c99841f39b1b952c259d5 Mon Sep 17 00:00:00 2001
+From: Zi Yan
+Date: Fri, 9 Aug 2024 10:59:05 -0400
+Subject: mm/numa: no task_numa_fault() call if PMD is changed
+
+From: Zi Yan
+
+commit fd8c35a92910f4829b7c99841f39b1b952c259d5 upstream.
+
+When handling a numa page fault, task_numa_fault() should be called by a
+process that restores the page table of the faulted folio to avoid
+duplicated stats counting. Commit c5b5a3dd2c1f ("mm: thp: refactor NUMA
+fault handling") restructured do_huge_pmd_numa_page() and did not avoid
+the task_numa_fault() call in the second page table check after a numa
+migration failure. Fix it by making all !pmd_same() cases return
+immediately.
+
+This issue can cause task_numa_fault() to be called more than necessary
+and lead to unexpected numa balancing results (it is hard to tell whether
+the issue will cause a positive or negative performance impact due to
+the duplicated numa fault counting).
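+
+Condensed view of the flow this patch removes (sketch only; the full
+context is in the do_huge_pmd_numa_page() hunks below):
+
+	} else {		/* numa migration failed */
+		vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+		if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
+			spin_unlock(vmf->ptl);
+			goto out;	/* another task restored the PMD */
+		}
+		goto out_map;
+	}
+out:
+	if (page_nid != NUMA_NO_NODE)
+		task_numa_fault(...);	/* counted again -> duplicated stats */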
+
+Link: https://lkml.kernel.org/r/20240809145906.1513458-3-ziy@nvidia.com
+Fixes: c5b5a3dd2c1f ("mm: thp: refactor NUMA fault handling")
+Reported-by: "Huang, Ying"
+Closes: https://lore.kernel.org/linux-mm/87zfqfw0yw.fsf@yhuang6-desk2.ccr.corp.intel.com/
+Signed-off-by: Zi Yan
+Acked-by: David Hildenbrand
+Cc: Baolin Wang
+Cc: "Huang, Ying"
+Cc: Kefeng Wang
+Cc: Mel Gorman
+Cc: Yang Shi
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/huge_memory.c | 30 +++++++++++++-----------------
+ 1 file changed, 13 insertions(+), 17 deletions(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1492,7 +1492,7 @@ vm_fault_t do_huge_pmd_numa_page(struct
+ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+ if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
+ spin_unlock(vmf->ptl);
+- goto out;
++ return 0;
+ }
+
+ pmd = pmd_modify(oldpmd, vma->vm_page_prot);
+@@ -1525,23 +1525,16 @@ vm_fault_t do_huge_pmd_numa_page(struct
+ if (migrated) {
+ flags |= TNF_MIGRATED;
+ page_nid = target_nid;
+- } else {
+- flags |= TNF_MIGRATE_FAIL;
+- vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+- if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
+- spin_unlock(vmf->ptl);
+- goto out;
+- }
+- goto out_map;
++ task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);
++ return 0;
+ }
+
+-out:
+- if (page_nid != NUMA_NO_NODE)
+- task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR,
+- flags);
+-
+- return 0;
+-
++ flags |= TNF_MIGRATE_FAIL;
++ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
++ if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
++ spin_unlock(vmf->ptl);
++ return 0;
++ }
+ out_map:
+ /* Restore the PMD */
+ pmd = pmd_modify(oldpmd, vma->vm_page_prot);
+@@ -1551,7 +1544,10 @@ out_map:
+ set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
+ update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
+ spin_unlock(vmf->ptl);
+- goto out;
++
++ if (page_nid != NUMA_NO_NODE)
++ task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);
++ return 0;
+ }
+
+ /*
diff --git a/queue-6.1/mm-numa-no-task_numa_fault-call-if-pte-is-changed.patch b/queue-6.1/mm-numa-no-task_numa_fault-call-if-pte-is-changed.patch
new file mode 100644
index 00000000000..f2789e6530f
--- /dev/null
+++ b/queue-6.1/mm-numa-no-task_numa_fault-call-if-pte-is-changed.patch
@@ -0,0 +1,93 @@
+From 40b760cfd44566bca791c80e0720d70d75382b84 Mon Sep 17 00:00:00 2001
+From: Zi Yan
+Date: Fri, 9 Aug 2024 10:59:04 -0400
+Subject: mm/numa: no task_numa_fault() call if PTE is changed
+
+From: Zi Yan
+
+commit 40b760cfd44566bca791c80e0720d70d75382b84 upstream.
+
+When handling a numa page fault, task_numa_fault() should be called by a
+process that restores the page table of the faulted folio to avoid
+duplicated stats counting. Commit b99a342d4f11 ("NUMA balancing: reduce
+TLB flush via delaying mapping on hint page fault") restructured
+do_numa_page() and did not avoid the task_numa_fault() call in the second
+page table check after a numa migration failure. Fix it by making all
+!pte_same() cases return immediately.
+
+This issue can cause task_numa_fault() to be called more than necessary
+and lead to unexpected numa balancing results (it is hard to tell whether
+the issue will cause a positive or negative performance impact due to
+the duplicated numa fault counting).
+ +Link: https://lkml.kernel.org/r/20240809145906.1513458-2-ziy@nvidia.com +Fixes: b99a342d4f11 ("NUMA balancing: reduce TLB flush via delaying mapping on hint page fault") +Signed-off-by: Zi Yan +Reported-by: "Huang, Ying" +Closes: https://lore.kernel.org/linux-mm/87zfqfw0yw.fsf@yhuang6-desk2.ccr.corp.intel.com/ +Acked-by: David Hildenbrand +Cc: Baolin Wang +Cc: Kefeng Wang +Cc: Mel Gorman +Cc: Yang Shi +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/memory.c | 29 ++++++++++++++--------------- + 1 file changed, 14 insertions(+), 15 deletions(-) + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -4786,7 +4786,7 @@ static vm_fault_t do_numa_page(struct vm + spin_lock(vmf->ptl); + if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { + pte_unmap_unlock(vmf->pte, vmf->ptl); +- goto out; ++ return 0; + } + + /* Get the normal PTE */ +@@ -4841,21 +4841,17 @@ static vm_fault_t do_numa_page(struct vm + if (migrate_misplaced_page(page, vma, target_nid)) { + page_nid = target_nid; + flags |= TNF_MIGRATED; +- } else { +- flags |= TNF_MIGRATE_FAIL; +- vmf->pte = pte_offset_map(vmf->pmd, vmf->address); +- spin_lock(vmf->ptl); +- if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { +- pte_unmap_unlock(vmf->pte, vmf->ptl); +- goto out; +- } +- goto out_map; ++ task_numa_fault(last_cpupid, page_nid, 1, flags); ++ return 0; + } + +-out: +- if (page_nid != NUMA_NO_NODE) +- task_numa_fault(last_cpupid, page_nid, 1, flags); +- return 0; ++ flags |= TNF_MIGRATE_FAIL; ++ vmf->pte = pte_offset_map(vmf->pmd, vmf->address); ++ spin_lock(vmf->ptl); ++ if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return 0; ++ } + out_map: + /* + * Make it present again, depending on how arch implements +@@ -4869,7 +4865,10 @@ out_map: + ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte); + update_mmu_cache(vma, vmf->address, vmf->pte); + pte_unmap_unlock(vmf->pte, vmf->ptl); +- goto out; ++ ++ if (page_nid != NUMA_NO_NODE) ++ task_numa_fault(last_cpupid, page_nid, 1, flags); ++ return 0; + } + + static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) diff --git a/queue-6.1/mm-vmalloc-fix-page-mapping-if-vm_area_alloc_pages-with-high-order-fallback-to-order-0.patch b/queue-6.1/mm-vmalloc-fix-page-mapping-if-vm_area_alloc_pages-with-high-order-fallback-to-order-0.patch new file mode 100644 index 00000000000..623b41e1b2d --- /dev/null +++ b/queue-6.1/mm-vmalloc-fix-page-mapping-if-vm_area_alloc_pages-with-high-order-fallback-to-order-0.patch @@ -0,0 +1,68 @@ +From 61ebe5a747da649057c37be1c37eb934b4af79ca Mon Sep 17 00:00:00 2001 +From: Hailong Liu +Date: Thu, 8 Aug 2024 20:19:56 +0800 +Subject: mm/vmalloc: fix page mapping if vm_area_alloc_pages() with high order fallback to order 0 + +From: Hailong Liu + +commit 61ebe5a747da649057c37be1c37eb934b4af79ca upstream. + +The __vmap_pages_range_noflush() assumes its argument pages** contains +pages with the same page shift. However, since commit e9c3cda4d86e ("mm, +vmalloc: fix high order __GFP_NOFAIL allocations"), if gfp_flags includes +__GFP_NOFAIL with high order in vm_area_alloc_pages() and page allocation +failed for high order, the pages** may contain two different page shifts +(high order and order-0). This could lead __vmap_pages_range_noflush() to +perform incorrect mappings, potentially resulting in memory corruption. 
+
+Users might encounter this as follows (vmap_allow_huge = true, 2M is for
+PMD_SIZE):
+
+kvmalloc(2M, __GFP_NOFAIL|GFP_X)
+ __vmalloc_node_range_noprof(vm_flags=VM_ALLOW_HUGE_VMAP)
+ vm_area_alloc_pages(order=9) ---> order-9 allocation failed and fallback to order-0
+ vmap_pages_range()
+ vmap_pages_range_noflush()
+ __vmap_pages_range_noflush(page_shift = 21) ----> wrong mapping happens
+
+We can remove the fallback code because if a high-order allocation fails,
+__vmalloc_node_range_noprof() will retry with order-0, which makes the
+fallback to order-0 here unnecessary. Fix this by removing the fallback
+code.
+
+Link: https://lkml.kernel.org/r/20240808122019.3361-1-hailong.liu@oppo.com
+Fixes: e9c3cda4d86e ("mm, vmalloc: fix high order __GFP_NOFAIL allocations")
+Signed-off-by: Hailong Liu
+Reported-by: Tangquan Zheng
+Reviewed-by: Baoquan He
+Reviewed-by: Uladzislau Rezki (Sony)
+Acked-by: Barry Song
+Acked-by: Michal Hocko
+Cc: Matthew Wilcox
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/vmalloc.c | 11 ++---------
+ 1 file changed, 2 insertions(+), 9 deletions(-)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -2992,15 +2992,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
+ page = alloc_pages(alloc_gfp, order);
+ else
+ page = alloc_pages_node(nid, alloc_gfp, order);
+- if (unlikely(!page)) {
+- if (!nofail)
+- break;
+-
+- /* fall back to the zero order allocations */
+- alloc_gfp |= __GFP_NOFAIL;
+- order = 0;
+- continue;
+- }
++ if (unlikely(!page))
++ break;
+
+ /*
+ * Higher order allocations must be able to be treated as
diff --git a/queue-6.1/net-drop-bad-gso-csum_start-and-offset-in-virtio_net_hdr.patch b/queue-6.1/net-drop-bad-gso-csum_start-and-offset-in-virtio_net_hdr.patch
new file mode 100644
index 00000000000..52585250a85
--- /dev/null
+++ b/queue-6.1/net-drop-bad-gso-csum_start-and-offset-in-virtio_net_hdr.patch
@@ -0,0 +1,147 @@
+From 89add40066f9ed9abe5f7f886fe5789ff7e0c50e Mon Sep 17 00:00:00 2001
+From: Willem de Bruijn
+Date: Mon, 29 Jul 2024 16:10:12 -0400
+Subject: net: drop bad gso csum_start and offset in virtio_net_hdr
+
+From: Willem de Bruijn
+
+commit 89add40066f9ed9abe5f7f886fe5789ff7e0c50e upstream.
+
+Tighten csum_start and csum_offset checks in virtio_net_hdr_to_skb
+for GSO packets.
+
+The function already checks that a checksum requested with
+VIRTIO_NET_HDR_F_NEEDS_CSUM is in skb linear. But for GSO packets
+this might not hold for segs after segmentation.
+
+Syzkaller demonstrated that this warning in skb_checksum_help can be
+reached:
+
+ offset = skb_checksum_start_offset(skb);
+ ret = -EINVAL;
+ if (WARN_ON_ONCE(offset >= skb_headlen(skb)))
+
+By injecting a TSO packet:
+
+WARNING: CPU: 1 PID: 3539 at net/core/dev.c:3284 skb_checksum_help+0x3d0/0x5b0
+ ip_do_fragment+0x209/0x1b20 net/ipv4/ip_output.c:774
+ ip_finish_output_gso net/ipv4/ip_output.c:279 [inline]
+ __ip_finish_output+0x2bd/0x4b0 net/ipv4/ip_output.c:301
+ iptunnel_xmit+0x50c/0x930 net/ipv4/ip_tunnel_core.c:82
+ ip_tunnel_xmit+0x2296/0x2c70 net/ipv4/ip_tunnel.c:813
+ __gre_xmit net/ipv4/ip_gre.c:469 [inline]
+ ipgre_xmit+0x759/0xa60 net/ipv4/ip_gre.c:661
+ __netdev_start_xmit include/linux/netdevice.h:4850 [inline]
+ netdev_start_xmit include/linux/netdevice.h:4864 [inline]
+ xmit_one net/core/dev.c:3595 [inline]
+ dev_hard_start_xmit+0x261/0x8c0 net/core/dev.c:3611
+ __dev_queue_xmit+0x1b97/0x3c90 net/core/dev.c:4261
+ packet_snd net/packet/af_packet.c:3073 [inline]
+
+The geometry of the bad input packet at tcp_gso_segment:
+
+[ 52.003050][ T8403] skb len=12202 headroom=244 headlen=12093 tailroom=0
+[ 52.003050][ T8403] mac=(168,24) mac_len=24 net=(192,52) trans=244
+[ 52.003050][ T8403] shinfo(txflags=0 nr_frags=1 gso(size=1552 type=3 segs=0))
+[ 52.003050][ T8403] csum(0x60000c7 start=199 offset=1536
+ip_summed=3 complete_sw=0 valid=0 level=0)
+
+Mitigate with stricter input validation.
+
+csum_offset: for GSO packets, deduce the correct value from gso_type.
+This is already done for USO. Extend it to TSO. Leave UFO as is:
+udp[46]_ufo_fragment ignores these fields and always computes the
+checksum in software.
+
+csum_start: finding the real offset requires parsing to the transport
+header. Do not add a parser, use existing segmentation parsing. Thanks
+to SKB_GSO_DODGY, that also catches bad packets that are hw offloaded.
+Again test both TSO and USO. Do not test UFO for the above reason, and
+do not test UDP tunnel offload.
+
+GSO packets are almost always CHECKSUM_PARTIAL. USO packets may be
+CHECKSUM_NONE since commit 10154dbded6d6 ("udp: Allow GSO transmit
+from devices with no checksum offload"), but even then these fields
+are initialized correctly in udp4_hwcsum/udp6_hwcsum_outgoing. So no
+need to test for ip_summed == CHECKSUM_PARTIAL first.
+
+This revises an existing fix mentioned in the Fixes tag, which broke
+small packets with GSO offload, as detected by kselftests.
+ +Link: https://syzkaller.appspot.com/bug?extid=e1db31216c789f552871 +Link: https://lore.kernel.org/netdev/20240723223109.2196886-1-kuba@kernel.org +Fixes: e269d79c7d35 ("net: missing check virtio") +Cc: stable@vger.kernel.org +Signed-off-by: Willem de Bruijn +Link: https://patch.msgid.link/20240729201108.1615114-1-willemdebruijn.kernel@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/virtio_net.h | 16 +++++----------- + net/ipv4/tcp_offload.c | 3 +++ + net/ipv4/udp_offload.c | 4 ++++ + 3 files changed, 12 insertions(+), 11 deletions(-) + +--- a/include/linux/virtio_net.h ++++ b/include/linux/virtio_net.h +@@ -51,7 +51,6 @@ static inline int virtio_net_hdr_to_skb( + unsigned int thlen = 0; + unsigned int p_off = 0; + unsigned int ip_proto; +- u64 ret, remainder, gso_size; + + if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { +@@ -88,16 +87,6 @@ static inline int virtio_net_hdr_to_skb( + u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); + u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16)); + +- if (hdr->gso_size) { +- gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); +- ret = div64_u64_rem(skb->len, gso_size, &remainder); +- if (!(ret && (hdr->gso_size > needed) && +- ((remainder > needed) || (remainder == 0)))) { +- return -EINVAL; +- } +- skb_shinfo(skb)->tx_flags |= SKBFL_SHARED_FRAG; +- } +- + if (!pskb_may_pull(skb, needed)) + return -EINVAL; + +@@ -170,6 +159,11 @@ retry: + if (gso_type != SKB_GSO_UDP_L4) + return -EINVAL; + break; ++ case SKB_GSO_TCPV4: ++ case SKB_GSO_TCPV6: ++ if (skb->csum_offset != offsetof(struct tcphdr, check)) ++ return -EINVAL; ++ break; + } + + /* Kernel has a special handling for GSO_BY_FRAGS. */ +--- a/net/ipv4/tcp_offload.c ++++ b/net/ipv4/tcp_offload.c +@@ -72,6 +72,9 @@ struct sk_buff *tcp_gso_segment(struct s + if (thlen < sizeof(*th)) + goto out; + ++ if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb))) ++ goto out; ++ + if (!pskb_may_pull(skb, thlen)) + goto out; + +--- a/net/ipv4/udp_offload.c ++++ b/net/ipv4/udp_offload.c +@@ -277,6 +277,10 @@ struct sk_buff *__udp_gso_segment(struct + if (gso_skb->len <= sizeof(*uh) + mss) + return ERR_PTR(-EINVAL); + ++ if (unlikely(skb_checksum_start(gso_skb) != ++ skb_transport_header(gso_skb))) ++ return ERR_PTR(-EINVAL); ++ + if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh), diff --git a/queue-6.1/net-more-strict-virtio_net_hdr_gso_udp_l4-validation.patch b/queue-6.1/net-more-strict-virtio_net_hdr_gso_udp_l4-validation.patch new file mode 100644 index 00000000000..d58bf8dc465 --- /dev/null +++ b/queue-6.1/net-more-strict-virtio_net_hdr_gso_udp_l4-validation.patch @@ -0,0 +1,83 @@ +From fc8b2a619469378717e7270d2a4e1ef93c585f7a Mon Sep 17 00:00:00 2001 +From: Willem de Bruijn +Date: Wed, 11 Oct 2023 10:01:14 -0400 +Subject: net: more strict VIRTIO_NET_HDR_GSO_UDP_L4 validation + +From: Willem de Bruijn + +commit fc8b2a619469378717e7270d2a4e1ef93c585f7a upstream. + +Syzbot reported two new paths to hit an internal WARNING using the +new virtio gso type VIRTIO_NET_HDR_GSO_UDP_L4. 
+ + RIP: 0010:skb_checksum_help+0x4a2/0x600 net/core/dev.c:3260 + skb len=64521 gso_size=344 +and + + RIP: 0010:skb_warn_bad_offload+0x118/0x240 net/core/dev.c:3262 + +Older virtio types have historically had loose restrictions, leading +to many entirely impractical fuzzer generated packets causing +problems deep in the kernel stack. Ideally, we would have had strict +validation for all types from the start. + +New virtio types can have tighter validation. Limit UDP GSO packets +inserted via virtio to the same limits imposed by the UDP_SEGMENT +socket interface: + +1. must use checksum offload +2. checksum offload matches UDP header +3. no more segments than UDP_MAX_SEGMENTS +4. UDP GSO does not take modifier flags, notably SKB_GSO_TCP_ECN + +Fixes: 860b7f27b8f7 ("linux/virtio_net.h: Support USO offload in vnet header.") +Reported-by: syzbot+01cdbc31e9c0ae9b33ac@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/0000000000005039270605eb0b7f@google.com/ +Reported-by: syzbot+c99d835ff081ca30f986@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/0000000000005426680605eb0b9f@google.com/ +Signed-off-by: Willem de Bruijn +Reviewed-by: Eric Dumazet +Acked-by: Jason Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/virtio_net.h | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +--- a/include/linux/virtio_net.h ++++ b/include/linux/virtio_net.h +@@ -3,8 +3,8 @@ + #define _LINUX_VIRTIO_NET_H + + #include ++#include + #include +-#include + #include + + static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) +@@ -155,9 +155,22 @@ retry: + unsigned int nh_off = p_off; + struct skb_shared_info *shinfo = skb_shinfo(skb); + +- /* UFO may not include transport header in gso_size. */ +- if (gso_type & SKB_GSO_UDP) ++ switch (gso_type & ~SKB_GSO_TCP_ECN) { ++ case SKB_GSO_UDP: ++ /* UFO may not include transport header in gso_size. */ + nh_off -= thlen; ++ break; ++ case SKB_GSO_UDP_L4: ++ if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) ++ return -EINVAL; ++ if (skb->csum_offset != offsetof(struct udphdr, check)) ++ return -EINVAL; ++ if (skb->len - p_off > gso_size * UDP_MAX_SEGMENTS) ++ return -EINVAL; ++ if (gso_type != SKB_GSO_UDP_L4) ++ return -EINVAL; ++ break; ++ } + + /* Kernel has a special handling for GSO_BY_FRAGS. */ + if (gso_size == GSO_BY_FRAGS) diff --git a/queue-6.1/nfsd-call-nfsd_last_thread-before-final-nfsd_put.patch b/queue-6.1/nfsd-call-nfsd_last_thread-before-final-nfsd_put.patch new file mode 100644 index 00000000000..50b5f83a50a --- /dev/null +++ b/queue-6.1/nfsd-call-nfsd_last_thread-before-final-nfsd_put.patch @@ -0,0 +1,77 @@ +From stable+bounces-70236-greg=kroah.com@vger.kernel.org Mon Aug 26 17:07:51 2024 +From: cel@kernel.org +Date: Mon, 26 Aug 2024 11:07:00 -0400 +Subject: nfsd: call nfsd_last_thread() before final nfsd_put() +To: +Cc: , lilingfeng3@huawei.com, NeilBrown , Jeff Layton +Message-ID: <20240826150703.13987-5-cel@kernel.org> + +From: NeilBrown + +[ Upstream commit 2a501f55cd641eb4d3c16a2eab0d678693fac663 ] + +If write_ports_addfd or write_ports_addxprt fail, they call nfsd_put() +without calling nfsd_last_thread(). This leaves nn->nfsd_serv pointing +to a structure that has been freed. + +So remove 'static' from nfsd_last_thread() and call it when the +nfsd_serv is about to be destroyed. 
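+
+The broken failure path, roughly (sketch only; the real code is in the
+__write_ports_addfd() hunk below):
+
+	err = svc_addsock(nn->nfsd_serv, ...);	/* fails */
+	nfsd_put(net);	/* may drop the last reference, freeing the
+			 * svc_serv while nn->nfsd_serv still points
+			 * at it */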
+ +Fixes: ec52361df99b ("SUNRPC: stop using ->sv_nrthreads as a refcount") +Signed-off-by: NeilBrown +Reviewed-by: Jeff Layton +Cc: +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfsctl.c | 9 +++++++-- + fs/nfsd/nfsd.h | 1 + + fs/nfsd/nfssvc.c | 2 +- + 3 files changed, 9 insertions(+), 3 deletions(-) + +--- a/fs/nfsd/nfsctl.c ++++ b/fs/nfsd/nfsctl.c +@@ -720,8 +720,10 @@ static ssize_t __write_ports_addfd(char + + err = svc_addsock(nn->nfsd_serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); + +- if (err >= 0 && +- !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) ++ if (err < 0 && !nn->nfsd_serv->sv_nrthreads && !nn->keep_active) ++ nfsd_last_thread(net); ++ else if (err >= 0 && ++ !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) + svc_get(nn->nfsd_serv); + + nfsd_put(net); +@@ -771,6 +773,9 @@ out_close: + svc_xprt_put(xprt); + } + out_err: ++ if (!nn->nfsd_serv->sv_nrthreads && !nn->keep_active) ++ nfsd_last_thread(net); ++ + nfsd_put(net); + return err; + } +--- a/fs/nfsd/nfsd.h ++++ b/fs/nfsd/nfsd.h +@@ -139,6 +139,7 @@ int nfsd_vers(struct nfsd_net *nn, int v + int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change); + void nfsd_reset_versions(struct nfsd_net *nn); + int nfsd_create_serv(struct net *net); ++void nfsd_last_thread(struct net *net); + + extern int nfsd_max_blksize; + +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -532,7 +532,7 @@ static struct notifier_block nfsd_inet6a + /* Only used under nfsd_mutex, so this atomic may be overkill: */ + static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0); + +-static void nfsd_last_thread(struct net *net) ++void nfsd_last_thread(struct net *net) + { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv = nn->nfsd_serv; diff --git a/queue-6.1/nfsd-don-t-call-locks_release_private-twice-concurrently.patch b/queue-6.1/nfsd-don-t-call-locks_release_private-twice-concurrently.patch new file mode 100644 index 00000000000..42bdc2d1881 --- /dev/null +++ b/queue-6.1/nfsd-don-t-call-locks_release_private-twice-concurrently.patch @@ -0,0 +1,55 @@ +From stable+bounces-70238-greg=kroah.com@vger.kernel.org Mon Aug 26 17:08:14 2024 +From: cel@kernel.org +Date: Mon, 26 Aug 2024 11:07:02 -0400 +Subject: nfsd: don't call locks_release_private() twice concurrently +To: +Cc: , lilingfeng3@huawei.com, NeilBrown +Message-ID: <20240826150703.13987-7-cel@kernel.org> + +From: NeilBrown + +[ Upstream commit 05eda6e75773592760285e10ac86c56d683be17f ] + +It is possible for free_blocked_lock() to be called twice concurrently, +once from nfsd4_lock() and once from nfsd4_release_lockowner() calling +remove_blocked_locks(). This is why a kref was added. + +It is perfectly safe for locks_delete_block() and kref_put() to be +called in parallel as they use locking or atomicity respectively as +protection. However locks_release_private() has no locking. It is +safe for it to be called twice sequentially, but not concurrently. + +This patch moves that call from free_blocked_lock() where it could race +with itself, to free_nbl() where it cannot. This will slightly delay +the freeing of private info or release of the owner - but not by much. +It is arguably more natural for this freeing to happen in free_nbl() +where the structure itself is freed. + +This bug was found by code inspection - it has not been seen in practice. 
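+
+With the call moved into free_nbl(), the kref guarantees that
+locks_release_private() runs exactly once however the two paths race
+(illustrative interleaving, not from the original commit):
+
+	nfsd4_lock()                    nfsd4_release_lockowner()
+	  free_blocked_lock(nbl)          remove_blocked_locks()
+	    locks_delete_block()            free_blocked_lock(nbl)
+	    kref_put(&nbl->nbl_kref)          locks_delete_block()
+	      -> free_nbl()                   kref_put(&nbl->nbl_kref)
+	         locks_release_private()        /* not the final put */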
+ +Fixes: 47446d74f170 ("nfsd4: add refcount for nfsd4_blocked_lock") +Signed-off-by: NeilBrown +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfs4state.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -318,6 +318,7 @@ free_nbl(struct kref *kref) + struct nfsd4_blocked_lock *nbl; + + nbl = container_of(kref, struct nfsd4_blocked_lock, nbl_kref); ++ locks_release_private(&nbl->nbl_lock); + kfree(nbl); + } + +@@ -325,7 +326,6 @@ static void + free_blocked_lock(struct nfsd4_blocked_lock *nbl) + { + locks_delete_block(&nbl->nbl_lock); +- locks_release_private(&nbl->nbl_lock); + kref_put(&nbl->nbl_kref, free_nbl); + } + diff --git a/queue-6.1/nfsd-drop-the-nfsd_put-helper.patch b/queue-6.1/nfsd-drop-the-nfsd_put-helper.patch new file mode 100644 index 00000000000..e34b3ce274f --- /dev/null +++ b/queue-6.1/nfsd-drop-the-nfsd_put-helper.patch @@ -0,0 +1,128 @@ +From stable+bounces-70237-greg=kroah.com@vger.kernel.org Mon Aug 26 17:08:04 2024 +From: cel@kernel.org +Date: Mon, 26 Aug 2024 11:07:01 -0400 +Subject: nfsd: drop the nfsd_put helper +To: +Cc: , lilingfeng3@huawei.com, Jeff Layton , Zhi Li , NeilBrown +Message-ID: <20240826150703.13987-6-cel@kernel.org> + +From: Jeff Layton + +[ Upstream commit 64e6304169f1e1f078e7f0798033f80a7fb0ea46 ] + +It's not safe to call nfsd_put once nfsd_last_thread has been called, as +that function will zero out the nn->nfsd_serv pointer. + +Drop the nfsd_put helper altogether and open-code the svc_put in its +callers instead. That allows us to not be reliant on the value of that +pointer when handling an error. + +Fixes: 2a501f55cd64 ("nfsd: call nfsd_last_thread() before final nfsd_put()") +Reported-by: Zhi Li +Cc: NeilBrown +Signed-off-by: Jeffrey Layton +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfsctl.c | 31 +++++++++++++++++-------------- + fs/nfsd/nfsd.h | 7 ------- + 2 files changed, 17 insertions(+), 21 deletions(-) + +--- a/fs/nfsd/nfsctl.c ++++ b/fs/nfsd/nfsctl.c +@@ -709,6 +709,7 @@ static ssize_t __write_ports_addfd(char + char *mesg = buf; + int fd, err; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); ++ struct svc_serv *serv; + + err = get_int(&mesg, &fd); + if (err != 0 || fd < 0) +@@ -718,15 +719,15 @@ static ssize_t __write_ports_addfd(char + if (err != 0) + return err; + +- err = svc_addsock(nn->nfsd_serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); ++ serv = nn->nfsd_serv; ++ err = svc_addsock(serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); + +- if (err < 0 && !nn->nfsd_serv->sv_nrthreads && !nn->keep_active) ++ if (err < 0 && !serv->sv_nrthreads && !nn->keep_active) + nfsd_last_thread(net); +- else if (err >= 0 && +- !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) +- svc_get(nn->nfsd_serv); ++ else if (err >= 0 && !serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) ++ svc_get(serv); + +- nfsd_put(net); ++ svc_put(serv); + return err; + } + +@@ -740,6 +741,7 @@ static ssize_t __write_ports_addxprt(cha + struct svc_xprt *xprt; + int port, err; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); ++ struct svc_serv *serv; + + if (sscanf(buf, "%15s %5u", transport, &port) != 2) + return -EINVAL; +@@ -751,32 +753,33 @@ static ssize_t __write_ports_addxprt(cha + if (err != 0) + return err; + +- err = svc_xprt_create(nn->nfsd_serv, transport, net, ++ serv = nn->nfsd_serv; ++ err = svc_xprt_create(serv, transport, net, + PF_INET, port, SVC_SOCK_ANONYMOUS, cred); + if (err < 0) + goto 
out_err; + +- err = svc_xprt_create(nn->nfsd_serv, transport, net, ++ err = svc_xprt_create(serv, transport, net, + PF_INET6, port, SVC_SOCK_ANONYMOUS, cred); + if (err < 0 && err != -EAFNOSUPPORT) + goto out_close; + +- if (!nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) +- svc_get(nn->nfsd_serv); ++ if (!serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) ++ svc_get(serv); + +- nfsd_put(net); ++ svc_put(serv); + return 0; + out_close: +- xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port); ++ xprt = svc_find_xprt(serv, transport, net, PF_INET, port); + if (xprt != NULL) { + svc_xprt_close(xprt); + svc_xprt_put(xprt); + } + out_err: +- if (!nn->nfsd_serv->sv_nrthreads && !nn->keep_active) ++ if (!serv->sv_nrthreads && !nn->keep_active) + nfsd_last_thread(net); + +- nfsd_put(net); ++ svc_put(serv); + return err; + } + +--- a/fs/nfsd/nfsd.h ++++ b/fs/nfsd/nfsd.h +@@ -97,13 +97,6 @@ int nfsd_pool_stats_open(struct inode * + int nfsd_pool_stats_release(struct inode *, struct file *); + void nfsd_shutdown_threads(struct net *net); + +-static inline void nfsd_put(struct net *net) +-{ +- struct nfsd_net *nn = net_generic(net, nfsd_net_id); +- +- svc_put(nn->nfsd_serv); +-} +- + bool i_am_nfsd(void); + + struct nfsdfs_client { diff --git a/queue-6.1/nfsd-fix-a-regression-in-nfsd_setattr.patch b/queue-6.1/nfsd-fix-a-regression-in-nfsd_setattr.patch new file mode 100644 index 00000000000..e1b525d5b7e --- /dev/null +++ b/queue-6.1/nfsd-fix-a-regression-in-nfsd_setattr.patch @@ -0,0 +1,87 @@ +From stable+bounces-70239-greg=kroah.com@vger.kernel.org Mon Aug 26 17:08:22 2024 +From: cel@kernel.org +Date: Mon, 26 Aug 2024 11:07:03 -0400 +Subject: nfsd: Fix a regression in nfsd_setattr() +To: +Cc: , lilingfeng3@huawei.com, Trond Myklebust , Jeff Layton , NeilBrown +Message-ID: <20240826150703.13987-8-cel@kernel.org> + +From: Trond Myklebust + +[ Upstream commit 6412e44c40aaf8f1d7320b2099c5bdd6cb9126ac ] + +Commit bb4d53d66e4b ("NFSD: use (un)lock_inode instead of +fh_(un)lock for file operations") broke the NFSv3 pre/post op +attributes behaviour when doing a SETATTR rpc call by stripping out +the calls to fh_fill_pre_attrs() and fh_fill_post_attrs(). 
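+
+The pre/post op attributes have to bracket the actual attribute change
+under the inode lock; the shape of the fixed nfsd_setattr() is
+(condensed from the diff below):
+
+	inode_lock(inode);
+	fh_fill_pre_attrs(fhp);
+	/* ... notify_change(), ACL updates ... */
+	fh_fill_post_attrs(fhp);
+	inode_unlock(inode);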
+ +Fixes: bb4d53d66e4b ("NFSD: use (un)lock_inode instead of fh_(un)lock for file operations") +Signed-off-by: Trond Myklebust +Reviewed-by: Jeff Layton +Reviewed-by: NeilBrown +Message-ID: <20240216012451.22725-1-trondmy@kernel.org> +[ cel: adjusted to apply to v6.1.y ] +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfs4proc.c | 4 ++++ + fs/nfsd/vfs.c | 6 ++++-- + 2 files changed, 8 insertions(+), 2 deletions(-) + +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -1106,6 +1106,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, st + }; + struct inode *inode; + __be32 status = nfs_ok; ++ bool save_no_wcc; + int err; + + if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { +@@ -1131,8 +1132,11 @@ nfsd4_setattr(struct svc_rqst *rqstp, st + + if (status) + goto out; ++ save_no_wcc = cstate->current_fh.fh_no_wcc; ++ cstate->current_fh.fh_no_wcc = true; + status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, + 0, (time64_t)0); ++ cstate->current_fh.fh_no_wcc = save_no_wcc; + if (!status) + status = nfserrno(attrs.na_labelerr); + if (!status) +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -475,7 +475,7 @@ nfsd_setattr(struct svc_rqst *rqstp, str + int accmode = NFSD_MAY_SATTR; + umode_t ftype = 0; + __be32 err; +- int host_err; ++ int host_err = 0; + bool get_write_count; + bool size_change = (iap->ia_valid & ATTR_SIZE); + int retries; +@@ -533,6 +533,7 @@ nfsd_setattr(struct svc_rqst *rqstp, str + } + + inode_lock(inode); ++ fh_fill_pre_attrs(fhp); + for (retries = 1;;) { + struct iattr attrs; + +@@ -560,13 +561,14 @@ nfsd_setattr(struct svc_rqst *rqstp, str + attr->na_aclerr = set_posix_acl(&init_user_ns, + inode, ACL_TYPE_DEFAULT, + attr->na_dpacl); ++ fh_fill_post_attrs(fhp); + inode_unlock(inode); + if (size_change) + put_write_access(inode); + out: + if (!host_err) + host_err = commit_metadata(fhp); +- return nfserrno(host_err); ++ return err != 0 ? err : nfserrno(host_err); + } + + #if defined(CONFIG_NFSD_V4) diff --git a/queue-6.1/nfsd-separate-nfsd_last_thread-from-nfsd_put.patch b/queue-6.1/nfsd-separate-nfsd_last_thread-from-nfsd_put.patch new file mode 100644 index 00000000000..58567da5f7a --- /dev/null +++ b/queue-6.1/nfsd-separate-nfsd_last_thread-from-nfsd_put.patch @@ -0,0 +1,177 @@ +From stable+bounces-70234-greg=kroah.com@vger.kernel.org Mon Aug 26 17:07:32 2024 +From: cel@kernel.org +Date: Mon, 26 Aug 2024 11:06:58 -0400 +Subject: nfsd: separate nfsd_last_thread() from nfsd_put() +To: +Cc: , lilingfeng3@huawei.com, NeilBrown +Message-ID: <20240826150703.13987-3-cel@kernel.org> + +From: NeilBrown + +[ Upstream commit 9f28a971ee9fdf1bf8ce8c88b103f483be610277 ] + +Now that the last nfsd thread is stopped by an explicit act of calling +svc_set_num_threads() with a count of zero, we only have a limited +number of places that can happen, and don't need to call +nfsd_last_thread() in nfsd_put() + +So separate that out and call it at the two places where the number of +threads is set to zero. + +Move the clearing of ->nfsd_serv and the call to svc_xprt_destroy_all() +into nfsd_last_thread(), as they are really part of the same action. + +nfsd_put() is now a thin wrapper around svc_put(), so make it a static +inline. + +nfsd_put() cannot be called after nfsd_last_thread(), so in a couple of +places we have to use svc_put() instead. 
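+
+That is because nfsd_last_thread() clears nn->nfsd_serv, so the server
+pointer has to be saved before it runs (sketch):
+
+	struct svc_serv *serv = nn->nfsd_serv;
+	...
+	nfsd_last_thread(net);	/* nn->nfsd_serv is NULL from here on */
+	svc_put(serv);		/* nfsd_put(net) would dereference NULL */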
+ +Signed-off-by: NeilBrown +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfsd.h | 7 ++++++- + fs/nfsd/nfssvc.c | 52 +++++++++++++++++++--------------------------------- + 2 files changed, 25 insertions(+), 34 deletions(-) + +--- a/fs/nfsd/nfsd.h ++++ b/fs/nfsd/nfsd.h +@@ -97,7 +97,12 @@ int nfsd_pool_stats_open(struct inode * + int nfsd_pool_stats_release(struct inode *, struct file *); + void nfsd_shutdown_threads(struct net *net); + +-void nfsd_put(struct net *net); ++static inline void nfsd_put(struct net *net) ++{ ++ struct nfsd_net *nn = net_generic(net, nfsd_net_id); ++ ++ svc_put(nn->nfsd_serv); ++} + + bool i_am_nfsd(void); + +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -532,9 +532,14 @@ static struct notifier_block nfsd_inet6a + /* Only used under nfsd_mutex, so this atomic may be overkill: */ + static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0); + +-static void nfsd_last_thread(struct svc_serv *serv, struct net *net) ++static void nfsd_last_thread(struct net *net) + { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); ++ struct svc_serv *serv = nn->nfsd_serv; ++ ++ spin_lock(&nfsd_notifier_lock); ++ nn->nfsd_serv = NULL; ++ spin_unlock(&nfsd_notifier_lock); + + /* check if the notifier still has clients */ + if (atomic_dec_return(&nfsd_notifier_refcount) == 0) { +@@ -544,6 +549,8 @@ static void nfsd_last_thread(struct svc_ + #endif + } + ++ svc_xprt_destroy_all(serv, net); ++ + /* + * write_ports can create the server without actually starting + * any threads--if we get shut down before any threads are +@@ -634,7 +641,8 @@ void nfsd_shutdown_threads(struct net *n + svc_get(serv); + /* Kill outstanding nfsd threads */ + svc_set_num_threads(serv, NULL, 0); +- nfsd_put(net); ++ nfsd_last_thread(net); ++ svc_put(serv); + mutex_unlock(&nfsd_mutex); + } + +@@ -665,9 +673,6 @@ int nfsd_create_serv(struct net *net) + serv->sv_maxconn = nn->max_connections; + error = svc_bind(serv, net); + if (error < 0) { +- /* NOT nfsd_put() as notifiers (see below) haven't +- * been set up yet. +- */ + svc_put(serv); + return error; + } +@@ -710,29 +715,6 @@ int nfsd_get_nrthreads(int n, int *nthre + return 0; + } + +-/* This is the callback for kref_put() below. +- * There is no code here as the first thing to be done is +- * call svc_shutdown_net(), but we cannot get the 'net' from +- * the kref. So do all the work when kref_put returns true. 
+- */ +-static void nfsd_noop(struct kref *ref) +-{ +-} +- +-void nfsd_put(struct net *net) +-{ +- struct nfsd_net *nn = net_generic(net, nfsd_net_id); +- +- if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) { +- svc_xprt_destroy_all(nn->nfsd_serv, net); +- nfsd_last_thread(nn->nfsd_serv, net); +- svc_destroy(&nn->nfsd_serv->sv_refcnt); +- spin_lock(&nfsd_notifier_lock); +- nn->nfsd_serv = NULL; +- spin_unlock(&nfsd_notifier_lock); +- } +-} +- + int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) + { + int i = 0; +@@ -783,7 +765,7 @@ int nfsd_set_nrthreads(int n, int *nthre + if (err) + break; + } +- nfsd_put(net); ++ svc_put(nn->nfsd_serv); + return err; + } + +@@ -798,6 +780,7 @@ nfsd_svc(int nrservs, struct net *net, c + int error; + bool nfsd_up_before; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); ++ struct svc_serv *serv; + + mutex_lock(&nfsd_mutex); + dprintk("nfsd: creating service\n"); +@@ -817,22 +800,25 @@ nfsd_svc(int nrservs, struct net *net, c + goto out; + + nfsd_up_before = nn->nfsd_net_up; ++ serv = nn->nfsd_serv; + + error = nfsd_startup_net(net, cred); + if (error) + goto out_put; +- error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs); ++ error = svc_set_num_threads(serv, NULL, nrservs); + if (error) + goto out_shutdown; +- error = nn->nfsd_serv->sv_nrthreads; ++ error = serv->sv_nrthreads; ++ if (error == 0) ++ nfsd_last_thread(net); + out_shutdown: + if (error < 0 && !nfsd_up_before) + nfsd_shutdown_net(net); + out_put: + /* Threads now hold service active */ + if (xchg(&nn->keep_active, 0)) +- nfsd_put(net); +- nfsd_put(net); ++ svc_put(serv); ++ svc_put(serv); + out: + mutex_unlock(&nfsd_mutex); + return error; diff --git a/queue-6.1/nfsd-simplify-code-around-svc_exit_thread-call-in-nfsd.patch b/queue-6.1/nfsd-simplify-code-around-svc_exit_thread-call-in-nfsd.patch new file mode 100644 index 00000000000..496b2005546 --- /dev/null +++ b/queue-6.1/nfsd-simplify-code-around-svc_exit_thread-call-in-nfsd.patch @@ -0,0 +1,85 @@ +From stable+bounces-70233-greg=kroah.com@vger.kernel.org Mon Aug 26 17:07:22 2024 +From: cel@kernel.org +Date: Mon, 26 Aug 2024 11:06:57 -0400 +Subject: nfsd: Simplify code around svc_exit_thread() call in nfsd() +To: +Cc: , lilingfeng3@huawei.com, NeilBrown +Message-ID: <20240826150703.13987-2-cel@kernel.org> + +From: NeilBrown + +[ Upstream commit 18e4cf915543257eae2925671934937163f5639b ] + +Previously a thread could exit asynchronously (due to a signal) so some +care was needed to hold nfsd_mutex over the last svc_put() call. Now a +thread can only exit when svc_set_num_threads() is called, and this is +always called under nfsd_mutex. So no care is needed. + +Not only is the mutex held when a thread exits now, but the svc refcount +is elevated, so the svc_put() in svc_exit_thread() will never be a final +put, so the mutex isn't even needed at this point in the code. 
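+
+The callers follow the pattern below (condensed from
+nfsd_shutdown_threads() as reworked earlier in this series), which keeps
+the serv refcount elevated while the threads exit:
+
+	svc_get(serv);				/* extra reference */
+	svc_set_num_threads(serv, NULL, 0);	/* exiting threads call
+						 * svc_exit_thread(); their
+						 * svc_put() is never final */
+	nfsd_last_thread(net);
+	svc_put(serv);				/* final put, under nfsd_mutex */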
+ +Signed-off-by: NeilBrown +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfssvc.c | 23 ----------------------- + include/linux/sunrpc/svc.h | 13 ------------- + 2 files changed, 36 deletions(-) + +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -983,31 +983,8 @@ nfsd(void *vrqstp) + atomic_dec(&nfsd_th_cnt); + + out: +- /* Take an extra ref so that the svc_put in svc_exit_thread() +- * doesn't call svc_destroy() +- */ +- svc_get(nn->nfsd_serv); +- + /* Release the thread */ + svc_exit_thread(rqstp); +- +- /* We need to drop a ref, but may not drop the last reference +- * without holding nfsd_mutex, and we cannot wait for nfsd_mutex as that +- * could deadlock with nfsd_shutdown_threads() waiting for us. +- * So three options are: +- * - drop a non-final reference, +- * - get the mutex without waiting +- * - sleep briefly andd try the above again +- */ +- while (!svc_put_not_last(nn->nfsd_serv)) { +- if (mutex_trylock(&nfsd_mutex)) { +- nfsd_put(net); +- mutex_unlock(&nfsd_mutex); +- break; +- } +- msleep(20); +- } +- + return 0; + } + +--- a/include/linux/sunrpc/svc.h ++++ b/include/linux/sunrpc/svc.h +@@ -123,19 +123,6 @@ static inline void svc_put(struct svc_se + kref_put(&serv->sv_refcnt, svc_destroy); + } + +-/** +- * svc_put_not_last - decrement non-final reference count on SUNRPC serv +- * @serv: the svc_serv to have count decremented +- * +- * Returns: %true is refcount was decremented. +- * +- * If the refcount is 1, it is not decremented and instead failure is reported. +- */ +-static inline bool svc_put_not_last(struct svc_serv *serv) +-{ +- return refcount_dec_not_one(&serv->sv_refcnt.refcount); +-} +- + /* + * Maximum payload size supported by a kernel RPC server. + * This is use to determine the max number of pages nfsd is diff --git a/queue-6.1/nfsd-simplify-error-paths-in-nfsd_svc.patch b/queue-6.1/nfsd-simplify-error-paths-in-nfsd_svc.patch new file mode 100644 index 00000000000..9749ef5db76 --- /dev/null +++ b/queue-6.1/nfsd-simplify-error-paths-in-nfsd_svc.patch @@ -0,0 +1,88 @@ +From stable+bounces-70235-greg=kroah.com@vger.kernel.org Mon Aug 26 17:07:41 2024 +From: cel@kernel.org +Date: Mon, 26 Aug 2024 11:06:59 -0400 +Subject: NFSD: simplify error paths in nfsd_svc() +To: +Cc: , lilingfeng3@huawei.com, NeilBrown , Jeff Layton +Message-ID: <20240826150703.13987-4-cel@kernel.org> + +From: NeilBrown + +[ Upstream commit bf32075256e9dd9c6b736859e2c5813981339908 ] + +The error paths in nfsd_svc() are needlessly complex and can result in a +final call to svc_put() without nfsd_last_thread() being called. This +results in the listening sockets not being closed properly. + +The per-netns setup provided by nfsd_startup_new() and removed by +nfsd_shutdown_net() is needed precisely when there are running threads. +So we don't need nfsd_up_before. We don't need to know if it *was* up. +We only need to know if any threads are left. If none are, then we must +call nfsd_shutdown_net(). But we don't need to do that explicitly as +nfsd_last_thread() does that for us. + +So simply call nfsd_last_thread() before the last svc_put() if there are +no running threads. That will always do the right thing. + +Also discard: + pr_info("nfsd: last server has exited, flushing export cache\n"); +It may not be true if an attempt to start the first server failed, and +it isn't particularly helpful and it simply reports normal behaviour. 
+ +Signed-off-by: NeilBrown +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfssvc.c | 14 ++++---------- + 1 file changed, 4 insertions(+), 10 deletions(-) + +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -562,7 +562,6 @@ static void nfsd_last_thread(struct net + return; + + nfsd_shutdown_net(net); +- pr_info("nfsd: last server has exited, flushing export cache\n"); + nfsd_export_flush(net); + } + +@@ -778,7 +777,6 @@ int + nfsd_svc(int nrservs, struct net *net, const struct cred *cred) + { + int error; +- bool nfsd_up_before; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv; + +@@ -798,8 +796,6 @@ nfsd_svc(int nrservs, struct net *net, c + error = nfsd_create_serv(net); + if (error) + goto out; +- +- nfsd_up_before = nn->nfsd_net_up; + serv = nn->nfsd_serv; + + error = nfsd_startup_net(net, cred); +@@ -807,17 +803,15 @@ nfsd_svc(int nrservs, struct net *net, c + goto out_put; + error = svc_set_num_threads(serv, NULL, nrservs); + if (error) +- goto out_shutdown; ++ goto out_put; + error = serv->sv_nrthreads; +- if (error == 0) +- nfsd_last_thread(net); +-out_shutdown: +- if (error < 0 && !nfsd_up_before) +- nfsd_shutdown_net(net); + out_put: + /* Threads now hold service active */ + if (xchg(&nn->keep_active, 0)) + svc_put(serv); ++ ++ if (serv->sv_nrthreads == 0) ++ nfsd_last_thread(net); + svc_put(serv); + out: + mutex_unlock(&nfsd_mutex); diff --git a/queue-6.1/revert-s390-dasd-establish-dma-alignment.patch b/queue-6.1/revert-s390-dasd-establish-dma-alignment.patch new file mode 100644 index 00000000000..bd95ab78855 --- /dev/null +++ b/queue-6.1/revert-s390-dasd-establish-dma-alignment.patch @@ -0,0 +1,96 @@ +From hoeppner@linux.ibm.com Tue Aug 27 15:13:30 2024 +From: "Jan Höppner" +Date: Tue, 20 Aug 2024 16:13:07 +0200 +Subject: Revert "s390/dasd: Establish DMA alignment" +To: gregkh@linuxfoundation.org +Cc: stable@vger.kernel.org, sth@linux.ibm.com +Message-ID: <20240820141307.2869182-1-hoeppner@linux.ibm.com> + +From: "Jan Höppner" + +This reverts commit bc792884b76f ("s390/dasd: Establish DMA alignment"). + +Quoting the original commit: + linux-next commit bf8d08532bc1 ("iomap: add support for dma aligned + direct-io") changes the alignment requirement to come from the block + device rather than the block size, and the default alignment + requirement is 512-byte boundaries. Since DASD I/O has page + alignments for IDAW/TIDAW requests, let's override this value to + restore the expected behavior. + +I mentioned TIDAW, but that was wrong. TIDAWs have no distinct alignment +requirement (per p. 15-70 of POPS SA22-7832-13): + + Unless otherwise specified, TIDAWs may designate + a block of main storage on any boundary and length + up to 4K bytes, provided the specified block does not + cross a 4 K-byte boundary. + +IDAWs do, but the original commit neglected that while ECKD DASD are +typically formatted in 4096-byte blocks, they don't HAVE to be. 
Formatting
+an ECKD volume with smaller blocks is permitted (dasdfmt -b xxx), and the
+problematic commit enforces alignment requirements on such a device,
+resulting in errors such as:
+
+ [test@host ~]# lsdasd -l a367 | grep blksz
+ blksz: 512
+ [test@host ~]# mkfs.xfs -f /dev/disk/by-path/ccw-0.0.a367-part1
+ meta-data=/dev/dasdc1 isize=512 agcount=4, agsize=230075 blks
+ = sectsz=512 attr=2, projid32bit=1
+ = crc=1 finobt=1, sparse=1, rmapbt=1
+ = reflink=1 bigtime=1 inobtcount=1 nrext64=1
+ data = bsize=4096 blocks=920299, imaxpct=25
+ = sunit=0 swidth=0 blks
+ naming =version 2 bsize=4096 ascii-ci=0, ftype=1
+ log =internal log bsize=4096 blocks=16384, version=2
+ = sectsz=512 sunit=0 blks, lazy-count=1
+ realtime =none extsz=4096 blocks=0, rtextents=0
+ error reading existing superblock: Invalid argument
+ mkfs.xfs: pwrite failed: Invalid argument
+ libxfs_bwrite: write failed on (unknown) bno 0x70565c/0x100, err=22
+ mkfs.xfs: Releasing dirty buffer to free list!
+ found dirty buffer (bulk) on free list!
+ mkfs.xfs: pwrite failed: Invalid argument
+ ...snipped...
+
+The original commit omitted the FBA discipline for just this reason,
+but the formatted block size of the other disciplines was overlooked.
+The solution to all of this is to revert to the original behavior,
+such that the block size can be respected.
+
+But what of the original problem? That was manifested with a direct-io
+QEMU guest, where QEMU itself was changed a month or two later with
+commit 25474d90aa ("block: use the request length for iov alignment")
+such that the blamed kernel commit is unnecessary.
+
+Note: This is an adapted version of the original upstream commit
+2a07bb64d801 ("s390/dasd: Remove DMA alignment").
+
+Cc: stable@vger.kernel.org # 6.0+
+Signed-off-by: Jan Höppner
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/s390/block/dasd_diag.c | 1 -
+ drivers/s390/block/dasd_eckd.c | 1 -
+ 2 files changed, 2 deletions(-)
+
+--- a/drivers/s390/block/dasd_diag.c
++++ b/drivers/s390/block/dasd_diag.c
+@@ -639,7 +639,6 @@ static void dasd_diag_setup_blk_queue(st
+ /* With page sized segments each segment can be translated into one idaw/tidaw */
+ blk_queue_max_segment_size(q, PAGE_SIZE);
+ blk_queue_segment_boundary(q, PAGE_SIZE - 1);
+- blk_queue_dma_alignment(q, PAGE_SIZE - 1);
+ }
+
+ static int dasd_diag_pe_handler(struct dasd_device *device,
+--- a/drivers/s390/block/dasd_eckd.c
++++ b/drivers/s390/block/dasd_eckd.c
+@@ -6889,7 +6889,6 @@ static void dasd_eckd_setup_blk_queue(st
+ /* With page sized segments each segment can be translated into one idaw/tidaw */
+ blk_queue_max_segment_size(q, PAGE_SIZE);
+ blk_queue_segment_boundary(q, PAGE_SIZE - 1);
+- blk_queue_dma_alignment(q, PAGE_SIZE - 1);
+ }
+
+ static struct ccw_driver dasd_eckd_driver = {
diff --git a/queue-6.1/series b/queue-6.1/series
index 937d3118789..da79f8dfd33 100644
--- a/queue-6.1/series
+++ b/queue-6.1/series
@@ -282,3 +282,23 @@ revert-usb-gadget-uvc-cleanup-request-when-not-in-correct-state.patch
 revert-drm-amd-display-validate-hw_points_num-before-using-it.patch
 tcp-do-not-export-tcp_twsk_purge.patch
 hwmon-ltc2992-fix-memory-leak-in-ltc2992_parse_dt.patch
+alsa-timer-relax-start-tick-time-check-for-slave-timer-elements.patch
+mm-vmalloc-fix-page-mapping-if-vm_area_alloc_pages-with-high-order-fallback-to-order-0.patch
+mm-numa-no-task_numa_fault-call-if-pmd-is-changed.patch
+mm-numa-no-task_numa_fault-call-if-pte-is-changed.patch
+nfsd-simplify-code-around-svc_exit_thread-call-in-nfsd.patch
+nfsd-separate-nfsd_last_thread-from-nfsd_put.patch
+nfsd-simplify-error-paths-in-nfsd_svc.patch
+nfsd-call-nfsd_last_thread-before-final-nfsd_put.patch
+nfsd-drop-the-nfsd_put-helper.patch
+nfsd-don-t-call-locks_release_private-twice-concurrently.patch
+nfsd-fix-a-regression-in-nfsd_setattr.patch
+bluetooth-hci_ldisc-check-hci_uart_proto_ready-flag-in-hciuartgetproto.patch
+drm-amdgpu-vcn-identify-unified-queue-in-sw-init.patch
+drm-amdgpu-vcn-not-pause-dpg-for-unified-queue.patch
+kvm-x86-fire-timer-when-it-is-migrated-and-expired-and-in-oneshot-mode.patch
+revert-s390-dasd-establish-dma-alignment.patch
+udp-allow-header-check-for-dodgy-gso_udp_l4-packets.patch
+gso-fix-dodgy-bit-handling-for-gso_udp_l4.patch
+net-more-strict-virtio_net_hdr_gso_udp_l4-validation.patch
+net-drop-bad-gso-csum_start-and-offset-in-virtio_net_hdr.patch
diff --git a/queue-6.1/udp-allow-header-check-for-dodgy-gso_udp_l4-packets.patch b/queue-6.1/udp-allow-header-check-for-dodgy-gso_udp_l4-packets.patch
new file mode 100644
index 00000000000..e12b9dead17
--- /dev/null
+++ b/queue-6.1/udp-allow-header-check-for-dodgy-gso_udp_l4-packets.patch
@@ -0,0 +1,44 @@
+From 1fd54773c26787b9aea80e2f62c7d0780ea444d0 Mon Sep 17 00:00:00 2001
+From: Andrew Melnychenko
+Date: Wed, 7 Dec 2022 13:35:53 +0200
+Subject: udp: allow header check for dodgy GSO_UDP_L4 packets.
+
+From: Andrew Melnychenko
+
+commit 1fd54773c26787b9aea80e2f62c7d0780ea444d0 upstream.
+
+Allow UDP_L4 for robust packets.
+
+Signed-off-by: Jason Wang
+Signed-off-by: Andrew Melnychenko
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/udp_offload.c | 3 ++-
+ net/ipv6/udp_offload.c | 3 ++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -387,7 +387,8 @@ static struct sk_buff *udp4_ufo_fragment
+ if (!pskb_may_pull(skb, sizeof(struct udphdr)))
+ goto out;
+
+- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
++ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
++ !skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
+ return __udp_gso_segment(skb, features, false);
+
+ mss = skb_shinfo(skb)->gso_size;
+--- a/net/ipv6/udp_offload.c
++++ b/net/ipv6/udp_offload.c
+@@ -42,7 +42,8 @@ static struct sk_buff *udp6_ufo_fragment
+ if (!pskb_may_pull(skb, sizeof(struct udphdr)))
+ goto out;
+
+- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
++ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
++ !skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
+ return __udp_gso_segment(skb, features, true);
+
+ mss = skb_shinfo(skb)->gso_size;
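+
+The skb_gso_ok() test above is plain bit masking. A minimal model of
+the check (with illustrative flag values, not the kernel's, and a much
+simplified gso_ok()) shows why OR-ing NETIF_F_GSO_ROBUST into the
+advertised features waives exactly the dodgy-packet requirement:
+
+    /* illustrative sketch, not kernel code */
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    #define GSO_UDP_L4    0x1   /* skb gso_type bits */
+    #define GSO_DODGY     0x2
+    #define F_GSO_UDP_L4  0x1   /* device feature bits */
+    #define F_GSO_ROBUST  0x2
+
+    /* each gso_type bit demands the matching feature bit */
+    static bool gso_ok(unsigned int gso_type, unsigned int features)
+    {
+        unsigned int need = 0;
+
+        if (gso_type & GSO_UDP_L4)
+            need |= F_GSO_UDP_L4;
+        if (gso_type & GSO_DODGY)
+            need |= F_GSO_ROBUST;
+        return (features & need) == need;
+    }
+
+    int main(void)
+    {
+        unsigned int dodgy_l4 = GSO_UDP_L4 | GSO_DODGY;
+        unsigned int features = F_GSO_UDP_L4;  /* no ROBUST */
+
+        /* forcing ROBUST on makes dodginess alone no reason
+         * to bail out into software segmentation */
+        printf("plain check: %d\n", gso_ok(dodgy_l4, features));
+        printf("with ROBUST: %d\n",
+               gso_ok(dodgy_l4, features | F_GSO_ROBUST));
+        return 0;
+    }
+
+With ROBUST forced on in the test, a merely dodgy UDP_L4 packet no
+longer takes the early __udp_gso_segment() return; as the subject line
+says, it falls through to the header-checking path below the hunk.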