From: Greg Kroah-Hartman Date: Sun, 11 Aug 2024 16:03:09 +0000 (+0200) Subject: 6.6-stable patches X-Git-Tag: v6.1.105~110 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3e713b73c01fac163a5c8cb0953d1e300407a773;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: irqchip-loongarch-cpu-fix-return-value-of-lpic_gsi_to_irq.patch irqchip-meson-gpio-convert-meson_gpio_irq_controller-lock-to-raw_spinlock_t.patch net-drop-bad-gso-csum_start-and-offset-in-virtio_net_hdr.patch profiling-remove-profile-sleep-support.patch sched-cputime-fix-mul_u64_u64_div_u64-precision-for-cputime.patch scsi-mpt3sas-avoid-iommu-page-faults-on-report-zones.patch --- diff --git a/queue-6.6/irqchip-loongarch-cpu-fix-return-value-of-lpic_gsi_to_irq.patch b/queue-6.6/irqchip-loongarch-cpu-fix-return-value-of-lpic_gsi_to_irq.patch new file mode 100644 index 00000000000..08b8be26712 --- /dev/null +++ b/queue-6.6/irqchip-loongarch-cpu-fix-return-value-of-lpic_gsi_to_irq.patch @@ -0,0 +1,46 @@ +From 81a91abab1307d7725fa4620952c0767beae7753 Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Tue, 23 Jul 2024 14:45:08 +0800 +Subject: irqchip/loongarch-cpu: Fix return value of lpic_gsi_to_irq() + +From: Huacai Chen + +commit 81a91abab1307d7725fa4620952c0767beae7753 upstream. + +lpic_gsi_to_irq() should return a valid Linux interrupt number if +acpi_register_gsi() succeeds, and return 0 otherwise. But lpic_gsi_to_irq() +converts a negative return value of acpi_register_gsi() to a positive value +silently. + +Convert the return value explicitly. 
+ +Fixes: e8bba72b396c ("irqchip / ACPI: Introduce ACPI_IRQ_MODEL_LPIC for LoongArch") +Reported-by: Miao Wang +Signed-off-by: Huacai Chen +Signed-off-by: Thomas Gleixner +Reviewed-by: Jiaxun Yang +Cc: +Link: https://lore.kernel.org/r/20240723064508.35560-1-chenhuacai@loongson.cn +Signed-off-by: Greg Kroah-Hartman +--- + drivers/irqchip/irq-loongarch-cpu.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/irqchip/irq-loongarch-cpu.c ++++ b/drivers/irqchip/irq-loongarch-cpu.c +@@ -18,11 +18,13 @@ struct fwnode_handle *cpuintc_handle; + + static u32 lpic_gsi_to_irq(u32 gsi) + { ++ int irq = 0; ++ + /* Only pch irqdomain transferring is required for LoongArch. */ + if (gsi >= GSI_MIN_PCH_IRQ && gsi <= GSI_MAX_PCH_IRQ) +- return acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH); ++ irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH); + +- return 0; ++ return (irq > 0) ? irq : 0; + } + + static struct fwnode_handle *lpic_get_gsi_domain_id(u32 gsi) diff --git a/queue-6.6/irqchip-meson-gpio-convert-meson_gpio_irq_controller-lock-to-raw_spinlock_t.patch b/queue-6.6/irqchip-meson-gpio-convert-meson_gpio_irq_controller-lock-to-raw_spinlock_t.patch new file mode 100644 index 00000000000..612dc9e79b0 --- /dev/null +++ b/queue-6.6/irqchip-meson-gpio-convert-meson_gpio_irq_controller-lock-to-raw_spinlock_t.patch @@ -0,0 +1,105 @@ +From f872d4af79fe8c71ae291ce8875b477e1669a6c7 Mon Sep 17 00:00:00 2001 +From: Arseniy Krasnov +Date: Mon, 29 Jul 2024 16:18:50 +0300 +Subject: irqchip/meson-gpio: Convert meson_gpio_irq_controller::lock to 'raw_spinlock_t' + +From: Arseniy Krasnov + +commit f872d4af79fe8c71ae291ce8875b477e1669a6c7 upstream. + +This lock is acquired under irq_desc::lock with interrupts disabled. 
+ +When PREEMPT_RT is enabled, 'spinlock_t' becomes preemptible, which results +in invalid lock acquire context; + + [ BUG: Invalid wait context ] + swapper/0/1 is trying to lock: + ffff0000008fed30 (&ctl->lock){....}-{3:3}, at: meson_gpio_irq_update_bits0 + other info that might help us debug this: + context-{5:5} + 3 locks held by swapper/0/1: + #0: ffff0000003cd0f8 (&dev->mutex){....}-{4:4}, at: __driver_attach+0x90c + #1: ffff000004714650 (&desc->request_mutex){+.+.}-{4:4}, at: __setup_irq0 + #2: ffff0000047144c8 (&irq_desc_lock_class){-.-.}-{2:2}, at: __setup_irq0 + stack backtrace: + CPU: 1 PID: 1 Comm: swapper/0 Not tainted 6.9.9-sdkernel #1 + Call trace: + _raw_spin_lock_irqsave+0x60/0x88 + meson_gpio_irq_update_bits+0x34/0x70 + meson8_gpio_irq_set_type+0x78/0xc4 + meson_gpio_irq_set_type+0x30/0x60 + __irq_set_trigger+0x60/0x180 + __setup_irq+0x30c/0x6e0 + request_threaded_irq+0xec/0x1a4 + +Fixes: 215f4cc0fb20 ("irqchip/meson: Add support for gpio interrupt controller") +Signed-off-by: Arseniy Krasnov +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/20240729131850.3015508-1-avkrasnov@salutedevices.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/irqchip/irq-meson-gpio.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/drivers/irqchip/irq-meson-gpio.c ++++ b/drivers/irqchip/irq-meson-gpio.c +@@ -173,7 +173,7 @@ struct meson_gpio_irq_controller { + void __iomem *base; + u32 channel_irqs[MAX_NUM_CHANNEL]; + DECLARE_BITMAP(channel_map, MAX_NUM_CHANNEL); +- spinlock_t lock; ++ raw_spinlock_t lock; + }; + + static void meson_gpio_irq_update_bits(struct meson_gpio_irq_controller *ctl, +@@ -182,14 +182,14 @@ static void meson_gpio_irq_update_bits(s + unsigned long flags; + u32 tmp; + +- spin_lock_irqsave(&ctl->lock, flags); ++ raw_spin_lock_irqsave(&ctl->lock, flags); + + tmp = readl_relaxed(ctl->base + reg); + tmp &= ~mask; + tmp |= val; + writel_relaxed(tmp, ctl->base + reg); + +- 
spin_unlock_irqrestore(&ctl->lock, flags); ++ raw_spin_unlock_irqrestore(&ctl->lock, flags); + } + + static void meson_gpio_irq_init_dummy(struct meson_gpio_irq_controller *ctl) +@@ -239,12 +239,12 @@ meson_gpio_irq_request_channel(struct me + unsigned long flags; + unsigned int idx; + +- spin_lock_irqsave(&ctl->lock, flags); ++ raw_spin_lock_irqsave(&ctl->lock, flags); + + /* Find a free channel */ + idx = find_first_zero_bit(ctl->channel_map, ctl->params->nr_channels); + if (idx >= ctl->params->nr_channels) { +- spin_unlock_irqrestore(&ctl->lock, flags); ++ raw_spin_unlock_irqrestore(&ctl->lock, flags); + pr_err("No channel available\n"); + return -ENOSPC; + } +@@ -252,7 +252,7 @@ meson_gpio_irq_request_channel(struct me + /* Mark the channel as used */ + set_bit(idx, ctl->channel_map); + +- spin_unlock_irqrestore(&ctl->lock, flags); ++ raw_spin_unlock_irqrestore(&ctl->lock, flags); + + /* + * Setup the mux of the channel to route the signal of the pad +@@ -562,7 +562,7 @@ static int meson_gpio_irq_of_init(struct + if (!ctl) + return -ENOMEM; + +- spin_lock_init(&ctl->lock); ++ raw_spin_lock_init(&ctl->lock); + + ctl->base = of_iomap(node, 0); + if (!ctl->base) { diff --git a/queue-6.6/net-drop-bad-gso-csum_start-and-offset-in-virtio_net_hdr.patch b/queue-6.6/net-drop-bad-gso-csum_start-and-offset-in-virtio_net_hdr.patch new file mode 100644 index 00000000000..2f9c9ebb20e --- /dev/null +++ b/queue-6.6/net-drop-bad-gso-csum_start-and-offset-in-virtio_net_hdr.patch @@ -0,0 +1,147 @@ +From 89add40066f9ed9abe5f7f886fe5789ff7e0c50e Mon Sep 17 00:00:00 2001 +From: Willem de Bruijn +Date: Mon, 29 Jul 2024 16:10:12 -0400 +Subject: net: drop bad gso csum_start and offset in virtio_net_hdr + +From: Willem de Bruijn + +commit 89add40066f9ed9abe5f7f886fe5789ff7e0c50e upstream. + +Tighten csum_start and csum_offset checks in virtio_net_hdr_to_skb +for GSO packets. + +The function already checks that a checksum requested with +VIRTIO_NET_HDR_F_NEEDS_CSUM is in skb linear. 
But for GSO packets +this might not hold for segs after segmentation. + +Syzkaller demonstrated to reach this warning in skb_checksum_help + + offset = skb_checksum_start_offset(skb); + ret = -EINVAL; + if (WARN_ON_ONCE(offset >= skb_headlen(skb))) + +By injecting a TSO packet: + +WARNING: CPU: 1 PID: 3539 at net/core/dev.c:3284 skb_checksum_help+0x3d0/0x5b0 + ip_do_fragment+0x209/0x1b20 net/ipv4/ip_output.c:774 + ip_finish_output_gso net/ipv4/ip_output.c:279 [inline] + __ip_finish_output+0x2bd/0x4b0 net/ipv4/ip_output.c:301 + iptunnel_xmit+0x50c/0x930 net/ipv4/ip_tunnel_core.c:82 + ip_tunnel_xmit+0x2296/0x2c70 net/ipv4/ip_tunnel.c:813 + __gre_xmit net/ipv4/ip_gre.c:469 [inline] + ipgre_xmit+0x759/0xa60 net/ipv4/ip_gre.c:661 + __netdev_start_xmit include/linux/netdevice.h:4850 [inline] + netdev_start_xmit include/linux/netdevice.h:4864 [inline] + xmit_one net/core/dev.c:3595 [inline] + dev_hard_start_xmit+0x261/0x8c0 net/core/dev.c:3611 + __dev_queue_xmit+0x1b97/0x3c90 net/core/dev.c:4261 + packet_snd net/packet/af_packet.c:3073 [inline] + +The geometry of the bad input packet at tcp_gso_segment: + +[ 52.003050][ T8403] skb len=12202 headroom=244 headlen=12093 tailroom=0 +[ 52.003050][ T8403] mac=(168,24) mac_len=24 net=(192,52) trans=244 +[ 52.003050][ T8403] shinfo(txflags=0 nr_frags=1 gso(size=1552 type=3 segs=0)) +[ 52.003050][ T8403] csum(0x60000c7 start=199 offset=1536 +ip_summed=3 complete_sw=0 valid=0 level=0) + +Mitigate with stricter input validation. + +csum_offset: for GSO packets, deduce the correct value from gso_type. +This is already done for USO. Extend it to TSO. Let UFO be: +udp[46]_ufo_fragment ignores these fields and always computes the +checksum in software. + +csum_start: finding the real offset requires parsing to the transport +header. Do not add a parser, use existing segmentation parsing. Thanks +to SKB_GSO_DODGY, that also catches bad packets that are hw offloaded. +Again test both TSO and USO. 
Do not test UFO for the above reason, and +do not test UDP tunnel offload. + +GSO packets are almost always CHECKSUM_PARTIAL. USO packets may be +CHECKSUM_NONE since commit 10154dbded6d6 ("udp: Allow GSO transmit +from devices with no checksum offload"), but then still these fields +are initialized correctly in udp4_hwcsum/udp6_hwcsum_outgoing. So no +need to test for ip_summed == CHECKSUM_PARTIAL first. + +This revises an existing fix mentioned in the Fixes tag, which broke +small packets with GSO offload, as detected by kselftests. + +Link: https://syzkaller.appspot.com/bug?extid=e1db31216c789f552871 +Link: https://lore.kernel.org/netdev/20240723223109.2196886-1-kuba@kernel.org +Fixes: e269d79c7d35 ("net: missing check virtio") +Cc: stable@vger.kernel.org +Signed-off-by: Willem de Bruijn +Link: https://patch.msgid.link/20240729201108.1615114-1-willemdebruijn.kernel@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/virtio_net.h | 16 +++++----------- + net/ipv4/tcp_offload.c | 3 +++ + net/ipv4/udp_offload.c | 4 ++++ + 3 files changed, 12 insertions(+), 11 deletions(-) + +--- a/include/linux/virtio_net.h ++++ b/include/linux/virtio_net.h +@@ -56,7 +56,6 @@ static inline int virtio_net_hdr_to_skb( + unsigned int thlen = 0; + unsigned int p_off = 0; + unsigned int ip_proto; +- u64 ret, remainder, gso_size; + + if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { +@@ -99,16 +98,6 @@ static inline int virtio_net_hdr_to_skb( + u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); + u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16)); + +- if (hdr->gso_size) { +- gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); +- ret = div64_u64_rem(skb->len, gso_size, &remainder); +- if (!(ret && (hdr->gso_size > needed) && +- ((remainder > needed) || (remainder == 0)))) { +- return -EINVAL; +- } +- skb_shinfo(skb)->tx_flags |= SKBFL_SHARED_FRAG; +- } +- + if 
(!pskb_may_pull(skb, needed)) + return -EINVAL; + +@@ -182,6 +171,11 @@ retry: + if (gso_type != SKB_GSO_UDP_L4) + return -EINVAL; + break; ++ case SKB_GSO_TCPV4: ++ case SKB_GSO_TCPV6: ++ if (skb->csum_offset != offsetof(struct tcphdr, check)) ++ return -EINVAL; ++ break; + } + + /* Kernel has a special handling for GSO_BY_FRAGS. */ +--- a/net/ipv4/tcp_offload.c ++++ b/net/ipv4/tcp_offload.c +@@ -73,6 +73,9 @@ struct sk_buff *tcp_gso_segment(struct s + if (thlen < sizeof(*th)) + goto out; + ++ if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb))) ++ goto out; ++ + if (!pskb_may_pull(skb, thlen)) + goto out; + +--- a/net/ipv4/udp_offload.c ++++ b/net/ipv4/udp_offload.c +@@ -278,6 +278,10 @@ struct sk_buff *__udp_gso_segment(struct + if (gso_skb->len <= sizeof(*uh) + mss) + return ERR_PTR(-EINVAL); + ++ if (unlikely(skb_checksum_start(gso_skb) != ++ skb_transport_header(gso_skb))) ++ return ERR_PTR(-EINVAL); ++ + if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh), diff --git a/queue-6.6/profiling-remove-profile-sleep-support.patch b/queue-6.6/profiling-remove-profile-sleep-support.patch new file mode 100644 index 00000000000..f77103db6f8 --- /dev/null +++ b/queue-6.6/profiling-remove-profile-sleep-support.patch @@ -0,0 +1,125 @@ +From b88f55389ad27f05ed84af9e1026aa64dbfabc9a Mon Sep 17 00:00:00 2001 +From: Tetsuo Handa +Date: Sun, 4 Aug 2024 18:48:10 +0900 +Subject: profiling: remove profile=sleep support + +From: Tetsuo Handa + +commit b88f55389ad27f05ed84af9e1026aa64dbfabc9a upstream. 
+ +The kernel sleep profile is no longer working due to a recursive locking +bug introduced by commit 42a20f86dc19 ("sched: Add wrapper for get_wchan() +to keep task blocked") + +Booting with the 'profile=sleep' kernel command line option added or +executing + + # echo -n sleep > /sys/kernel/profiling + +after boot causes the system to lock up. + +Lockdep reports + + kthreadd/3 is trying to acquire lock: + ffff93ac82e08d58 (&p->pi_lock){....}-{2:2}, at: get_wchan+0x32/0x70 + + but task is already holding lock: + ffff93ac82e08d58 (&p->pi_lock){....}-{2:2}, at: try_to_wake_up+0x53/0x370 + +with the call trace being + + lock_acquire+0xc8/0x2f0 + get_wchan+0x32/0x70 + __update_stats_enqueue_sleeper+0x151/0x430 + enqueue_entity+0x4b0/0x520 + enqueue_task_fair+0x92/0x6b0 + ttwu_do_activate+0x73/0x140 + try_to_wake_up+0x213/0x370 + swake_up_locked+0x20/0x50 + complete+0x2f/0x40 + kthread+0xfb/0x180 + +However, since nobody noticed this regression for more than two years, +let's remove 'profile=sleep' support based on the assumption that nobody +needs this functionality. + +Fixes: 42a20f86dc19 ("sched: Add wrapper for get_wchan() to keep task blocked") +Cc: stable@vger.kernel.org # v5.16+ +Signed-off-by: Tetsuo Handa +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/admin-guide/kernel-parameters.txt | 4 +--- + include/linux/profile.h | 1 - + kernel/profile.c | 11 +---------- + kernel/sched/stats.c | 10 ---------- + 4 files changed, 2 insertions(+), 24 deletions(-) + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4655,11 +4655,9 @@ + + profile= [KNL] Enable kernel profiling via /proc/profile + Format: [,] +- Param: : "schedule", "sleep", or "kvm" ++ Param: : "schedule" or "kvm" + [defaults to kernel profiling] + Param: "schedule" - profile schedule points. +- Param: "sleep" - profile D-state sleeping (millisecs). 
+- Requires CONFIG_SCHEDSTATS + Param: "kvm" - profile VM exits. + Param: - step/bucket size as a power of 2 for + statistical time based profiling. +--- a/include/linux/profile.h ++++ b/include/linux/profile.h +@@ -11,7 +11,6 @@ + + #define CPU_PROFILING 1 + #define SCHED_PROFILING 2 +-#define SLEEP_PROFILING 3 + #define KVM_PROFILING 4 + + struct proc_dir_entry; +--- a/kernel/profile.c ++++ b/kernel/profile.c +@@ -57,20 +57,11 @@ static DEFINE_MUTEX(profile_flip_mutex); + int profile_setup(char *str) + { + static const char schedstr[] = "schedule"; +- static const char sleepstr[] = "sleep"; + static const char kvmstr[] = "kvm"; + const char *select = NULL; + int par; + +- if (!strncmp(str, sleepstr, strlen(sleepstr))) { +-#ifdef CONFIG_SCHEDSTATS +- force_schedstat_enabled(); +- prof_on = SLEEP_PROFILING; +- select = sleepstr; +-#else +- pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n"); +-#endif /* CONFIG_SCHEDSTATS */ +- } else if (!strncmp(str, schedstr, strlen(schedstr))) { ++ if (!strncmp(str, schedstr, strlen(schedstr))) { + prof_on = SCHED_PROFILING; + select = schedstr; + } else if (!strncmp(str, kvmstr, strlen(kvmstr))) { +--- a/kernel/sched/stats.c ++++ b/kernel/sched/stats.c +@@ -92,16 +92,6 @@ void __update_stats_enqueue_sleeper(stru + + trace_sched_stat_blocked(p, delta); + +- /* +- * Blocking time is in units of nanosecs, so shift by +- * 20 to get a milliseconds-range estimation of the +- * amount of time that the task spent sleeping: +- */ +- if (unlikely(prof_on == SLEEP_PROFILING)) { +- profile_hits(SLEEP_PROFILING, +- (void *)get_wchan(p), +- delta >> 20); +- } + account_scheduler_latency(p, delta >> 10, 0); + } + } diff --git a/queue-6.6/sched-cputime-fix-mul_u64_u64_div_u64-precision-for-cputime.patch b/queue-6.6/sched-cputime-fix-mul_u64_u64_div_u64-precision-for-cputime.patch new file mode 100644 index 00000000000..5082baeea87 --- /dev/null +++ b/queue-6.6/sched-cputime-fix-mul_u64_u64_div_u64-precision-for-cputime.patch @@ 
 -0,0 +1,58 @@ +From 77baa5bafcbe1b2a15ef9c37232c21279c95481c Mon Sep 17 00:00:00 2001 +From: Zheng Zucheng +Date: Fri, 26 Jul 2024 02:32:35 +0000 +Subject: sched/cputime: Fix mul_u64_u64_div_u64() precision for cputime + +From: Zheng Zucheng + +commit 77baa5bafcbe1b2a15ef9c37232c21279c95481c upstream. + +In extreme test scenarios: +the 14th field utime in /proc/xx/stat is greater than sum_exec_runtime, +utime = 18446744073709518790 ns, rtime = 135989749728000 ns + +In cputime_adjust() process, stime is greater than rtime due to +mul_u64_u64_div_u64() precision problem. +before call mul_u64_u64_div_u64(), +stime = 175136586720000, rtime = 135989749728000, utime = 1416780000. +after call mul_u64_u64_div_u64(), +stime = 135989949653530 + +unsigned reversion occurs because rtime is less than stime. +utime = rtime - stime = 135989749728000 - 135989949653530 + = -199925530 + = (u64)18446744073709518790 + +Trigger condition: + 1). User task run in kernel mode most of time + 2). ARM64 architecture + 3). TICK_CPU_ACCOUNTING=y + CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set + +Fix mul_u64_u64_div_u64() conversion precision by reset stime to rtime + +Fixes: 3dc167ba5729 ("sched/cputime: Improve cputime_adjust()") +Signed-off-by: Zheng Zucheng +Signed-off-by: Peter Zijlstra (Intel) +Cc: +Link: https://lkml.kernel.org/r/20240726023235.217771-1-zhengzucheng@huawei.com +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/cputime.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/kernel/sched/cputime.c ++++ b/kernel/sched/cputime.c +@@ -595,6 +595,12 @@ void cputime_adjust(struct task_cputime + } + + stime = mul_u64_u64_div_u64(stime, rtime, stime + utime); ++ /* ++ * Because mul_u64_u64_div_u64() can approximate on some ++ * architectures; enforce the constraint that: a*b/(b+c) <= a. 
++ */ ++ if (unlikely(stime > rtime)) ++ stime = rtime; + + update: + /* diff --git a/queue-6.6/scsi-mpt3sas-avoid-iommu-page-faults-on-report-zones.patch b/queue-6.6/scsi-mpt3sas-avoid-iommu-page-faults-on-report-zones.patch new file mode 100644 index 00000000000..91d709411b0 --- /dev/null +++ b/queue-6.6/scsi-mpt3sas-avoid-iommu-page-faults-on-report-zones.patch @@ -0,0 +1,96 @@ +From 82dbb57ac8d06dfe8227ba9ab11a49de2b475ae5 Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Fri, 19 Jul 2024 16:39:12 +0900 +Subject: scsi: mpt3sas: Avoid IOMMU page faults on REPORT ZONES + +From: Damien Le Moal + +commit 82dbb57ac8d06dfe8227ba9ab11a49de2b475ae5 upstream. + +Some firmware versions of the 9600 series SAS HBA byte-swap the REPORT +ZONES command reply buffer from ATA-ZAC devices by directly accessing the +buffer in the host memory. This does not respect the default command DMA +direction and causes IOMMU page faults on architectures with an IOMMU +enforcing write-only mappings for DMA_FROM_DEVICE DMA direction (e.g. AMD +hosts). + +scsi 18:0:0:0: Direct-Access-ZBC ATA WDC WSH722020AL W870 PQ: 0 ANSI: 6 +scsi 18:0:0:0: SATA: handle(0x0027), sas_addr(0x300062b2083e7c40), phy(0), device_name(0x5000cca29dc35e11) +scsi 18:0:0:0: enclosure logical id (0x300062b208097c40), slot(0) +scsi 18:0:0:0: enclosure level(0x0000), connector name( C0.0) +scsi 18:0:0:0: atapi(n), ncq(y), asyn_notify(n), smart(y), fua(y), sw_preserve(y) +scsi 18:0:0:0: qdepth(32), tagged(1), scsi_level(7), cmd_que(1) +sd 18:0:0:0: Attached scsi generic sg2 type 20 +sd 18:0:0:0: [sdc] Host-managed zoned block device +mpt3sas 0000:41:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0021 address=0xfff9b200 flags=0x0050] +mpt3sas 0000:41:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0021 address=0xfff9b300 flags=0x0050] +mpt3sas_cm0: mpt3sas_ctl_pre_reset_handler: Releasing the trace buffer due to adapter reset. 
+mpt3sas_cm0 fault info from func: mpt3sas_base_make_ioc_ready +mpt3sas_cm0: fault_state(0x2666)! +mpt3sas_cm0: sending diag reset !! +mpt3sas_cm0: diag reset: SUCCESS +sd 18:0:0:0: [sdc] REPORT ZONES start lba 0 failed +sd 18:0:0:0: [sdc] REPORT ZONES: Result: hostbyte=DID_RESET driverbyte=DRIVER_OK +sd 18:0:0:0: [sdc] 0 4096-byte logical blocks: (0 B/0 B) + +Avoid such issue by always mapping the buffer of REPORT ZONES commands +using DMA_BIDIRECTIONAL (read+write IOMMU mapping). This is done by +introducing the helper function _base_scsi_dma_map() and using this helper +in _base_build_sg_scmd() and _base_build_sg_scmd_ieee() instead of calling +directly scsi_dma_map(). + +Fixes: 471ef9d4e498 ("mpt3sas: Build MPI SGL LIST on GEN2 HBAs and IEEE SGL LIST on GEN3 HBAs") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Link: https://lore.kernel.org/r/20240719073913.179559-3-dlemoal@kernel.org +Reviewed-by: Christoph Hellwig +Reviewed-by: Johannes Thumshirn +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/mpt3sas/mpt3sas_base.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -2671,6 +2671,22 @@ _base_build_zero_len_sge_ieee(struct MPT + _base_add_sg_single_ieee(paddr, sgl_flags, 0, 0, -1); + } + ++static inline int _base_scsi_dma_map(struct scsi_cmnd *cmd) ++{ ++ /* ++ * Some firmware versions byte-swap the REPORT ZONES command reply from ++ * ATA-ZAC devices by directly accessing in the host buffer. This does ++ * not respect the default command DMA direction and causes IOMMU page ++ * faults on some architectures with an IOMMU enforcing write mappings ++ * (e.g. AMD hosts). Avoid such issue by making the report zones buffer ++ * mapping bi-directional. 
++ */ ++ if (cmd->cmnd[0] == ZBC_IN && cmd->cmnd[1] == ZI_REPORT_ZONES) ++ cmd->sc_data_direction = DMA_BIDIRECTIONAL; ++ ++ return scsi_dma_map(cmd); ++} ++ + /** + * _base_build_sg_scmd - main sg creation routine + * pcie_device is unused here! +@@ -2717,7 +2733,7 @@ _base_build_sg_scmd(struct MPT3SAS_ADAPT + sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; + + sg_scmd = scsi_sglist(scmd); +- sges_left = scsi_dma_map(scmd); ++ sges_left = _base_scsi_dma_map(scmd); + if (sges_left < 0) + return -ENOMEM; + +@@ -2861,7 +2877,7 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ + } + + sg_scmd = scsi_sglist(scmd); +- sges_left = scsi_dma_map(scmd); ++ sges_left = _base_scsi_dma_map(scmd); + if (sges_left < 0) + return -ENOMEM; + diff --git a/queue-6.6/series b/queue-6.6/series index 87a84034c29..c68ee3f5df9 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -68,3 +68,9 @@ ext4-fix-uninitialized-variable-in-ext4_inlinedir_to.patch jbd2-avoid-memleak-in-jbd2_journal_write_metadata_bu.patch s390-sclp-prevent-release-of-buffer-in-i-o.patch sunrpc-fix-a-race-to-wake-a-sync-task.patch +profiling-remove-profile-sleep-support.patch +scsi-mpt3sas-avoid-iommu-page-faults-on-report-zones.patch +irqchip-meson-gpio-convert-meson_gpio_irq_controller-lock-to-raw_spinlock_t.patch +irqchip-loongarch-cpu-fix-return-value-of-lpic_gsi_to_irq.patch +sched-cputime-fix-mul_u64_u64_div_u64-precision-for-cputime.patch +net-drop-bad-gso-csum_start-and-offset-in-virtio_net_hdr.patch