From 35c98909c012b85c7135f28c8fb9d8d9ee53ad90 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 11 Aug 2024 18:01:25 +0200 Subject: [PATCH] 6.1-stable patches added patches: irqchip-loongarch-cpu-fix-return-value-of-lpic_gsi_to_irq.patch irqchip-meson-gpio-convert-meson_gpio_irq_controller-lock-to-raw_spinlock_t.patch profiling-remove-profile-sleep-support.patch sched-cputime-fix-mul_u64_u64_div_u64-precision-for-cputime.patch scsi-mpt3sas-avoid-iommu-page-faults-on-report-zones.patch --- ...-fix-return-value-of-lpic_gsi_to_irq.patch | 51 +++++++ ...rq_controller-lock-to-raw_spinlock_t.patch | 105 +++++++++++++++ ...ofiling-remove-profile-sleep-support.patch | 125 ++++++++++++++++++ ...64_u64_div_u64-precision-for-cputime.patch | 58 ++++++++ ...id-iommu-page-faults-on-report-zones.patch | 96 ++++++++++++++ queue-6.1/series | 5 + 6 files changed, 440 insertions(+) create mode 100644 queue-6.1/irqchip-loongarch-cpu-fix-return-value-of-lpic_gsi_to_irq.patch create mode 100644 queue-6.1/irqchip-meson-gpio-convert-meson_gpio_irq_controller-lock-to-raw_spinlock_t.patch create mode 100644 queue-6.1/profiling-remove-profile-sleep-support.patch create mode 100644 queue-6.1/sched-cputime-fix-mul_u64_u64_div_u64-precision-for-cputime.patch create mode 100644 queue-6.1/scsi-mpt3sas-avoid-iommu-page-faults-on-report-zones.patch diff --git a/queue-6.1/irqchip-loongarch-cpu-fix-return-value-of-lpic_gsi_to_irq.patch b/queue-6.1/irqchip-loongarch-cpu-fix-return-value-of-lpic_gsi_to_irq.patch new file mode 100644 index 00000000000..0b2e64c2e25 --- /dev/null +++ b/queue-6.1/irqchip-loongarch-cpu-fix-return-value-of-lpic_gsi_to_irq.patch @@ -0,0 +1,51 @@ +From 81a91abab1307d7725fa4620952c0767beae7753 Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Tue, 23 Jul 2024 14:45:08 +0800 +Subject: irqchip/loongarch-cpu: Fix return value of lpic_gsi_to_irq() + +From: Huacai Chen + +commit 81a91abab1307d7725fa4620952c0767beae7753 upstream. 
+ +lpic_gsi_to_irq() should return a valid Linux interrupt number if +acpi_register_gsi() succeeds, and return 0 otherwise. But lpic_gsi_to_irq() +converts a negative return value of acpi_register_gsi() to a positive value +silently. + +Convert the return value explicitly. + +Fixes: e8bba72b396c ("irqchip / ACPI: Introduce ACPI_IRQ_MODEL_LPIC for LoongArch") +Reported-by: Miao Wang +Signed-off-by: Huacai Chen +Signed-off-by: Thomas Gleixner +Reviewed-by: Jiaxun Yang +Cc: +Link: https://lore.kernel.org/r/20240723064508.35560-1-chenhuacai@loongson.cn +Signed-off-by: Greg Kroah-Hartman +--- + drivers/irqchip/irq-loongarch-cpu.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c +index 9d8f2c406043..b35903a06902 100644 +--- a/drivers/irqchip/irq-loongarch-cpu.c ++++ b/drivers/irqchip/irq-loongarch-cpu.c +@@ -18,11 +18,13 @@ struct fwnode_handle *cpuintc_handle; + + static u32 lpic_gsi_to_irq(u32 gsi) + { ++ int irq = 0; ++ + /* Only pch irqdomain transferring is required for LoongArch. */ + if (gsi >= GSI_MIN_PCH_IRQ && gsi <= GSI_MAX_PCH_IRQ) +- return acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH); ++ irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH); + +- return 0; ++ return (irq > 0) ? 
irq : 0; + } + + static struct fwnode_handle *lpic_get_gsi_domain_id(u32 gsi) +-- +2.46.0 + diff --git a/queue-6.1/irqchip-meson-gpio-convert-meson_gpio_irq_controller-lock-to-raw_spinlock_t.patch b/queue-6.1/irqchip-meson-gpio-convert-meson_gpio_irq_controller-lock-to-raw_spinlock_t.patch new file mode 100644 index 00000000000..f5fddfd4e0e --- /dev/null +++ b/queue-6.1/irqchip-meson-gpio-convert-meson_gpio_irq_controller-lock-to-raw_spinlock_t.patch @@ -0,0 +1,105 @@ +From f872d4af79fe8c71ae291ce8875b477e1669a6c7 Mon Sep 17 00:00:00 2001 +From: Arseniy Krasnov +Date: Mon, 29 Jul 2024 16:18:50 +0300 +Subject: irqchip/meson-gpio: Convert meson_gpio_irq_controller::lock to 'raw_spinlock_t' + +From: Arseniy Krasnov + +commit f872d4af79fe8c71ae291ce8875b477e1669a6c7 upstream. + +This lock is acquired under irq_desc::lock with interrupts disabled. + +When PREEMPT_RT is enabled, 'spinlock_t' becomes preemptible, which results +in invalid lock acquire context; + + [ BUG: Invalid wait context ] + swapper/0/1 is trying to lock: + ffff0000008fed30 (&ctl->lock){....}-{3:3}, at: meson_gpio_irq_update_bits0 + other info that might help us debug this: + context-{5:5} + 3 locks held by swapper/0/1: + #0: ffff0000003cd0f8 (&dev->mutex){....}-{4:4}, at: __driver_attach+0x90c + #1: ffff000004714650 (&desc->request_mutex){+.+.}-{4:4}, at: __setup_irq0 + #2: ffff0000047144c8 (&irq_desc_lock_class){-.-.}-{2:2}, at: __setup_irq0 + stack backtrace: + CPU: 1 PID: 1 Comm: swapper/0 Not tainted 6.9.9-sdkernel #1 + Call trace: + _raw_spin_lock_irqsave+0x60/0x88 + meson_gpio_irq_update_bits+0x34/0x70 + meson8_gpio_irq_set_type+0x78/0xc4 + meson_gpio_irq_set_type+0x30/0x60 + __irq_set_trigger+0x60/0x180 + __setup_irq+0x30c/0x6e0 + request_threaded_irq+0xec/0x1a4 + +Fixes: 215f4cc0fb20 ("irqchip/meson: Add support for gpio interrupt controller") +Signed-off-by: Arseniy Krasnov +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: 
https://lore.kernel.org/all/20240729131850.3015508-1-avkrasnov@salutedevices.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/irqchip/irq-meson-gpio.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/drivers/irqchip/irq-meson-gpio.c ++++ b/drivers/irqchip/irq-meson-gpio.c +@@ -168,7 +168,7 @@ struct meson_gpio_irq_controller { + void __iomem *base; + u32 channel_irqs[MAX_NUM_CHANNEL]; + DECLARE_BITMAP(channel_map, MAX_NUM_CHANNEL); +- spinlock_t lock; ++ raw_spinlock_t lock; + }; + + static void meson_gpio_irq_update_bits(struct meson_gpio_irq_controller *ctl, +@@ -177,14 +177,14 @@ static void meson_gpio_irq_update_bits(s + unsigned long flags; + u32 tmp; + +- spin_lock_irqsave(&ctl->lock, flags); ++ raw_spin_lock_irqsave(&ctl->lock, flags); + + tmp = readl_relaxed(ctl->base + reg); + tmp &= ~mask; + tmp |= val; + writel_relaxed(tmp, ctl->base + reg); + +- spin_unlock_irqrestore(&ctl->lock, flags); ++ raw_spin_unlock_irqrestore(&ctl->lock, flags); + } + + static void meson_gpio_irq_init_dummy(struct meson_gpio_irq_controller *ctl) +@@ -234,12 +234,12 @@ meson_gpio_irq_request_channel(struct me + unsigned long flags; + unsigned int idx; + +- spin_lock_irqsave(&ctl->lock, flags); ++ raw_spin_lock_irqsave(&ctl->lock, flags); + + /* Find a free channel */ + idx = find_first_zero_bit(ctl->channel_map, ctl->params->nr_channels); + if (idx >= ctl->params->nr_channels) { +- spin_unlock_irqrestore(&ctl->lock, flags); ++ raw_spin_unlock_irqrestore(&ctl->lock, flags); + pr_err("No channel available\n"); + return -ENOSPC; + } +@@ -247,7 +247,7 @@ meson_gpio_irq_request_channel(struct me + /* Mark the channel as used */ + set_bit(idx, ctl->channel_map); + +- spin_unlock_irqrestore(&ctl->lock, flags); ++ raw_spin_unlock_irqrestore(&ctl->lock, flags); + + /* + * Setup the mux of the channel to route the signal of the pad +@@ -557,7 +557,7 @@ static int meson_gpio_irq_of_init(struct + if (!ctl) + return -ENOMEM; + +- spin_lock_init(&ctl->lock); 
++ raw_spin_lock_init(&ctl->lock); + + ctl->base = of_iomap(node, 0); + if (!ctl->base) { diff --git a/queue-6.1/profiling-remove-profile-sleep-support.patch b/queue-6.1/profiling-remove-profile-sleep-support.patch new file mode 100644 index 00000000000..8511f90c9b8 --- /dev/null +++ b/queue-6.1/profiling-remove-profile-sleep-support.patch @@ -0,0 +1,125 @@ +From b88f55389ad27f05ed84af9e1026aa64dbfabc9a Mon Sep 17 00:00:00 2001 +From: Tetsuo Handa +Date: Sun, 4 Aug 2024 18:48:10 +0900 +Subject: profiling: remove profile=sleep support + +From: Tetsuo Handa + +commit b88f55389ad27f05ed84af9e1026aa64dbfabc9a upstream. + +The kernel sleep profile is no longer working due to a recursive locking +bug introduced by commit 42a20f86dc19 ("sched: Add wrapper for get_wchan() +to keep task blocked") + +Booting with the 'profile=sleep' kernel command line option added or +executing + + # echo -n sleep > /sys/kernel/profiling + +after boot causes the system to lock up. + +Lockdep reports + + kthreadd/3 is trying to acquire lock: + ffff93ac82e08d58 (&p->pi_lock){....}-{2:2}, at: get_wchan+0x32/0x70 + + but task is already holding lock: + ffff93ac82e08d58 (&p->pi_lock){....}-{2:2}, at: try_to_wake_up+0x53/0x370 + +with the call trace being + + lock_acquire+0xc8/0x2f0 + get_wchan+0x32/0x70 + __update_stats_enqueue_sleeper+0x151/0x430 + enqueue_entity+0x4b0/0x520 + enqueue_task_fair+0x92/0x6b0 + ttwu_do_activate+0x73/0x140 + try_to_wake_up+0x213/0x370 + swake_up_locked+0x20/0x50 + complete+0x2f/0x40 + kthread+0xfb/0x180 + +However, since nobody noticed this regression for more than two years, +let's remove 'profile=sleep' support based on the assumption that nobody +needs this functionality. 
+ +Fixes: 42a20f86dc19 ("sched: Add wrapper for get_wchan() to keep task blocked") +Cc: stable@vger.kernel.org # v5.16+ +Signed-off-by: Tetsuo Handa +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/admin-guide/kernel-parameters.txt | 4 +--- + include/linux/profile.h | 1 - + kernel/profile.c | 11 +---------- + kernel/sched/stats.c | 10 ---------- + 4 files changed, 2 insertions(+), 24 deletions(-) + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4556,11 +4556,9 @@ + + profile= [KNL] Enable kernel profiling via /proc/profile + Format: [<profiletype>,]<number> +- Param: <profiletype>: "schedule", "sleep", or "kvm" ++ Param: <profiletype>: "schedule" or "kvm" + [defaults to kernel profiling] + Param: "schedule" - profile schedule points. +- Param: "sleep" - profile D-state sleeping (millisecs). +- Requires CONFIG_SCHEDSTATS + Param: "kvm" - profile VM exits. + Param: <number> - step/bucket size as a power of 2 for + statistical time based profiling. 
+--- a/include/linux/profile.h ++++ b/include/linux/profile.h +@@ -11,7 +11,6 @@ + + #define CPU_PROFILING 1 + #define SCHED_PROFILING 2 +-#define SLEEP_PROFILING 3 + #define KVM_PROFILING 4 + + struct proc_dir_entry; +--- a/kernel/profile.c ++++ b/kernel/profile.c +@@ -57,20 +57,11 @@ static DEFINE_MUTEX(profile_flip_mutex); + int profile_setup(char *str) + { + static const char schedstr[] = "schedule"; +- static const char sleepstr[] = "sleep"; + static const char kvmstr[] = "kvm"; + const char *select = NULL; + int par; + +- if (!strncmp(str, sleepstr, strlen(sleepstr))) { +-#ifdef CONFIG_SCHEDSTATS +- force_schedstat_enabled(); +- prof_on = SLEEP_PROFILING; +- select = sleepstr; +-#else +- pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n"); +-#endif /* CONFIG_SCHEDSTATS */ +- } else if (!strncmp(str, schedstr, strlen(schedstr))) { ++ if (!strncmp(str, schedstr, strlen(schedstr))) { + prof_on = SCHED_PROFILING; + select = schedstr; + } else if (!strncmp(str, kvmstr, strlen(kvmstr))) { +--- a/kernel/sched/stats.c ++++ b/kernel/sched/stats.c +@@ -92,16 +92,6 @@ void __update_stats_enqueue_sleeper(stru + + trace_sched_stat_blocked(p, delta); + +- /* +- * Blocking time is in units of nanosecs, so shift by +- * 20 to get a milliseconds-range estimation of the +- * amount of time that the task spent sleeping: +- */ +- if (unlikely(prof_on == SLEEP_PROFILING)) { +- profile_hits(SLEEP_PROFILING, +- (void *)get_wchan(p), +- delta >> 20); +- } + account_scheduler_latency(p, delta >> 10, 0); + } + } diff --git a/queue-6.1/sched-cputime-fix-mul_u64_u64_div_u64-precision-for-cputime.patch b/queue-6.1/sched-cputime-fix-mul_u64_u64_div_u64-precision-for-cputime.patch new file mode 100644 index 00000000000..d4cedea8a8e --- /dev/null +++ b/queue-6.1/sched-cputime-fix-mul_u64_u64_div_u64-precision-for-cputime.patch @@ -0,0 +1,58 @@ +From 77baa5bafcbe1b2a15ef9c37232c21279c95481c Mon Sep 17 00:00:00 2001 +From: Zheng Zucheng +Date: Fri, 26 Jul 2024 02:32:35 +0000 
+Subject: sched/cputime: Fix mul_u64_u64_div_u64() precision for cputime + +From: Zheng Zucheng + +commit 77baa5bafcbe1b2a15ef9c37232c21279c95481c upstream. + +In extreme test scenarios: +the 14th field utime in /proc/xx/stat is greater than sum_exec_runtime, +utime = 18446744073709518790 ns, rtime = 135989749728000 ns + +In cputime_adjust() process, stime is greater than rtime due to +mul_u64_u64_div_u64() precision problem. +before call mul_u64_u64_div_u64(), +stime = 175136586720000, rtime = 135989749728000, utime = 1416780000. +after call mul_u64_u64_div_u64(), +stime = 135989949653530 + +unsigned reversion occurs because rtime is less than stime. +utime = rtime - stime = 135989749728000 - 135989949653530 + = -199925530 + = (u64)18446744073709518790 + +Trigger condition: + 1). User task run in kernel mode most of time + 2). ARM64 architecture + 3). TICK_CPU_ACCOUNTING=y + CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set + +Fix mul_u64_u64_div_u64() conversion precision by reset stime to rtime + +Fixes: 3dc167ba5729 ("sched/cputime: Improve cputime_adjust()") +Signed-off-by: Zheng Zucheng +Signed-off-by: Peter Zijlstra (Intel) +Cc: +Link: https://lkml.kernel.org/r/20240726023235.217771-1-zhengzucheng@huawei.com +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/cputime.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/kernel/sched/cputime.c ++++ b/kernel/sched/cputime.c +@@ -591,6 +591,12 @@ void cputime_adjust(struct task_cputime + } + + stime = mul_u64_u64_div_u64(stime, rtime, stime + utime); ++ /* ++ * Because mul_u64_u64_div_u64() can approximate on some ++ * architectures; enforce the constraint that: a*b/(b+c) <= a. 
++ */ ++ if (unlikely(stime > rtime)) ++ stime = rtime; + + update: + /* diff --git a/queue-6.1/scsi-mpt3sas-avoid-iommu-page-faults-on-report-zones.patch b/queue-6.1/scsi-mpt3sas-avoid-iommu-page-faults-on-report-zones.patch new file mode 100644 index 00000000000..5c27f4171a7 --- /dev/null +++ b/queue-6.1/scsi-mpt3sas-avoid-iommu-page-faults-on-report-zones.patch @@ -0,0 +1,96 @@ +From 82dbb57ac8d06dfe8227ba9ab11a49de2b475ae5 Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Fri, 19 Jul 2024 16:39:12 +0900 +Subject: scsi: mpt3sas: Avoid IOMMU page faults on REPORT ZONES + +From: Damien Le Moal + +commit 82dbb57ac8d06dfe8227ba9ab11a49de2b475ae5 upstream. + +Some firmware versions of the 9600 series SAS HBA byte-swap the REPORT +ZONES command reply buffer from ATA-ZAC devices by directly accessing the +buffer in the host memory. This does not respect the default command DMA +direction and causes IOMMU page faults on architectures with an IOMMU +enforcing write-only mappings for DMA_FROM_DEVICE DMA direction (e.g. AMD +hosts). + +scsi 18:0:0:0: Direct-Access-ZBC ATA WDC WSH722020AL W870 PQ: 0 ANSI: 6 +scsi 18:0:0:0: SATA: handle(0x0027), sas_addr(0x300062b2083e7c40), phy(0), device_name(0x5000cca29dc35e11) +scsi 18:0:0:0: enclosure logical id (0x300062b208097c40), slot(0) +scsi 18:0:0:0: enclosure level(0x0000), connector name( C0.0) +scsi 18:0:0:0: atapi(n), ncq(y), asyn_notify(n), smart(y), fua(y), sw_preserve(y) +scsi 18:0:0:0: qdepth(32), tagged(1), scsi_level(7), cmd_que(1) +sd 18:0:0:0: Attached scsi generic sg2 type 20 +sd 18:0:0:0: [sdc] Host-managed zoned block device +mpt3sas 0000:41:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0021 address=0xfff9b200 flags=0x0050] +mpt3sas 0000:41:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0021 address=0xfff9b300 flags=0x0050] +mpt3sas_cm0: mpt3sas_ctl_pre_reset_handler: Releasing the trace buffer due to adapter reset. 
+mpt3sas_cm0 fault info from func: mpt3sas_base_make_ioc_ready +mpt3sas_cm0: fault_state(0x2666)! +mpt3sas_cm0: sending diag reset !! +mpt3sas_cm0: diag reset: SUCCESS +sd 18:0:0:0: [sdc] REPORT ZONES start lba 0 failed +sd 18:0:0:0: [sdc] REPORT ZONES: Result: hostbyte=DID_RESET driverbyte=DRIVER_OK +sd 18:0:0:0: [sdc] 0 4096-byte logical blocks: (0 B/0 B) + +Avoid such issue by always mapping the buffer of REPORT ZONES commands +using DMA_BIDIRECTIONAL (read+write IOMMU mapping). This is done by +introducing the helper function _base_scsi_dma_map() and using this helper +in _base_build_sg_scmd() and _base_build_sg_scmd_ieee() instead of calling +directly scsi_dma_map(). + +Fixes: 471ef9d4e498 ("mpt3sas: Build MPI SGL LIST on GEN2 HBAs and IEEE SGL LIST on GEN3 HBAs") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Link: https://lore.kernel.org/r/20240719073913.179559-3-dlemoal@kernel.org +Reviewed-by: Christoph Hellwig +Reviewed-by: Johannes Thumshirn +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/mpt3sas/mpt3sas_base.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -2672,6 +2672,22 @@ _base_build_zero_len_sge_ieee(struct MPT + _base_add_sg_single_ieee(paddr, sgl_flags, 0, 0, -1); + } + ++static inline int _base_scsi_dma_map(struct scsi_cmnd *cmd) ++{ ++ /* ++ * Some firmware versions byte-swap the REPORT ZONES command reply from ++ * ATA-ZAC devices by directly accessing in the host buffer. This does ++ * not respect the default command DMA direction and causes IOMMU page ++ * faults on some architectures with an IOMMU enforcing write mappings ++ * (e.g. AMD hosts). Avoid such issue by making the report zones buffer ++ * mapping bi-directional. 
++ */ ++ if (cmd->cmnd[0] == ZBC_IN && cmd->cmnd[1] == ZI_REPORT_ZONES) ++ cmd->sc_data_direction = DMA_BIDIRECTIONAL; ++ ++ return scsi_dma_map(cmd); ++} ++ + /** + * _base_build_sg_scmd - main sg creation routine + * pcie_device is unused here! +@@ -2718,7 +2734,7 @@ _base_build_sg_scmd(struct MPT3SAS_ADAPT + sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; + + sg_scmd = scsi_sglist(scmd); +- sges_left = scsi_dma_map(scmd); ++ sges_left = _base_scsi_dma_map(scmd); + if (sges_left < 0) + return -ENOMEM; + +@@ -2862,7 +2878,7 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ + } + + sg_scmd = scsi_sglist(scmd); +- sges_left = scsi_dma_map(scmd); ++ sges_left = _base_scsi_dma_map(scmd); + if (sges_left < 0) + return -ENOMEM; + diff --git a/queue-6.1/series b/queue-6.1/series index da54b5dd0f8..7597c02d514 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -51,3 +51,8 @@ s390-sclp-prevent-release-of-buffer-in-i-o.patch sunrpc-fix-a-race-to-wake-a-sync-task.patch bus-mhi-host-pci_generic-add-support-for-telit-fe990-modem.patch revert-bpftool-mount-bpffs-when-pinmaps-path-not-under-the-bpffs.patch +profiling-remove-profile-sleep-support.patch +scsi-mpt3sas-avoid-iommu-page-faults-on-report-zones.patch +irqchip-meson-gpio-convert-meson_gpio_irq_controller-lock-to-raw_spinlock_t.patch +irqchip-loongarch-cpu-fix-return-value-of-lpic_gsi_to_irq.patch +sched-cputime-fix-mul_u64_u64_div_u64-precision-for-cputime.patch -- 2.47.3