--- /dev/null
+From b88f55389ad27f05ed84af9e1026aa64dbfabc9a Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Date: Sun, 4 Aug 2024 18:48:10 +0900
+Subject: profiling: remove profile=sleep support
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+commit b88f55389ad27f05ed84af9e1026aa64dbfabc9a upstream.
+
+The kernel sleep profile is no longer working due to a recursive locking
+bug introduced by commit 42a20f86dc19 ("sched: Add wrapper for get_wchan()
+to keep task blocked").
+
+Booting with the 'profile=sleep' kernel command line option added or
+executing
+
+ # echo -n sleep > /sys/kernel/profiling
+
+after boot causes the system to lock up.
+
+Lockdep reports
+
+ kthreadd/3 is trying to acquire lock:
+ ffff93ac82e08d58 (&p->pi_lock){....}-{2:2}, at: get_wchan+0x32/0x70
+
+ but task is already holding lock:
+ ffff93ac82e08d58 (&p->pi_lock){....}-{2:2}, at: try_to_wake_up+0x53/0x370
+
+with the call trace being
+
+ lock_acquire+0xc8/0x2f0
+ get_wchan+0x32/0x70
+ __update_stats_enqueue_sleeper+0x151/0x430
+ enqueue_entity+0x4b0/0x520
+ enqueue_task_fair+0x92/0x6b0
+ ttwu_do_activate+0x73/0x140
+ try_to_wake_up+0x213/0x370
+ swake_up_locked+0x20/0x50
+ complete+0x2f/0x40
+ kthread+0xfb/0x180
+
+However, since nobody noticed this regression for more than two years,
+let's remove 'profile=sleep' support based on the assumption that nobody
+needs this functionality.
+
+Fixes: 42a20f86dc19 ("sched: Add wrapper for get_wchan() to keep task blocked")
+Cc: stable@vger.kernel.org # v5.16+
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt | 4 +---
+ include/linux/profile.h | 1 -
+ kernel/profile.c | 16 +---------------
+ kernel/sched/fair.c | 10 ----------
+ 4 files changed, 2 insertions(+), 29 deletions(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -4355,11 +4355,9 @@
+
+ profile= [KNL] Enable kernel profiling via /proc/profile
+ Format: [<profiletype>,]<number>
+- Param: <profiletype>: "schedule", "sleep", or "kvm"
++ Param: <profiletype>: "schedule" or "kvm"
+ [defaults to kernel profiling]
+ Param: "schedule" - profile schedule points.
+- Param: "sleep" - profile D-state sleeping (millisecs).
+- Requires CONFIG_SCHEDSTATS
+ Param: "kvm" - profile VM exits.
+ Param: <number> - step/bucket size as a power of 2 for
+ statistical time based profiling.
+--- a/include/linux/profile.h
++++ b/include/linux/profile.h
+@@ -11,7 +11,6 @@
+
+ #define CPU_PROFILING 1
+ #define SCHED_PROFILING 2
+-#define SLEEP_PROFILING 3
+ #define KVM_PROFILING 4
+
+ struct proc_dir_entry;
+--- a/kernel/profile.c
++++ b/kernel/profile.c
+@@ -57,24 +57,10 @@ static DEFINE_MUTEX(profile_flip_mutex);
+ int profile_setup(char *str)
+ {
+ static const char schedstr[] = "schedule";
+- static const char sleepstr[] = "sleep";
+ static const char kvmstr[] = "kvm";
+ int par;
+
+- if (!strncmp(str, sleepstr, strlen(sleepstr))) {
+-#ifdef CONFIG_SCHEDSTATS
+- force_schedstat_enabled();
+- prof_on = SLEEP_PROFILING;
+- if (str[strlen(sleepstr)] == ',')
+- str += strlen(sleepstr) + 1;
+- if (get_option(&str, &par))
+- prof_shift = clamp(par, 0, BITS_PER_LONG - 1);
+- pr_info("kernel sleep profiling enabled (shift: %u)\n",
+- prof_shift);
+-#else
+- pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n");
+-#endif /* CONFIG_SCHEDSTATS */
+- } else if (!strncmp(str, schedstr, strlen(schedstr))) {
++ if (!strncmp(str, schedstr, strlen(schedstr))) {
+ prof_on = SCHED_PROFILING;
+ if (str[strlen(schedstr)] == ',')
+ str += strlen(schedstr) + 1;
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -988,16 +988,6 @@ update_stats_enqueue_sleeper(struct cfs_
+
+ trace_sched_stat_blocked(tsk, delta);
+
+- /*
+- * Blocking time is in units of nanosecs, so shift by
+- * 20 to get a milliseconds-range estimation of the
+- * amount of time that the task spent sleeping:
+- */
+- if (unlikely(prof_on == SLEEP_PROFILING)) {
+- profile_hits(SLEEP_PROFILING,
+- (void *)get_wchan(tsk),
+- delta >> 20);
+- }
+ account_scheduler_latency(tsk, delta >> 10, 0);
+ }
+ }
--- /dev/null
+From 77baa5bafcbe1b2a15ef9c37232c21279c95481c Mon Sep 17 00:00:00 2001
+From: Zheng Zucheng <zhengzucheng@huawei.com>
+Date: Fri, 26 Jul 2024 02:32:35 +0000
+Subject: sched/cputime: Fix mul_u64_u64_div_u64() precision for cputime
+
+From: Zheng Zucheng <zhengzucheng@huawei.com>
+
+commit 77baa5bafcbe1b2a15ef9c37232c21279c95481c upstream.
+
+In extreme test scenarios:
+the 14th field utime in /proc/xx/stat is greater than sum_exec_runtime,
+utime = 18446744073709518790 ns, rtime = 135989749728000 ns
+
+In cputime_adjust() process, stime is greater than rtime due to
+mul_u64_u64_div_u64() precision problem.
+before call mul_u64_u64_div_u64(),
+stime = 175136586720000, rtime = 135989749728000, utime = 1416780000.
+after call mul_u64_u64_div_u64(),
+stime = 135989949653530
+
+Unsigned wraparound occurs because rtime is less than stime:
+utime = rtime - stime = 135989749728000 - 135989949653530
+ = -199925530
+ = (u64)18446744073709518790
+
+Trigger condition:
+ 1). User task run in kernel mode most of time
+ 2). ARM64 architecture
+ 3). TICK_CPU_ACCOUNTING=y
+ CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
+
+Fix mul_u64_u64_div_u64() conversion precision by resetting stime to rtime.
+
+Fixes: 3dc167ba5729 ("sched/cputime: Improve cputime_adjust()")
+Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20240726023235.217771-1-zhengzucheng@huawei.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/cputime.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/kernel/sched/cputime.c
++++ b/kernel/sched/cputime.c
+@@ -577,6 +577,12 @@ void cputime_adjust(struct task_cputime
+ }
+
+ stime = mul_u64_u64_div_u64(stime, rtime, stime + utime);
++ /*
++ * Because mul_u64_u64_div_u64() can approximate on some
++ * architectures; enforce the constraint that: a*b/(b+c) <= a.
++ */
++ if (unlikely(stime > rtime))
++ stime = rtime;
+
+ update:
+ /*
--- /dev/null
+From 82dbb57ac8d06dfe8227ba9ab11a49de2b475ae5 Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <dlemoal@kernel.org>
+Date: Fri, 19 Jul 2024 16:39:12 +0900
+Subject: scsi: mpt3sas: Avoid IOMMU page faults on REPORT ZONES
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+commit 82dbb57ac8d06dfe8227ba9ab11a49de2b475ae5 upstream.
+
+Some firmware versions of the 9600 series SAS HBA byte-swap the REPORT
+ZONES command reply buffer from ATA-ZAC devices by directly accessing the
+buffer in the host memory. This does not respect the default command DMA
+direction and causes IOMMU page faults on architectures with an IOMMU
+enforcing write-only mappings for DMA_FROM_DEVICE DMA direction (e.g. AMD
+hosts).
+
+scsi 18:0:0:0: Direct-Access-ZBC ATA WDC WSH722020AL W870 PQ: 0 ANSI: 6
+scsi 18:0:0:0: SATA: handle(0x0027), sas_addr(0x300062b2083e7c40), phy(0), device_name(0x5000cca29dc35e11)
+scsi 18:0:0:0: enclosure logical id (0x300062b208097c40), slot(0)
+scsi 18:0:0:0: enclosure level(0x0000), connector name( C0.0)
+scsi 18:0:0:0: atapi(n), ncq(y), asyn_notify(n), smart(y), fua(y), sw_preserve(y)
+scsi 18:0:0:0: qdepth(32), tagged(1), scsi_level(7), cmd_que(1)
+sd 18:0:0:0: Attached scsi generic sg2 type 20
+sd 18:0:0:0: [sdc] Host-managed zoned block device
+mpt3sas 0000:41:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0021 address=0xfff9b200 flags=0x0050]
+mpt3sas 0000:41:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0021 address=0xfff9b300 flags=0x0050]
+mpt3sas_cm0: mpt3sas_ctl_pre_reset_handler: Releasing the trace buffer due to adapter reset.
+mpt3sas_cm0 fault info from func: mpt3sas_base_make_ioc_ready
+mpt3sas_cm0: fault_state(0x2666)!
+mpt3sas_cm0: sending diag reset !!
+mpt3sas_cm0: diag reset: SUCCESS
+sd 18:0:0:0: [sdc] REPORT ZONES start lba 0 failed
+sd 18:0:0:0: [sdc] REPORT ZONES: Result: hostbyte=DID_RESET driverbyte=DRIVER_OK
+sd 18:0:0:0: [sdc] 0 4096-byte logical blocks: (0 B/0 B)
+
+Avoid such issue by always mapping the buffer of REPORT ZONES commands
+using DMA_BIDIRECTIONAL (read+write IOMMU mapping). This is done by
+introducing the helper function _base_scsi_dma_map() and using this helper
+in _base_build_sg_scmd() and _base_build_sg_scmd_ieee() instead of calling
+directly scsi_dma_map().
+
+Fixes: 471ef9d4e498 ("mpt3sas: Build MPI SGL LIST on GEN2 HBAs and IEEE SGL LIST on GEN3 HBAs")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Link: https://lore.kernel.org/r/20240719073913.179559-3-dlemoal@kernel.org
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/mpt3sas/mpt3sas_base.c | 20 ++++++++++++++++++--
+ 1 file changed, 18 insertions(+), 2 deletions(-)
+
+--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
+@@ -2672,6 +2672,22 @@ _base_build_zero_len_sge_ieee(struct MPT
+ _base_add_sg_single_ieee(paddr, sgl_flags, 0, 0, -1);
+ }
+
++static inline int _base_scsi_dma_map(struct scsi_cmnd *cmd)
++{
++ /*
++ * Some firmware versions byte-swap the REPORT ZONES command reply from
++ * ATA-ZAC devices by directly accessing in the host buffer. This does
++ * not respect the default command DMA direction and causes IOMMU page
++ * faults on some architectures with an IOMMU enforcing write mappings
++ * (e.g. AMD hosts). Avoid such issue by making the report zones buffer
++ * mapping bi-directional.
++ */
++ if (cmd->cmnd[0] == ZBC_IN && cmd->cmnd[1] == ZI_REPORT_ZONES)
++ cmd->sc_data_direction = DMA_BIDIRECTIONAL;
++
++ return scsi_dma_map(cmd);
++}
++
+ /**
+ * _base_build_sg_scmd - main sg creation routine
+ * pcie_device is unused here!
+@@ -2718,7 +2734,7 @@ _base_build_sg_scmd(struct MPT3SAS_ADAPT
+ sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
+
+ sg_scmd = scsi_sglist(scmd);
+- sges_left = scsi_dma_map(scmd);
++ sges_left = _base_scsi_dma_map(scmd);
+ if (sges_left < 0)
+ return -ENOMEM;
+
+@@ -2862,7 +2878,7 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_
+ }
+
+ sg_scmd = scsi_sglist(scmd);
+- sges_left = scsi_dma_map(scmd);
++ sges_left = _base_scsi_dma_map(scmd);
+ if (sges_left < 0)
+ return -ENOMEM;
+
jbd2-avoid-memleak-in-jbd2_journal_write_metadata_bu.patch
s390-sclp-prevent-release-of-buffer-in-i-o.patch
sunrpc-fix-a-race-to-wake-a-sync-task.patch
+profiling-remove-profile-sleep-support.patch
+scsi-mpt3sas-avoid-iommu-page-faults-on-report-zones.patch
+sched-cputime-fix-mul_u64_u64_div_u64-precision-for-cputime.patch