--- /dev/null
+From bcdd64e90f964aecc64ba944654092aca540ae96 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 16 Feb 2025 18:55:45 +0100
+Subject: irqchip/jcore-aic, clocksource/drivers/jcore: Fix jcore-pit interrupt
+ request
+
+From: Artur Rojek <contact@artur-rojek.eu>
+
+[ Upstream commit d7e3fd658248f257006227285095d190e70ee73a ]
+
+The jcore-aic irqchip does not have separate interrupt numbers reserved for
+cpu-local vs global interrupts. Therefore the device drivers need to
+request the given interrupt as a per-CPU interrupt.
+
+69a9dcbd2d65 ("clocksource/drivers/jcore: Use request_percpu_irq()")
+converted the clocksource driver over to request_percpu_irq(), but failed
+to add all the required changes, resulting in a failure to register PIT
+interrupts.
+
+Fix this by:
+
+ 1) Explicitly mark the interrupt as per-CPU via irq_set_percpu_devid() in
+    jcore_pit_init().
+
+ 2) Enable and disable the per-CPU interrupt in the CPU hotplug callbacks,
+    as sketched below.
+
+ 3) Pass the correct per-CPU cookie to the interrupt handler by using
+    handle_percpu_devid_irq() instead of handle_percpu_irq() in
+    handle_jcore_irq().
+
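+The driver-side changes follow the generic per-CPU IRQ pattern. Below is a
+minimal sketch of that pattern only, not the actual jcore-pit code; the
+demo_* names, the alloc_percpu() cookie and the dynamic hotplug state are
+invented for the illustration:
+
+  /*
+   * Needs <linux/interrupt.h>, <linux/irq.h>, <linux/cpuhotplug.h> and
+   * <linux/percpu.h>. demo_irq would come from a mapping such as
+   * irq_of_parse_and_map() in a real driver.
+   */
+  static unsigned int demo_irq;
+  static int __percpu *demo_percpu_data;
+
+  static irqreturn_t demo_timer_interrupt(int irq, void *dev_id)
+  {
+          /* dev_id is the per-CPU cookie handed to request_percpu_irq(). */
+          return IRQ_HANDLED;
+  }
+
+  static int demo_starting_cpu(unsigned int cpu)
+  {
+          enable_percpu_irq(demo_irq, IRQ_TYPE_NONE);  /* enable on this CPU */
+          return 0;
+  }
+
+  static int demo_dying_cpu(unsigned int cpu)
+  {
+          disable_percpu_irq(demo_irq);                /* disable on this CPU */
+          return 0;
+  }
+
+  static int __init demo_init(void)
+  {
+          int err;
+
+          demo_percpu_data = alloc_percpu(int);
+          if (!demo_percpu_data)
+                  return -ENOMEM;
+
+          irq_set_percpu_devid(demo_irq);        /* mark the line as per-CPU */
+          err = request_percpu_irq(demo_irq, demo_timer_interrupt,
+                                   "demo", demo_percpu_data);
+          if (err) {
+                  free_percpu(demo_percpu_data);
+                  return err;
+          }
+
+          /* The dynamic state returns a positive state number on success. */
+          err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "demo:starting",
+                                  demo_starting_cpu, demo_dying_cpu);
+          return err < 0 ? err : 0;
+  }
+
+The jcore-pit change below has exactly this shape, with the enable/disable
+calls living in the CPUHP_AP_JCORE_TIMER_STARTING callbacks, while the
+jcore-aic change makes sure the per-CPU cookie actually reaches the handler.
+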
+[ tglx: Massage change log ]
+
+Fixes: 69a9dcbd2d65 ("clocksource/drivers/jcore: Use request_percpu_irq()")
+Signed-off-by: Artur Rojek <contact@artur-rojek.eu>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Uros Bizjak <ubizjak@gmail.com>
+Link: https://lore.kernel.org/all/20250216175545.35079-3-contact@artur-rojek.eu
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/clocksource/jcore-pit.c | 15 ++++++++++++++-
+ drivers/irqchip/irq-jcore-aic.c | 2 +-
+ 2 files changed, 15 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/clocksource/jcore-pit.c b/drivers/clocksource/jcore-pit.c
+index a3fe98cd38382..82815428f8f92 100644
+--- a/drivers/clocksource/jcore-pit.c
++++ b/drivers/clocksource/jcore-pit.c
+@@ -114,6 +114,18 @@ static int jcore_pit_local_init(unsigned cpu)
+ pit->periodic_delta = DIV_ROUND_CLOSEST(NSEC_PER_SEC, HZ * buspd);
+
+ clockevents_config_and_register(&pit->ced, freq, 1, ULONG_MAX);
++ enable_percpu_irq(pit->ced.irq, IRQ_TYPE_NONE);
++
++ return 0;
++}
++
++static int jcore_pit_local_teardown(unsigned cpu)
++{
++ struct jcore_pit *pit = this_cpu_ptr(jcore_pit_percpu);
++
++ pr_info("Local J-Core PIT teardown on cpu %u\n", cpu);
++
++ disable_percpu_irq(pit->ced.irq);
+
+ return 0;
+ }
+@@ -168,6 +180,7 @@ static int __init jcore_pit_init(struct device_node *node)
+ return -ENOMEM;
+ }
+
++ irq_set_percpu_devid(pit_irq);
+ err = request_percpu_irq(pit_irq, jcore_timer_interrupt,
+ "jcore_pit", jcore_pit_percpu);
+ if (err) {
+@@ -237,7 +250,7 @@ static int __init jcore_pit_init(struct device_node *node)
+
+ cpuhp_setup_state(CPUHP_AP_JCORE_TIMER_STARTING,
+ "clockevents/jcore:starting",
+- jcore_pit_local_init, NULL);
++ jcore_pit_local_init, jcore_pit_local_teardown);
+
+ return 0;
+ }
+diff --git a/drivers/irqchip/irq-jcore-aic.c b/drivers/irqchip/irq-jcore-aic.c
+index b9dcc8e78c750..1f613eb7b7f03 100644
+--- a/drivers/irqchip/irq-jcore-aic.c
++++ b/drivers/irqchip/irq-jcore-aic.c
+@@ -38,7 +38,7 @@ static struct irq_chip jcore_aic;
+ static void handle_jcore_irq(struct irq_desc *desc)
+ {
+ if (irqd_is_per_cpu(irq_desc_get_irq_data(desc)))
+- handle_percpu_irq(desc);
++ handle_percpu_devid_irq(desc);
+ else
+ handle_simple_irq(desc);
+ }
+--
+2.39.5
+
--- /dev/null
+From 1292c1d129beb769cf6dc701c3ecba58685c07ad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Feb 2025 16:32:50 +0100
+Subject: sched: Compact RSEQ concurrency IDs with reduced threads and affinity
+
+From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+
+[ Upstream commit 02d954c0fdf91845169cdacc7405b120f90afe01 ]
+
+When a process reduces its number of threads or clears bits in its CPU
+affinity mask, the mm_cid allocation should eventually converge towards
+smaller values.
+
+However, the change introduced by:
+
+commit 7e019dcc470f ("sched: Improve cache locality of RSEQ concurrency
+IDs for intermittent workloads")
+
+adds a per-mm/CPU recent_cid which is never unset unless a thread
+migrates.
+
+This is a tradeoff between:
+
+A) Preserving cache locality after a transition from many threads to few
+   threads, or after reducing the Hamming weight of the allowed CPU mask.
+
+B) Making the mm_cid upper bounds with respect to the number of threads and
+   the allowed CPU mask easy to document and understand.
+
+C) Allowing applications to eventually react to mm_cid compaction after a
+   reduction of the number of threads or the allowed CPU mask, making the
+   compaction easier to track by shrinking the IDs back towards 0.
+
+D) Making sure applications that periodically reduce and later increase the
+   number of threads or the allowed CPU mask still benefit from good cache
+   locality with mm_cid.
+
+Introduce the following changes (a simplified sketch follows the list):
+
+* After shrinking the number of threads or reducing the number of
+ allowed CPUs, reduce the value of max_nr_cid so expansion of CID
+ allocation will preserve cache locality if the number of threads or
+ allowed CPUs increase again.
+
+* Only re-use a recent_cid if it is within the max_nr_cid upper bound,
+ else find the first available CID.
+
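+Stripped of the atomics and of the cidmask bookkeeping, the new bound
+handling amounts to the plain-C sketch below; pick_cid() and
+recent_cid_free are invented names for the illustration, and -1 stands in
+for the "unset" marker checked by mm_cid_is_unset() in the kernel:
+
+  #include <stdbool.h>
+
+  /* Returns the reused cid, or -1 if the caller must allocate a new one. */
+  static int pick_cid(int *max_nr_cid, int nr_cpus_allowed, int mm_users,
+                      int recent_cid, bool recent_cid_free)
+  {
+          int allowed_max_nr_cid = nr_cpus_allowed < mm_users ?
+                                   nr_cpus_allowed : mm_users;
+
+          /* Clamp the bound after the thread count or affinity shrank. */
+          if (*max_nr_cid > allowed_max_nr_cid)
+                  *max_nr_cid = allowed_max_nr_cid;
+
+          /* Re-use the recent cid only if it sits below the clamped bound. */
+          if (recent_cid >= 0 && recent_cid < *max_nr_cid && recent_cid_free)
+                  return recent_cid;
+
+          return -1;      /* fall back to first-available allocation */
+  }
+
+In the real __mm_cid_try_get() below, the clamp is performed with
+atomic_try_cmpxchg() so that concurrent allocators agree on max_nr_cid, and
+the fallback path scans the per-mm cidmask for the first free CID while
+still allowing max_nr_cid to grow again up to the allowed bound.
+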
+Fixes: 7e019dcc470f ("sched: Improve cache locality of RSEQ concurrency IDs for intermittent workloads")
+Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Gabriele Monaco <gmonaco@redhat.com>
+Link: https://lkml.kernel.org/r/20250210153253.460471-2-gmonaco@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/mm_types.h | 7 ++++---
+ kernel/sched/sched.h | 25 ++++++++++++++++++++++---
+ 2 files changed, 26 insertions(+), 6 deletions(-)
+
+diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
+index 332cee2856620..14fc1b39c0cf3 100644
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -873,10 +873,11 @@ struct mm_struct {
+ */
+ unsigned int nr_cpus_allowed;
+ /**
+- * @max_nr_cid: Maximum number of concurrency IDs allocated.
++ * @max_nr_cid: Maximum number of allowed concurrency
++ * IDs allocated.
+ *
+- * Track the highest number of concurrency IDs allocated for the
+- * mm.
++ * Track the highest number of allowed concurrency IDs
++ * allocated for the mm.
+ */
+ atomic_t max_nr_cid;
+ /**
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 66744d60904d5..f3e121888d050 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -3666,10 +3666,28 @@ static inline int __mm_cid_try_get(struct task_struct *t, struct mm_struct *mm)
+ {
+ struct cpumask *cidmask = mm_cidmask(mm);
+ struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
+- int cid = __this_cpu_read(pcpu_cid->recent_cid);
++ int cid, max_nr_cid, allowed_max_nr_cid;
+
++ /*
++ * After shrinking the number of threads or reducing the number
++ * of allowed cpus, reduce the value of max_nr_cid so expansion
++ * of cid allocation will preserve cache locality if the number
++ * of threads or allowed cpus increase again.
++ */
++ max_nr_cid = atomic_read(&mm->max_nr_cid);
++ while ((allowed_max_nr_cid = min_t(int, READ_ONCE(mm->nr_cpus_allowed),
++ atomic_read(&mm->mm_users))),
++ max_nr_cid > allowed_max_nr_cid) {
++ /* atomic_try_cmpxchg loads previous mm->max_nr_cid into max_nr_cid. */
++ if (atomic_try_cmpxchg(&mm->max_nr_cid, &max_nr_cid, allowed_max_nr_cid)) {
++ max_nr_cid = allowed_max_nr_cid;
++ break;
++ }
++ }
+ /* Try to re-use recent cid. This improves cache locality. */
+- if (!mm_cid_is_unset(cid) && !cpumask_test_and_set_cpu(cid, cidmask))
++ cid = __this_cpu_read(pcpu_cid->recent_cid);
++ if (!mm_cid_is_unset(cid) && cid < max_nr_cid &&
++ !cpumask_test_and_set_cpu(cid, cidmask))
+ return cid;
+ /*
+ * Expand cid allocation if the maximum number of concurrency
+@@ -3677,8 +3695,9 @@ static inline int __mm_cid_try_get(struct task_struct *t, struct mm_struct *mm)
+ * and number of threads. Expanding cid allocation as much as
+ * possible improves cache locality.
+ */
+- cid = atomic_read(&mm->max_nr_cid);
++ cid = max_nr_cid;
+ while (cid < READ_ONCE(mm->nr_cpus_allowed) && cid < atomic_read(&mm->mm_users)) {
++ /* atomic_try_cmpxchg loads previous mm->max_nr_cid into cid. */
+ if (!atomic_try_cmpxchg(&mm->max_nr_cid, &cid, cid + 1))
+ continue;
+ if (!cpumask_test_and_set_cpu(cid, cidmask))
+--
+2.39.5
+