t->rseq.event.ids_changed = true;
}
-/*
- * Invoked from switch_mm_cid() in context switch when the task gets a MM
- * CID assigned.
- *
- * This does not raise TIF_NOTIFY_RESUME as that happens in
- * rseq_sched_switch_event().
- */
-static __always_inline void rseq_sched_set_task_mm_cid(struct task_struct *t, unsigned int cid)
-{
- /*
- * Requires a comparison as the switch_mm_cid() code does not
- * provide a conditional for it readily. So avoid excessive updates
- * when nothing changes.
- */
- if (t->rseq.ids.mm_cid != cid)
- t->rseq.event.ids_changed = true;
-}
-
/* Enforce a full update after RSEQ registration and when execve() failed */
static inline void rseq_force_update(void)
{
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
static inline void rseq_sched_switch_event(struct task_struct *t) { }
static inline void rseq_sched_set_ids_changed(struct task_struct *t) { }
-static inline void rseq_sched_set_task_mm_cid(struct task_struct *t, unsigned int cid) { }
static inline void rseq_force_update(void) { }
static inline void rseq_virt_userspace_exit(void) { }
static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { }
}
}
- switch_mm_cid(prev, next);
+ mm_cid_switch_to(prev, next);
/*
* Tell rseq that the task was scheduled in. Must be after
return true;
}
-static void __maybe_unused mm_cid_fixup_tasks_to_cpus(void)
+static void mm_cid_fixup_tasks_to_cpus(void)
{
struct mm_struct *mm = current->mm;
struct task_struct *p, *t;
void sched_mm_cid_fork(struct task_struct *t)
{
struct mm_struct *mm = t->mm;
+ bool percpu;
WARN_ON_ONCE(!mm || t->mm_cid.cid != MM_CID_UNSET);
guard(mutex)(&mm->mm_cid.mutex);
- scoped_guard(raw_spinlock, &mm->mm_cid.lock) {
- sched_mm_cid_add_user(t, mm);
- /* Preset last_cid for mm_cid_select() */
- t->mm_cid.last_cid = mm->mm_cid.max_cids - 1;
+ scoped_guard(raw_spinlock_irq, &mm->mm_cid.lock) {
+ struct mm_cid_pcpu *pcp = this_cpu_ptr(mm->mm_cid.pcpu);
+
+ /* First user? */
+ if (!mm->mm_cid.users) {
+ sched_mm_cid_add_user(t, mm);
+ t->mm_cid.cid = mm_get_cid(mm);
+ /* Required for execve() */
+ pcp->cid = t->mm_cid.cid;
+ return;
+ }
+
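+ /*
+ * No mode change required. In per task mode hand the new task a
+ * CID right away.
+ */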
+ if (!sched_mm_cid_add_user(t, mm)) {
+ if (!mm->mm_cid.percpu)
+ t->mm_cid.cid = mm_get_cid(mm);
+ return;
+ }
+
+ /* Handle the mode change and transfer current's CID */
+ percpu = !!mm->mm_cid.percpu;
+ if (!percpu)
+ mm_cid_transit_to_task(current, pcp);
+ else
+ mm_cid_transfer_to_cpu(current, pcp);
+ }
+
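+ /* Run the mode change fixups with only mm_cid::mutex held */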
+ if (percpu) {
+ mm_cid_fixup_tasks_to_cpus();
+ } else {
+ mm_cid_fixup_cpus_to_tasks(mm);
+ t->mm_cid.cid = mm_get_cid(mm);
}
}
static bool sched_mm_cid_remove_user(struct task_struct *t)
{
t->mm_cid.active = 0;
- mm_unset_cid_on_task(t);
+ scoped_guard(preempt) {
+ /* Clear the transition bit */
+ t->mm_cid.cid = cid_from_transit_cid(t->mm_cid.cid);
+ mm_unset_cid_on_task(t);
+ }
t->mm->mm_cid.users--;
return mm_update_max_cids(t->mm);
}
+static bool __sched_mm_cid_exit(struct task_struct *t)
+{
+ struct mm_struct *mm = t->mm;
+
+ if (!sched_mm_cid_remove_user(t))
+ return false;
+ /*
+ * Contrary to fork(), this only deals with a switch back to per
+ * task mode, either because the removal above decreased the number
+ * of users or because an affinity change increased the number of
+ * allowed CPUs and the deferred fixup has not run yet.
+ */
+ if (WARN_ON_ONCE(mm->mm_cid.percpu))
+ return false;
+ /*
+ * A failed fork(2) cleanup never gets here, so @current must have
+ * the same MM as @t. That's true for exit() and the failed
+ * pthread_create() cleanup case.
+ */
+ if (WARN_ON_ONCE(current->mm != mm))
+ return false;
+ return true;
+}
+
/*
* When a task exits, the MM CID held by the task is no longer required as
* the task cannot return to user space.
if (!mm || !t->mm_cid.active)
return;
+ /*
+ * Ensure that only one instance is doing MM CID operations within
+ * a MM. The common case is uncontended. The rare fixup case adds
+ * some overhead.
+ */
+ scoped_guard(mutex, &mm->mm_cid.mutex) {
+ /* mm_cid::mutex is sufficient to protect mm_cid::users */
+ if (likely(mm->mm_cid.users > 1)) {
+ scoped_guard(raw_spinlock_irq, &mm->mm_cid.lock) {
+ if (!__sched_mm_cid_exit(t))
+ return;
+ /* Mode change required. Transfer current's CID */
+ mm_cid_transit_to_task(current, this_cpu_ptr(mm->mm_cid.pcpu));
+ }
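+ /* Run the fixup with only mm_cid::mutex held */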
+ mm_cid_fixup_cpus_to_tasks(mm);
+ return;
+ }
+ /* Last user */
+ scoped_guard(raw_spinlock_irq, &mm->mm_cid.lock) {
+ /* Required across execve() */
+ if (t == current)
+ mm_cid_transit_to_task(t, this_cpu_ptr(mm->mm_cid.pcpu));
+ /* Ignore mode change. There is nothing to do. */
+ sched_mm_cid_remove_user(t);
+ }
+ }
- guard(mutex)(&mm->mm_cid.mutex);
- scoped_guard(raw_spinlock, &mm->mm_cid.lock)
- sched_mm_cid_remove_user(t);
+ /*
+ * As this is the last user (execve(), process exit or failed
+ * fork(2)), there is no concurrency anymore.
+ *
+ * Synchronize against possibly pending work to ensure that there
+ * are no dangling references left. @mm->mm_cid.users is zero, so
+ * nothing can queue this work anymore.
+ */
+ irq_work_sync(&mm->mm_cid.irq_work);
+ cancel_work_sync(&mm->mm_cid.work);
}
/* Deactivate MM CID allocation across execve() */
void sched_mm_cid_after_execve(struct task_struct *t)
{
sched_mm_cid_fork(t);
- guard(preempt)();
- mm_cid_select(t);
}
static void mm_cid_work_fn(struct work_struct *work)
{
struct mm_struct *mm = container_of(work, struct mm_struct, mm_cid.work);
- /* Make it compile, but not functional yet */
- if (!IS_ENABLED(CONFIG_NEW_MM_CID))
- return;
-
guard(mutex)(&mm->mm_cid.mutex);
/* Did the last user task exit already? */
if (!mm->mm_cid.users)
mm_cid_schedin(next);
}
-/* Active implementation */
-static inline void init_sched_mm_cid(struct task_struct *t)
-{
- struct mm_struct *mm = t->mm;
- unsigned int max_cid;
-
- if (!mm)
- return;
-
- /* Preset last_mm_cid */
- max_cid = min_t(int, READ_ONCE(mm->mm_cid.nr_cpus_allowed), atomic_read(&mm->mm_users));
- t->mm_cid.last_cid = max_cid - 1;
-}
-
-static inline bool __mm_cid_get(struct task_struct *t, unsigned int cid, unsigned int max_cids)
-{
- struct mm_struct *mm = t->mm;
-
- if (cid >= max_cids)
- return false;
- if (test_and_set_bit(cid, mm_cidmask(mm)))
- return false;
- t->mm_cid.cid = t->mm_cid.last_cid = cid;
- __this_cpu_write(mm->mm_cid.pcpu->cid, cid);
- return true;
-}
-
-static inline bool mm_cid_get(struct task_struct *t)
-{
- struct mm_struct *mm = t->mm;
- unsigned int max_cids;
-
- max_cids = READ_ONCE(mm->mm_cid.max_cids);
-
- /* Try to reuse the last CID of this task */
- if (__mm_cid_get(t, t->mm_cid.last_cid, max_cids))
- return true;
-
- /* Try to reuse the last CID of this mm on this CPU */
- if (__mm_cid_get(t, __this_cpu_read(mm->mm_cid.pcpu->cid), max_cids))
- return true;
-
- /* Try the first zero bit in the cidmask. */
- return __mm_cid_get(t, find_first_zero_bit(mm_cidmask(mm), num_possible_cpus()), max_cids);
-}
-
-static inline void mm_cid_select(struct task_struct *t)
-{
- /*
- * mm_cid_get() can fail when the maximum CID, which is determined
- * by min(mm->nr_cpus_allowed, mm->mm_users) changes concurrently.
- * That's a transient failure as there cannot be more tasks
- * concurrently on a CPU (or about to be scheduled in) than that.
- */
- for (;;) {
- if (mm_cid_get(t))
- break;
- }
-}
-
-static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *next)
-{
- if (prev->mm_cid.active) {
- if (prev->mm_cid.cid != MM_CID_UNSET)
- clear_bit(prev->mm_cid.cid, mm_cidmask(prev->mm));
- prev->mm_cid.cid = MM_CID_UNSET;
- }
-
- if (next->mm_cid.active) {
- mm_cid_select(next);
- rseq_sched_set_task_mm_cid(next, next->mm_cid.cid);
- }
-}
-
#else /* !CONFIG_SCHED_MM_CID: */
-static inline void mm_cid_select(struct task_struct *t) { }
-static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *next) { }
static inline void mm_cid_switch_to(struct task_struct *prev, struct task_struct *next) { }
#endif /* !CONFIG_SCHED_MM_CID */