Furthermore, NMI handlers can be interrupted by what appear to RCU to be
normal interrupts. One way that this can happen is for code that
-directly invokes rcu_irq_enter() and rcu_irq_exit() to be called
+directly invokes ct_irq_enter() and ct_irq_exit() to be called
from an NMI handler. This astonishing fact of life prompted the current
-code structure, which has rcu_irq_enter() invoking
-rcu_nmi_enter() and rcu_irq_exit() invoking rcu_nmi_exit().
+code structure, which has ct_irq_enter() invoking
+rcu_nmi_enter() and ct_irq_exit() invoking rcu_nmi_exit().
And yes, I also learned of this requirement the hard way.
Loadable Modules
sections, and RCU believes this CPU to be idle, no problem. This
sort of thing is used by some architectures for light-weight
exception handlers, which can then avoid the overhead of
- rcu_irq_enter() and rcu_irq_exit() at exception entry and
+ ct_irq_enter() and ct_irq_exit() at exception entry and
exit, respectively. Some go further and avoid the entireties of
irq_enter() and irq_exit().
Just make very sure you are running some of your tests with
+-----------------------------------------------------------------------+
| **Answer**: |
+-----------------------------------------------------------------------+
-| One approach is to do ``rcu_irq_exit();rcu_irq_enter();`` every so |
+| One approach is to do ``ct_irq_exit();ct_irq_enter();`` every so |
| often. But given that long-running interrupt handlers can cause other |
| problems, not least for response time, shouldn't you work to keep |
| your interrupt handler's runtime within reasonable bounds? |
- A low-level kernel issue that either fails to invoke one of the
variants of rcu_user_enter(), rcu_user_exit(), ct_idle_enter(),
- ct_idle_exit(), rcu_irq_enter(), or rcu_irq_exit() on the one
+ ct_idle_exit(), ct_irq_enter(), or ct_irq_exit() on the one
hand, or that invokes one of them too many times on the other.
Historically, the most frequent issue has been an omission
of either irq_enter() or irq_exit(), which in turn invoke
- rcu_irq_enter() or rcu_irq_exit(), respectively. Building your
+ ct_irq_enter() or ct_irq_exit(), respectively. Building your
kernel with CONFIG_RCU_EQS_DEBUG=y can help track down these types
of issues, which sometimes arise in architecture-specific code.
Syscalls need to be wrapped inside user_exit()-user_enter(), either
optimized behind static key or through the slow path using TIF_NOHZ
flag. Exceptions handlers must be wrapped as well. Irqs are already
- protected inside rcu_irq_enter/rcu_irq_exit() but preemption or signal
+ protected inside ct_irq_enter/ct_irq_exit() but preemption or signal
handling on irq exit still need to be protected.
config HAVE_CONTEXT_TRACKING_USER_OFFSTACK
if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
lockdep_hardirqs_off(CALLER_ADDR0);
- rcu_irq_enter();
+ ct_irq_enter();
trace_hardirqs_off_finish();
regs->exit_rcu = true;
if (regs->exit_rcu) {
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare();
- rcu_irq_exit();
+ ct_irq_exit();
lockdep_hardirqs_on(CALLER_ADDR0);
return;
}
trace_hardirqs_on();
} else {
if (regs->exit_rcu)
- rcu_irq_exit();
+ ct_irq_exit();
}
}
/*
* Entry handling for valid #PF from kernel mode is slightly
- * different: RCU is already watching and rcu_irq_enter() must not
+ * different: RCU is already watching and ct_irq_enter() must not
* be invoked because a kernel fault on a user space address might
* sleep.
*
return -1;
/* Do runtime PM to manage a hierarchical CPU toplogy. */
- rcu_irq_enter_irqson();
+ ct_irq_enter_irqson();
if (s2idle)
dev_pm_genpd_suspend(pd_dev);
else
pm_runtime_put_sync_suspend(pd_dev);
- rcu_irq_exit_irqson();
+ ct_irq_exit_irqson();
state = psci_get_domain_state();
if (!state)
ret = psci_cpu_suspend_enter(state) ? -1 : idx;
- rcu_irq_enter_irqson();
+ ct_irq_enter_irqson();
if (s2idle)
dev_pm_genpd_resume(pd_dev);
else
pm_runtime_get_sync(pd_dev);
- rcu_irq_exit_irqson();
+ ct_irq_exit_irqson();
cpu_pm_exit();
return -1;
/* Do runtime PM to manage a hierarchical CPU toplogy. */
- rcu_irq_enter_irqson();
+ ct_irq_enter_irqson();
if (s2idle)
dev_pm_genpd_suspend(pd_dev);
else
pm_runtime_put_sync_suspend(pd_dev);
- rcu_irq_exit_irqson();
+ ct_irq_exit_irqson();
if (sbi_is_domain_state_available())
state = sbi_get_domain_state();
ret = sbi_suspend(state) ? -1 : idx;
- rcu_irq_enter_irqson();
+ ct_irq_enter_irqson();
if (s2idle)
dev_pm_genpd_resume(pd_dev);
else
pm_runtime_get_sync(pd_dev);
- rcu_irq_exit_irqson();
+ ct_irq_exit_irqson();
cpu_pm_exit();
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CONTEXT_TRACKING_IRQ_H
+#define _LINUX_CONTEXT_TRACKING_IRQ_H
+
+#ifdef CONFIG_CONTEXT_TRACKING_IDLE
+void ct_irq_enter(void);
+void ct_irq_exit(void);
+void ct_irq_enter_irqson(void);
+void ct_irq_exit_irqson(void);
+#else
+static inline void ct_irq_enter(void) { }
+static inline void ct_irq_exit(void) { }
+static inline void ct_irq_enter_irqson(void) { }
+static inline void ct_irq_exit_irqson(void) { }
+#endif
+
+#endif
#include <linux/percpu.h>
#include <linux/static_key.h>
+#include <linux/context_tracking_irq.h>
struct context_tracking {
/*
/**
* struct irqentry_state - Opaque object for exception state storage
* @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the
- * exit path has to invoke rcu_irq_exit().
+ * exit path has to invoke ct_irq_exit().
* @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that
* lockdep state is restored correctly on exit from nmi.
*
*
* For kernel mode entries RCU handling is done conditional. If RCU is
* watching then the only RCU requirement is to check whether the tick has
- * to be restarted. If RCU is not watching then rcu_irq_enter() has to be
- * invoked on entry and rcu_irq_exit() on exit.
+ * to be restarted. If RCU is not watching then ct_irq_enter() has to be
+ * invoked on entry and ct_irq_exit() on exit.
*
- * Avoiding the rcu_irq_enter/exit() calls is an optimization but also
+ * Avoiding the ct_irq_enter/exit() calls is an optimization but also
* solves the problem of kernel mode pagefaults which can schedule, which
- * is not possible after invoking rcu_irq_enter() without undoing it.
+ * is not possible after invoking ct_irq_enter() without undoing it.
*
* For user mode entries irqentry_enter_from_user_mode() is invoked to
* establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
#include <linux/lockdep.h>
#include <asm/processor.h>
#include <linux/cpumask.h>
+#include <linux/context_tracking_irq.h>
#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
*/
#define RCU_NONIDLE(a) \
do { \
- rcu_irq_enter_irqson(); \
+ ct_irq_enter_irqson(); \
do { a; } while (0); \
- rcu_irq_exit_irqson(); \
+ ct_irq_exit_irqson(); \
} while (0)
/*
*/ \
if (rcuidle) { \
__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
- rcu_irq_enter_irqson(); \
+ ct_irq_enter_irqson(); \
} \
\
__DO_TRACE_CALL(name, TP_ARGS(args)); \
\
if (rcuidle) { \
- rcu_irq_exit_irqson(); \
+ ct_irq_exit_irqson(); \
srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
} \
\
rcu_idle = !rcu_is_watching();
if (rcu_idle) {
local_irq_save(flags);
- rcu_irq_enter();
+ ct_irq_enter();
}
if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW))
fn = find_module_check_fn(ptr);
if (rcu_idle) {
- rcu_irq_exit();
+ ct_irq_exit();
local_irq_restore(flags);
}
rcu_idle_exit();
}
EXPORT_SYMBOL_GPL(ct_idle_exit);
+
+noinstr void ct_irq_enter(void)
+{
+ rcu_irq_enter();
+}
+
+noinstr void ct_irq_exit(void)
+{
+ rcu_irq_exit();
+}
+
+void ct_irq_enter_irqson(void)
+{
+ rcu_irq_enter_irqson();
+}
+
+void ct_irq_exit_irqson(void)
+{
+ rcu_irq_exit_irqson();
+}
#endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */
#ifdef CONFIG_CONTEXT_TRACKING_USER
* At this stage, only low level arch entry code remains and
* then we'll run in userspace. We can assume there won't be
* any RCU read-side critical section until the next call to
- * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
+ * user_exit() or ct_irq_enter(). Let's remove RCU's dependency
* on the tick.
*/
if (state == CONTEXT_USER) {
/*
* Some contexts may involve an exception occuring in an irq,
* leading to that nesting:
- * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+ * ct_irq_enter() rcu_user_exit() rcu_user_exit() ct_irq_exit()
* This would mess up the dyntick_nesting count though. And rcu_irq_*()
* helpers are enough to protect RCU uses inside the exception. So
* just return immediately if we detect we are in an IRQ.
* disfunctional in cpu idle. Copy RCU_NONIDLE code to let RCU know
* this.
*/
- rcu_irq_enter_irqson();
+ ct_irq_enter_irqson();
rcu_read_lock();
ret = raw_notifier_call_chain(&cpu_pm_notifier.chain, event, NULL);
rcu_read_unlock();
- rcu_irq_exit_irqson();
+ ct_irq_exit_irqson();
return notifier_to_errno(ret);
}
unsigned long flags;
int ret;
- rcu_irq_enter_irqson();
+ ct_irq_enter_irqson();
raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
ret = raw_notifier_call_chain_robust(&cpu_pm_notifier.chain, event_up, event_down, NULL);
raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
- rcu_irq_exit_irqson();
+ ct_irq_exit_irqson();
return notifier_to_errno(ret);
}
}
/*
- * If this entry hit the idle task invoke rcu_irq_enter() whether
+ * If this entry hit the idle task invoke ct_irq_enter() whether
* RCU is watching or not.
*
* Interrupts can nest when the first interrupt invokes softirq
* not nested into another interrupt.
*
* Checking for rcu_is_watching() here would prevent the nesting
- * interrupt to invoke rcu_irq_enter(). If that nested interrupt is
+ * interrupt to invoke ct_irq_enter(). If that nested interrupt is
* the tick then rcu_flavor_sched_clock_irq() would wrongfully
* assume that it is the first interrupt and eventually claim
* quiescent state and end grace periods prematurely.
*
- * Unconditionally invoke rcu_irq_enter() so RCU state stays
+ * Unconditionally invoke ct_irq_enter() so RCU state stays
* consistent.
*
* TINY_RCU does not support EQS, so let the compiler eliminate
* as in irqentry_enter_from_user_mode().
*/
lockdep_hardirqs_off(CALLER_ADDR0);
- rcu_irq_enter();
+ ct_irq_enter();
instrumentation_begin();
trace_hardirqs_off_finish();
instrumentation_end();
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare();
instrumentation_end();
- rcu_irq_exit();
+ ct_irq_exit();
lockdep_hardirqs_on(CALLER_ADDR0);
return;
}
* was not watching on entry.
*/
if (state.exit_rcu)
- rcu_irq_exit();
+ ct_irq_exit();
}
}
*/
void irq_enter(void)
{
- rcu_irq_enter();
+ ct_irq_enter();
irq_enter_rcu();
}
void irq_exit(void)
{
__irq_exit_rcu();
- rcu_irq_exit();
+ ct_irq_exit();
/* must be last! */
lockdep_hardirq_exit();
}
/*
* When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
* but if the above rcu_is_watching() failed, then the NMI
- * triggered someplace critical, and rcu_irq_enter() should
+ * triggered someplace critical, and ct_irq_enter() should
* not be called from NMI.
*/
if (unlikely(in_nmi()))
return;
- rcu_irq_enter_irqson();
+ ct_irq_enter_irqson();
__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
- rcu_irq_exit_irqson();
+ ct_irq_exit_irqson();
}
/**