context_tracking: Take IRQ eqs entrypoints over RCU
author    Frederic Weisbecker <frederic@kernel.org>
          Wed, 8 Jun 2022 14:40:26 +0000 (16:40 +0200)
committer Paul E. McKenney <paulmck@kernel.org>
          Tue, 5 Jul 2022 20:32:59 +0000 (13:32 -0700)
The RCU dynticks counter is going to be merged into the context tracking
subsystem. Prepare by moving the IRQ extended quiescent state
entrypoints to context tracking. For now these are dumb redirections to
the existing RCU calls.
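
As the kernel/context_tracking.c hunk below shows, the new ct_irq_*()
entrypoints simply forward to the corresponding rcu_irq_*() functions for
now, so callers only need to switch names. A minimal caller-side sketch of
the pattern (example_pm_callback() and do_work_that_uses_rcu() are
hypothetical placeholders, not part of this patch):

	#include <linux/context_tracking_irq.h>

	static void example_pm_callback(void)
	{
		/*
		 * Temporarily exit the extended quiescent state from an
		 * IRQ-enabled context so that RCU read-side critical
		 * sections inside the callback are safe.
		 */
		ct_irq_enter_irqson();
		do_work_that_uses_rcu();	/* hypothetical helper */
		ct_irq_exit_irqson();
	}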

[ paulmck: Apply Stephen Rothwell feedback from -next. ]
[ paulmck: Apply Nathan Chancellor feedback. ]

Acked-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Cc: Uladzislau Rezki <uladzislau.rezki@sony.com>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Nicolas Saenz Julienne <nsaenz@kernel.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Cc: Yu Liao <liaoyu15@huawei.com>
Cc: Phil Auld <pauld@redhat.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Alex Belits <abelits@marvell.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Tested-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
18 files changed:
Documentation/RCU/Design/Requirements/Requirements.rst
Documentation/RCU/stallwarn.rst
arch/Kconfig
arch/arm64/kernel/entry-common.c
arch/x86/mm/fault.c
drivers/cpuidle/cpuidle-psci.c
drivers/cpuidle/cpuidle-riscv-sbi.c
include/linux/context_tracking_irq.h [new file with mode: 0644]
include/linux/context_tracking_state.h
include/linux/entry-common.h
include/linux/rcupdate.h
include/linux/tracepoint.h
kernel/cfi.c
kernel/context_tracking.c
kernel/cpu_pm.c
kernel/entry/common.c
kernel/softirq.c
kernel/trace/trace.c

index 04ed8bf27a0eae4086ad7219046074c8e6455e3b..074810c739367742f7a83d246974f140879018a3 100644 (file)
@@ -1844,10 +1844,10 @@ that meets this requirement.
 
 Furthermore, NMI handlers can be interrupted by what appear to RCU to be
 normal interrupts. One way that this can happen is for code that
-directly invokes rcu_irq_enter() and rcu_irq_exit() to be called
+directly invokes ct_irq_enter() and ct_irq_exit() to be called
 from an NMI handler. This astonishing fact of life prompted the current
-code structure, which has rcu_irq_enter() invoking
-rcu_nmi_enter() and rcu_irq_exit() invoking rcu_nmi_exit().
+code structure, which has ct_irq_enter() invoking
+rcu_nmi_enter() and ct_irq_exit() invoking rcu_nmi_exit().
 And yes, I also learned of this requirement the hard way.
 
 Loadable Modules
@@ -2195,7 +2195,7 @@ scheduling-clock interrupt be enabled when RCU needs it to be:
    sections, and RCU believes this CPU to be idle, no problem. This
    sort of thing is used by some architectures for light-weight
    exception handlers, which can then avoid the overhead of
-   rcu_irq_enter() and rcu_irq_exit() at exception entry and
+   ct_irq_enter() and ct_irq_exit() at exception entry and
    exit, respectively. Some go further and avoid the entireties of
    irq_enter() and irq_exit().
    Just make very sure you are running some of your tests with
@@ -2226,7 +2226,7 @@ scheduling-clock interrupt be enabled when RCU needs it to be:
 +-----------------------------------------------------------------------+
 | **Answer**:                                                           |
 +-----------------------------------------------------------------------+
-| One approach is to do ``rcu_irq_exit();rcu_irq_enter();`` every so    |
+| One approach is to do ``ct_irq_exit();ct_irq_enter();`` every so      |
 | often. But given that long-running interrupt handlers can cause other |
 | problems, not least for response time, shouldn't you work to keep     |
 | your interrupt handler's runtime within reasonable bounds?            |
index b95bda7755fa90846d4a8ba045748909851ae47a..ce1f58a9d954b3c878f76018428f5ba77efe57c7 100644 (file)
@@ -98,11 +98,11 @@ warnings:
 
 -      A low-level kernel issue that either fails to invoke one of the
        variants of rcu_user_enter(), rcu_user_exit(), ct_idle_enter(),
-       ct_idle_exit(), rcu_irq_enter(), or rcu_irq_exit() on the one
+       ct_idle_exit(), ct_irq_enter(), or ct_irq_exit() on the one
        hand, or that invokes one of them too many times on the other.
        Historically, the most frequent issue has been an omission
        of either irq_enter() or irq_exit(), which in turn invoke
-       rcu_irq_enter() or rcu_irq_exit(), respectively.  Building your
+       ct_irq_enter() or ct_irq_exit(), respectively.  Building your
        kernel with CONFIG_RCU_EQS_DEBUG=y can help track down these types
        of issues, which sometimes arise in architecture-specific code.
 
index 154b7b78da093f714a2a0f0f5bd5c5d4187425a8..342642be105fcbb43f80fab37e1bed4a3dfd6a5d 100644 (file)
@@ -782,7 +782,7 @@ config HAVE_CONTEXT_TRACKING_USER
          Syscalls need to be wrapped inside user_exit()-user_enter(), either
          optimized behind static key or through the slow path using TIF_NOHZ
          flag. Exceptions handlers must be wrapped as well. Irqs are already
-         protected inside rcu_irq_enter/rcu_irq_exit() but preemption or signal
+         protected inside ct_irq_enter/ct_irq_exit() but preemption or signal
          handling on irq exit still need to be protected.
 
 config HAVE_CONTEXT_TRACKING_USER_OFFSTACK
index 56cefd33eb8e9a1ecbdc247e8af06637c0327981..8dabe9ec10f1655b91bb1a1be44c5899f31a1020 100644 (file)
@@ -41,7 +41,7 @@ static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs)
 
        if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
                lockdep_hardirqs_off(CALLER_ADDR0);
-               rcu_irq_enter();
+               ct_irq_enter();
                trace_hardirqs_off_finish();
 
                regs->exit_rcu = true;
@@ -76,7 +76,7 @@ static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
                if (regs->exit_rcu) {
                        trace_hardirqs_on_prepare();
                        lockdep_hardirqs_on_prepare();
-                       rcu_irq_exit();
+                       ct_irq_exit();
                        lockdep_hardirqs_on(CALLER_ADDR0);
                        return;
                }
@@ -84,7 +84,7 @@ static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
                trace_hardirqs_on();
        } else {
                if (regs->exit_rcu)
-                       rcu_irq_exit();
+                       ct_irq_exit();
        }
 }
 
index fad8faa29d042d59ab9ae0f6d89d7aaee5b8a041..971977c438fc17194317e7910ba8389ac9c5b04d 100644 (file)
@@ -1526,7 +1526,7 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
 
        /*
         * Entry handling for valid #PF from kernel mode is slightly
-        * different: RCU is already watching and rcu_irq_enter() must not
+        * different: RCU is already watching and ct_irq_enter() must not
         * be invoked because a kernel fault on a user space address might
         * sleep.
         *
index 540105ca0781f1c3bfa42ec0b1f1fa84e5bec55c..57bc3e3ae3912a3af075446ec5e8f94484588a91 100644 (file)
@@ -69,12 +69,12 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
                return -1;
 
        /* Do runtime PM to manage a hierarchical CPU toplogy. */
-       rcu_irq_enter_irqson();
+       ct_irq_enter_irqson();
        if (s2idle)
                dev_pm_genpd_suspend(pd_dev);
        else
                pm_runtime_put_sync_suspend(pd_dev);
-       rcu_irq_exit_irqson();
+       ct_irq_exit_irqson();
 
        state = psci_get_domain_state();
        if (!state)
@@ -82,12 +82,12 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
 
        ret = psci_cpu_suspend_enter(state) ? -1 : idx;
 
-       rcu_irq_enter_irqson();
+       ct_irq_enter_irqson();
        if (s2idle)
                dev_pm_genpd_resume(pd_dev);
        else
                pm_runtime_get_sync(pd_dev);
-       rcu_irq_exit_irqson();
+       ct_irq_exit_irqson();
 
        cpu_pm_exit();
 
index 1151e5e2ba824c51c4c3ba1bbd822438f97bcca5..862a2876f1c9df7483a3d33639d7f5ce342d2198 100644 (file)
@@ -116,12 +116,12 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
                return -1;
 
        /* Do runtime PM to manage a hierarchical CPU toplogy. */
-       rcu_irq_enter_irqson();
+       ct_irq_enter_irqson();
        if (s2idle)
                dev_pm_genpd_suspend(pd_dev);
        else
                pm_runtime_put_sync_suspend(pd_dev);
-       rcu_irq_exit_irqson();
+       ct_irq_exit_irqson();
 
        if (sbi_is_domain_state_available())
                state = sbi_get_domain_state();
@@ -130,12 +130,12 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
 
        ret = sbi_suspend(state) ? -1 : idx;
 
-       rcu_irq_enter_irqson();
+       ct_irq_enter_irqson();
        if (s2idle)
                dev_pm_genpd_resume(pd_dev);
        else
                pm_runtime_get_sync(pd_dev);
-       rcu_irq_exit_irqson();
+       ct_irq_exit_irqson();
 
        cpu_pm_exit();
 
diff --git a/include/linux/context_tracking_irq.h b/include/linux/context_tracking_irq.h
new file mode 100644 (file)
index 0000000..62f62bb
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CONTEXT_TRACKING_IRQ_H
+#define _LINUX_CONTEXT_TRACKING_IRQ_H
+
+#ifdef CONFIG_CONTEXT_TRACKING_IDLE
+void ct_irq_enter(void);
+void ct_irq_exit(void);
+void ct_irq_enter_irqson(void);
+void ct_irq_exit_irqson(void);
+#else
+static inline void ct_irq_enter(void) { }
+static inline void ct_irq_exit(void) { }
+static inline void ct_irq_enter_irqson(void) { }
+static inline void ct_irq_exit_irqson(void) { }
+#endif
+
+#endif
index 2b46afe105a96125ea4759e73adb48f5067ad774..9c16a8b2c1947a313531b4f44946878b6408fcf8 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <linux/percpu.h>
 #include <linux/static_key.h>
+#include <linux/context_tracking_irq.h>
 
 struct context_tracking {
        /*
index c92ac75d6556dd6ea901460fda5a7bb2076c2533..84a466b176cf41be0184549824a404c32a8062a7 100644 (file)
@@ -357,7 +357,7 @@ void irqentry_exit_to_user_mode(struct pt_regs *regs);
 /**
  * struct irqentry_state - Opaque object for exception state storage
  * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the
- *            exit path has to invoke rcu_irq_exit().
+ *            exit path has to invoke ct_irq_exit().
  * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that
  *           lockdep state is restored correctly on exit from nmi.
  *
@@ -395,12 +395,12 @@ typedef struct irqentry_state {
  *
  * For kernel mode entries RCU handling is done conditional. If RCU is
  * watching then the only RCU requirement is to check whether the tick has
- * to be restarted. If RCU is not watching then rcu_irq_enter() has to be
- * invoked on entry and rcu_irq_exit() on exit.
+ * to be restarted. If RCU is not watching then ct_irq_enter() has to be
+ * invoked on entry and ct_irq_exit() on exit.
  *
- * Avoiding the rcu_irq_enter/exit() calls is an optimization but also
+ * Avoiding the ct_irq_enter/exit() calls is an optimization but also
  * solves the problem of kernel mode pagefaults which can schedule, which
- * is not possible after invoking rcu_irq_enter() without undoing it.
+ * is not possible after invoking ct_irq_enter() without undoing it.
  *
  * For user mode entries irqentry_enter_from_user_mode() is invoked to
  * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
index 6ebe754501c38a97e697c028b7a803bbfdab827d..f1562d91c67d2f58eaf1e8eaf6470427c8c51006 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/lockdep.h>
 #include <asm/processor.h>
 #include <linux/cpumask.h>
+#include <linux/context_tracking_irq.h>
 
 #define ULONG_CMP_GE(a, b)     (ULONG_MAX / 2 >= (a) - (b))
 #define ULONG_CMP_LT(a, b)     (ULONG_MAX / 2 < (a) - (b))
@@ -143,9 +144,9 @@ static inline void rcu_nocb_flush_deferred_wakeup(void) { }
  */
 #define RCU_NONIDLE(a) \
        do { \
-               rcu_irq_enter_irqson(); \
+               ct_irq_enter_irqson(); \
                do { a; } while (0); \
-               rcu_irq_exit_irqson(); \
+               ct_irq_exit_irqson(); \
        } while (0)
 
 /*
index 28031b15f878380fafdc8040d1dabe710ec6b583..55717a2eda08aeef012bc1f88e6ea4ae9ccb0317 100644 (file)
@@ -200,13 +200,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
                 */                                                     \
                if (rcuidle) {                                          \
                        __idx = srcu_read_lock_notrace(&tracepoint_srcu);\
-                       rcu_irq_enter_irqson();                         \
+                       ct_irq_enter_irqson();                          \
                }                                                       \
                                                                        \
                __DO_TRACE_CALL(name, TP_ARGS(args));                   \
                                                                        \
                if (rcuidle) {                                          \
-                       rcu_irq_exit_irqson();                          \
+                       ct_irq_exit_irqson();                           \
                        srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
                }                                                       \
                                                                        \
index 08102d19ec15a2e402c10ad8d22743caafe47d68..2046276ee2348e8cf2317d4f57d831933014f6b5 100644 (file)
@@ -295,7 +295,7 @@ static inline cfi_check_fn find_check_fn(unsigned long ptr)
        rcu_idle = !rcu_is_watching();
        if (rcu_idle) {
                local_irq_save(flags);
-               rcu_irq_enter();
+               ct_irq_enter();
        }
 
        if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW))
@@ -304,7 +304,7 @@ static inline cfi_check_fn find_check_fn(unsigned long ptr)
                fn = find_module_check_fn(ptr);
 
        if (rcu_idle) {
-               rcu_irq_exit();
+               ct_irq_exit();
                local_irq_restore(flags);
        }
 
index c0b3798d4e940274e1018c45e723b9eaf0336d16..72bd71a02c444c9c3f9ec33cd1b13a97ecb2cf1e 100644 (file)
@@ -35,6 +35,26 @@ void ct_idle_exit(void)
        rcu_idle_exit();
 }
 EXPORT_SYMBOL_GPL(ct_idle_exit);
+
+noinstr void ct_irq_enter(void)
+{
+       rcu_irq_enter();
+}
+
+noinstr void ct_irq_exit(void)
+{
+       rcu_irq_exit();
+}
+
+void ct_irq_enter_irqson(void)
+{
+       rcu_irq_enter_irqson();
+}
+
+void ct_irq_exit_irqson(void)
+{
+       rcu_irq_exit_irqson();
+}
 #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */
 
 #ifdef CONFIG_CONTEXT_TRACKING_USER
@@ -90,7 +110,7 @@ void noinstr __ct_user_enter(enum ctx_state state)
                         * At this stage, only low level arch entry code remains and
                         * then we'll run in userspace. We can assume there won't be
                         * any RCU read-side critical section until the next call to
-                        * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
+                        * user_exit() or ct_irq_enter(). Let's remove RCU's dependency
                         * on the tick.
                         */
                        if (state == CONTEXT_USER) {
@@ -136,7 +156,7 @@ void ct_user_enter(enum ctx_state state)
        /*
         * Some contexts may involve an exception occuring in an irq,
         * leading to that nesting:
-        * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+        * ct_irq_enter() rcu_user_exit() rcu_user_exit() ct_irq_exit()
         * This would mess up the dyntick_nesting count though. And rcu_irq_*()
         * helpers are enough to protect RCU uses inside the exception. So
         * just return immediately if we detect we are in an IRQ.
index 246efc74e3f34cc97c7cab412a4d4357d5a9b6a4..ba4ba71facf97f9b99ecc7e43d2805c1e18b958c 100644 (file)
@@ -35,11 +35,11 @@ static int cpu_pm_notify(enum cpu_pm_event event)
         * disfunctional in cpu idle. Copy RCU_NONIDLE code to let RCU know
         * this.
         */
-       rcu_irq_enter_irqson();
+       ct_irq_enter_irqson();
        rcu_read_lock();
        ret = raw_notifier_call_chain(&cpu_pm_notifier.chain, event, NULL);
        rcu_read_unlock();
-       rcu_irq_exit_irqson();
+       ct_irq_exit_irqson();
 
        return notifier_to_errno(ret);
 }
@@ -49,11 +49,11 @@ static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event ev
        unsigned long flags;
        int ret;
 
-       rcu_irq_enter_irqson();
+       ct_irq_enter_irqson();
        raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
        ret = raw_notifier_call_chain_robust(&cpu_pm_notifier.chain, event_up, event_down, NULL);
        raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
-       rcu_irq_exit_irqson();
+       ct_irq_exit_irqson();
 
        return notifier_to_errno(ret);
 }
index 032f164abe7cefb2011fa50f481bc6b661b88d7f..667ba5d581ff76159d21c3fc802bcc99bdfd0472 100644 (file)
@@ -321,7 +321,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
        }
 
        /*
-        * If this entry hit the idle task invoke rcu_irq_enter() whether
+        * If this entry hit the idle task invoke ct_irq_enter() whether
         * RCU is watching or not.
         *
         * Interrupts can nest when the first interrupt invokes softirq
@@ -332,12 +332,12 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
         * not nested into another interrupt.
         *
         * Checking for rcu_is_watching() here would prevent the nesting
-        * interrupt to invoke rcu_irq_enter(). If that nested interrupt is
+        * interrupt to invoke ct_irq_enter(). If that nested interrupt is
         * the tick then rcu_flavor_sched_clock_irq() would wrongfully
         * assume that it is the first interrupt and eventually claim
         * quiescent state and end grace periods prematurely.
         *
-        * Unconditionally invoke rcu_irq_enter() so RCU state stays
+        * Unconditionally invoke ct_irq_enter() so RCU state stays
         * consistent.
         *
         * TINY_RCU does not support EQS, so let the compiler eliminate
@@ -350,7 +350,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
                 * as in irqentry_enter_from_user_mode().
                 */
                lockdep_hardirqs_off(CALLER_ADDR0);
-               rcu_irq_enter();
+               ct_irq_enter();
                instrumentation_begin();
                trace_hardirqs_off_finish();
                instrumentation_end();
@@ -418,7 +418,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
                        trace_hardirqs_on_prepare();
                        lockdep_hardirqs_on_prepare();
                        instrumentation_end();
-                       rcu_irq_exit();
+                       ct_irq_exit();
                        lockdep_hardirqs_on(CALLER_ADDR0);
                        return;
                }
@@ -436,7 +436,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
                 * was not watching on entry.
                 */
                if (state.exit_rcu)
-                       rcu_irq_exit();
+                       ct_irq_exit();
        }
 }
 
index 9f0aef8aa9ff815b2a2df8bd280e558050ab83d9..c8a6913c067d9307c0f208196c047a2bfc9197bb 100644 (file)
@@ -620,7 +620,7 @@ void irq_enter_rcu(void)
  */
 void irq_enter(void)
 {
-       rcu_irq_enter();
+       ct_irq_enter();
        irq_enter_rcu();
 }
 
@@ -672,7 +672,7 @@ void irq_exit_rcu(void)
 void irq_exit(void)
 {
        __irq_exit_rcu();
-       rcu_irq_exit();
+       ct_irq_exit();
         /* must be last! */
        lockdep_hardirq_exit();
 }
index 2c95992e2c71072d5d501ce139178a25531f4945..fe78a681812638f79e8cb36a163b71fe261ca25d 100644 (file)
@@ -3107,15 +3107,15 @@ void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
        /*
         * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
         * but if the above rcu_is_watching() failed, then the NMI
-        * triggered someplace critical, and rcu_irq_enter() should
+        * triggered someplace critical, and ct_irq_enter() should
         * not be called from NMI.
         */
        if (unlikely(in_nmi()))
                return;
 
-       rcu_irq_enter_irqson();
+       ct_irq_enter_irqson();
        __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
-       rcu_irq_exit_irqson();
+       ct_irq_exit_irqson();
 }
 
 /**