From 4880cafaee93d3806cb9da94ccf82068dea5a1aa Mon Sep 17 00:00:00 2001
From: Sasha Levin <sashal@kernel.org>
Date: Mon, 2 Nov 2020 15:31:27 -0500
Subject: ring-buffer: Fix recursion protection transitions between interrupt
 context

From: Steven Rostedt (VMware) <rostedt@goodmis.org>

[ Upstream commit b02414c8f045ab3b9afc816c3735bc98c5c3d262 ]

The recursion protection of the ring buffer depends on preempt_count()
being correct. But it is possible that the ring buffer gets called after
an interrupt comes in but before the interrupt updates preempt_count().
This triggers a false positive in the recursion code.

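As a rough standalone sketch of that window (plain C, not the kernel
code; the mask parameters stand in for the NMI_MASK/HARDIRQ_MASK/
SOFTIRQ_OFFSET tests that trace_recursive_lock() does on
preempt_count()), this is how the context gets classified, and why a
handler that runs before preempt_count() is updated is mistaken for the
context it interrupted:

enum rb_ctx_model { CTX_NMI, CTX_IRQ, CTX_SOFTIRQ, CTX_NORMAL };

/*
 * Illustrative only: classify the current context from a
 * preempt_count()-style value.  If an interrupt handler calls this
 * before the count has been bumped, 'pc' still describes the
 * interrupted task, so the handler is classified as that same context
 * and its recursion bit already looks "taken".
 */
static enum rb_ctx_model classify(unsigned long pc,
                                  unsigned long nmi_mask,
                                  unsigned long hardirq_mask,
                                  unsigned long softirq_mask)
{
        if (pc & nmi_mask)
                return CTX_NMI;
        if (pc & hardirq_mask)
                return CTX_IRQ;
        if (pc & softirq_mask)
                return CTX_SOFTIRQ;
        return CTX_NORMAL;
}
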
Use the same trick from the ftrace function callback recursion code,
which uses a "transition" bit that gets set to allow a single extra
recursion to handle transitions between contexts.

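A minimal model of that trick (simplified from the
trace_recursive_lock() hunk below; the bit layout mirrors the new enum,
everything else here is illustrative):

enum { BIT_TRANSITION, BIT_NMI, BIT_IRQ, BIT_SOFTIRQ, BIT_NORMAL };

/*
 * 'bit' is the context computed from preempt_count().  If that bit is
 * already set, it may be genuine recursion, or it may be an interrupt
 * that fired before preempt_count() was updated.  The TRANSITION bit
 * absorbs exactly one such ambiguous level; a second one is rejected.
 */
static int recursive_lock_model(unsigned int *current_context, int bit)
{
        unsigned int val = *current_context;

        if (val & (1 << bit)) {
                bit = BIT_TRANSITION;
                if (val & (1 << bit))
                        return 1;       /* real recursion, drop the event */
        }

        val |= 1 << bit;
        *current_context = val;
        return 0;
}
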
Cc: stable@vger.kernel.org
Fixes: 567cd4da54ff4 ("ring-buffer: User context bit recursion checking")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 kernel/trace/ring_buffer.c | 54 +++++++++++++++++++++++++++++++-------
 1 file changed, 44 insertions(+), 10 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1c1ecc1d49ad2..547a3a5ac57b5 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -416,14 +416,16 @@ struct rb_event_info {
 
 /*
  * Used for which event context the event is in.
- * NMI = 0
- * IRQ = 1
- * SOFTIRQ = 2
- * NORMAL = 3
+ * TRANSITION = 0
+ * NMI = 1
+ * IRQ = 2
+ * SOFTIRQ = 3
+ * NORMAL = 4
  *
  * See trace_recursive_lock() comment below for more details.
  */
 enum {
+        RB_CTX_TRANSITION,
         RB_CTX_NMI,
         RB_CTX_IRQ,
         RB_CTX_SOFTIRQ,
@@ -2585,10 +2587,10 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
  * a bit of overhead in something as critical as function tracing,
  * we use a bitmask trick.
  *
- * bit 0 = NMI context
- * bit 1 = IRQ context
- * bit 2 = SoftIRQ context
- * bit 3 = normal context.
+ * bit 1 = NMI context
+ * bit 2 = IRQ context
+ * bit 3 = SoftIRQ context
+ * bit 4 = normal context.
  *
  * This works because this is the order of contexts that can
  * preempt other contexts. A SoftIRQ never preempts an IRQ
@@ -2611,6 +2613,30 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
  * The least significant bit can be cleared this way, and it
  * just so happens that it is the same bit corresponding to
  * the current context.
+ *
+ * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit
+ * is set when a recursion is detected at the current context, and if
+ * the TRANSITION bit is already set, it will fail the recursion.
+ * This is needed because there's a lag between the changing of
+ * interrupt context and updating the preempt count. In this case,
+ * a false positive will be found. To handle this, one extra recursion
+ * is allowed, and this is done by the TRANSITION bit. If the TRANSITION
+ * bit is already set, then it is considered a recursion and the function
+ * ends. Otherwise, the TRANSITION bit is set, and that bit is returned.
+ *
+ * On the trace_recursive_unlock(), the TRANSITION bit will be the first
+ * to be cleared. Even if it wasn't the context that set it. That is,
+ * if an interrupt comes in while NORMAL bit is set and the ring buffer
+ * is called before preempt_count() is updated, since the check will
+ * be on the NORMAL bit, the TRANSITION bit will then be set. If an
+ * NMI then comes in, it will set the NMI bit, but when the NMI code
+ * does the trace_recursive_unlock() it will clear the TRANSITION bit
+ * and leave the NMI bit set. But this is fine, because the interrupt
+ * code that set the TRANSITION bit will then clear the NMI bit when it
+ * calls trace_recursive_unlock(). If another NMI comes in, it will
+ * set the TRANSITION bit and continue.
+ *
+ * Note: The TRANSITION bit only handles a single transition between context.
  */
 
 static __always_inline int
@@ -2629,8 +2655,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
         } else
                 bit = RB_CTX_NORMAL;
 
-        if (unlikely(val & (1 << bit)))
-                return 1;
+        if (unlikely(val & (1 << bit))) {
+                /*
+                 * It is possible that this was called by transitioning
+                 * between interrupt context, and preempt_count() has not
+                 * been updated yet. In this case, use the TRANSITION bit.
+                 */
+                bit = RB_CTX_TRANSITION;
+                if (val & (1 << bit))
+                        return 1;
+        }
 
         val |= (1 << bit);
         cpu_buffer->current_context = val;
-- 
2.27.0

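For reference, a self-contained userspace model of how the bits unwind
in the scenario the new comment describes (a normal-context writer is
interrupted before preempt_count() is updated, then an NMI nests on
top).  The unlock side is modeled as clearing the least significant set
bit, which is the trick the comment above relies on; all names here are
invented for the illustration and none of this is kernel code:

#include <stdio.h>

enum { BIT_TRANSITION, BIT_NMI, BIT_IRQ, BIT_SOFTIRQ, BIT_NORMAL };

/* Same shape as the patched check: fall back to TRANSITION once. */
static int lock(unsigned int *ctx, int bit)
{
        if (*ctx & (1 << bit)) {
                bit = BIT_TRANSITION;
                if (*ctx & (1 << bit))
                        return 1;               /* genuine recursion */
        }
        *ctx |= 1 << bit;
        return 0;
}

/* Clear the least significant set bit, so TRANSITION always goes first. */
static void unlock(unsigned int *ctx)
{
        *ctx &= *ctx - 1;
}

int main(void)
{
        unsigned int ctx = 0;

        /* normal-context writer takes the NORMAL bit */
        if (lock(&ctx, BIT_NORMAL))
                return 1;
        /* irq arrives before preempt_count() is updated, is misread as
         * NORMAL, and falls back to the TRANSITION bit */
        if (lock(&ctx, BIT_NORMAL))
                return 1;
        /* NMI nests on top and takes its own bit */
        if (lock(&ctx, BIT_NMI))
                return 1;

        unlock(&ctx);   /* NMI's unlock clears TRANSITION (lowest set bit) */
        unlock(&ctx);   /* irq's unlock clears NMI */
        unlock(&ctx);   /* normal context's unlock clears NORMAL */

        printf("remaining context bits: %#x\n", ctx);   /* prints 0 */
        return 0;
}

Building this with any C compiler and running it shows the bits unwind
back to zero even though each unlock may clear a bit its own context did
not set, which is exactly the property the comment argues for.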