]>
Commit | Line | Data |
---|---|---|
2a2a4ae2 SL |
1 | From 702734eebe24adfc91a37f30ed7263b8bfa3361c Mon Sep 17 00:00:00 2001 |
2 | From: Yabin Cui <yabinc@google.com> | |
3 | Date: Fri, 17 May 2019 13:52:31 +0200 | |
4 | Subject: perf/ring_buffer: Fix exposing a temporarily decreased data_head | |
5 | ||
6 | [ Upstream commit 1b038c6e05ff70a1e66e3e571c2e6106bdb75f53 ] | |
7 | ||
8 | In perf_output_put_handle(), an IRQ/NMI can happen in below location and | |
9 | write records to the same ring buffer: | |
10 | ||
11 | ... | |
12 | local_dec_and_test(&rb->nest) | |
13 | ... <-- an IRQ/NMI can happen here | |
14 | rb->user_page->data_head = head; | |
15 | ... | |
16 | ||
17 | In this case, a value A is written to data_head in the IRQ, then a value | |
18 | B is written to data_head after the IRQ. And A > B. As a result, | |
19 | data_head is temporarily decreased from A to B. And a reader may see | |
20 | data_head < data_tail if it read the buffer frequently enough, which | |
21 | creates unexpected behaviors. | |
22 | ||
23 | This can be fixed by moving dec(&rb->nest) to after updating data_head, | |
24 | which prevents the IRQ/NMI above from updating data_head. | |
25 | ||
26 | [ Split up by peterz. ] | |
27 | ||
28 | Signed-off-by: Yabin Cui <yabinc@google.com> | |
29 | Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> | |
30 | Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> | |
31 | Cc: Arnaldo Carvalho de Melo <acme@kernel.org> | |
32 | Cc: Arnaldo Carvalho de Melo <acme@redhat.com> | |
33 | Cc: Jiri Olsa <jolsa@redhat.com> | |
34 | Cc: Linus Torvalds <torvalds@linux-foundation.org> | |
35 | Cc: Namhyung Kim <namhyung@kernel.org> | |
36 | Cc: Peter Zijlstra <peterz@infradead.org> | |
37 | Cc: Stephane Eranian <eranian@google.com> | |
38 | Cc: Thomas Gleixner <tglx@linutronix.de> | |
39 | Cc: Vince Weaver <vincent.weaver@maine.edu> | |
40 | Cc: mark.rutland@arm.com | |
41 | Fixes: ef60777c9abd ("perf: Optimize the perf_output() path by removing IRQ-disables") | |
42 | Link: http://lkml.kernel.org/r/20190517115418.224478157@infradead.org | |
43 | Signed-off-by: Ingo Molnar <mingo@kernel.org> | |
44 | Signed-off-by: Sasha Levin <sashal@kernel.org> | |
45 | --- | |
46 | kernel/events/ring_buffer.c | 24 ++++++++++++++++++++---- | |
47 | 1 file changed, 20 insertions(+), 4 deletions(-) | |
48 | ||
49 | diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c | |
50 | index 489dc6b60053..fde853270c09 100644 | |
51 | --- a/kernel/events/ring_buffer.c | |
52 | +++ b/kernel/events/ring_buffer.c | |
53 | @@ -52,11 +52,18 @@ static void perf_output_put_handle(struct perf_output_handle *handle) | |
54 | head = local_read(&rb->head); | |
55 | ||
56 | /* | |
57 | - * IRQ/NMI can happen here, which means we can miss a head update. | |
58 | + * IRQ/NMI can happen here and advance @rb->head, causing our | |
59 | + * load above to be stale. | |
60 | */ | |
61 | ||
62 | - if (!local_dec_and_test(&rb->nest)) | |
63 | + /* | |
64 | + * If this isn't the outermost nesting, we don't have to update | |
65 | + * @rb->user_page->data_head. | |
66 | + */ | |
67 | + if (local_read(&rb->nest) > 1) { | |
68 | + local_dec(&rb->nest); | |
69 | goto out; | |
70 | + } | |
71 | ||
72 | /* | |
73 | * Since the mmap() consumer (userspace) can run on a different CPU: | |
74 | @@ -88,9 +95,18 @@ static void perf_output_put_handle(struct perf_output_handle *handle) | |
75 | rb->user_page->data_head = head; | |
76 | ||
77 | /* | |
78 | - * Now check if we missed an update -- rely on previous implied | |
79 | - * compiler barriers to force a re-read. | |
80 | + * We must publish the head before decrementing the nest count, | |
81 | + * otherwise an IRQ/NMI can publish a more recent head value and our | |
82 | + * write will (temporarily) publish a stale value. | |
83 | + */ | |
84 | + barrier(); | |
85 | + local_set(&rb->nest, 0); | |
86 | + | |
87 | + /* | |
88 | + * Ensure we decrement @rb->nest before we validate the @rb->head. | |
89 | + * Otherwise we cannot be sure we caught the 'last' nested update. | |
90 | */ | |
91 | + barrier(); | |
92 | if (unlikely(head != local_read(&rb->head))) { | |
93 | local_inc(&rb->nest); | |
94 | goto again; | |
95 | -- | |
96 | 2.20.1 | |
97 |