From: Greg Kroah-Hartman Date: Mon, 1 Jul 2013 18:59:38 +0000 (-0700) Subject: 3.4-stable patches X-Git-Tag: v3.9.9~3 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ad99b783432f0199e846c4802132f5598048f514;p=thirdparty%2Fkernel%2Fstable-queue.git 3.4-stable patches added patches: perf-fix-mmap-accounting-hole.patch perf-fix-perf-mmap-bugs.patch --- diff --git a/queue-3.4/perf-fix-mmap-accounting-hole.patch b/queue-3.4/perf-fix-mmap-accounting-hole.patch new file mode 100644 index 00000000000..e76e00be156 --- /dev/null +++ b/queue-3.4/perf-fix-mmap-accounting-hole.patch @@ -0,0 +1,415 @@ +From 9bb5d40cd93c9dd4be74834b1dcb1ba03629716b Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Tue, 4 Jun 2013 10:44:21 +0200 +Subject: perf: Fix mmap() accounting hole + +From: Peter Zijlstra + +commit 9bb5d40cd93c9dd4be74834b1dcb1ba03629716b upstream. + +Vince's fuzzer once again found holes. This time it spotted a leak in +the locked page accounting. + +When an event had redirected output and its close() was the last +reference to the buffer we didn't have a vm context to undo accounting. + +Change the code to destroy the buffer on the last munmap() and detach +all redirected events at that time. This provides us the right context +to undo the vm accounting. + +[Backporting for 3.4-stable. +VM_RESERVED flag was replaced with pair 'VM_DONTEXPAND | VM_DONTDUMP' in +314e51b9 since 3.7.0-rc1, and 314e51b9 comes from a big patchset, we didn't +backport the patchset, so I restored 'VM_DNOTEXPAND | VM_DONTDUMP' as before: +- vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; ++ vma->vm_flags |= VM_DONTCOPY | VM_RESERVED; + -- zliu] + +Reported-and-tested-by: Vince Weaver +Signed-off-by: Peter Zijlstra +Link: http://lkml.kernel.org/r/20130604084421.GI8923@twins.programming.kicks-ass.net +Cc: +Signed-off-by: Ingo Molnar +Signed-off-by: Zhouping Liu +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/events/core.c | 228 ++++++++++++++++++++++++++++++++--------------- + kernel/events/internal.h | 3 + 2 files changed, 159 insertions(+), 72 deletions(-) + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -193,9 +193,6 @@ static void cpu_ctx_sched_in(struct perf + static void update_context_time(struct perf_event_context *ctx); + static u64 perf_event_time(struct perf_event *event); + +-static void ring_buffer_attach(struct perf_event *event, +- struct ring_buffer *rb); +- + void __weak perf_event_print_debug(void) { } + + extern __weak const char *perf_pmu_name(void) +@@ -2848,7 +2845,8 @@ static void free_event_rcu(struct rcu_he + kfree(event); + } + +-static bool ring_buffer_put(struct ring_buffer *rb); ++static void ring_buffer_put(struct ring_buffer *rb); ++static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb); + + static void free_event(struct perf_event *event) + { +@@ -2873,15 +2871,30 @@ static void free_event(struct perf_event + if (has_branch_stack(event)) { + static_key_slow_dec_deferred(&perf_sched_events); + /* is system-wide event */ +- if (!(event->attach_state & PERF_ATTACH_TASK)) ++ if (!(event->attach_state & PERF_ATTACH_TASK)) { + atomic_dec(&per_cpu(perf_branch_stack_events, + event->cpu)); ++ } + } + } + + if (event->rb) { +- ring_buffer_put(event->rb); +- event->rb = NULL; ++ struct ring_buffer *rb; ++ ++ /* ++ * Can happen when we close an event with re-directed output. ++ * ++ * Since we have a 0 refcount, perf_mmap_close() will skip ++ * over us; possibly making our ring_buffer_put() the last. 
++ */ ++ mutex_lock(&event->mmap_mutex); ++ rb = event->rb; ++ if (rb) { ++ rcu_assign_pointer(event->rb, NULL); ++ ring_buffer_detach(event, rb); ++ ring_buffer_put(rb); /* could be last */ ++ } ++ mutex_unlock(&event->mmap_mutex); + } + + if (is_cgroup_event(event)) +@@ -3119,30 +3132,13 @@ static unsigned int perf_poll(struct fil + unsigned int events = POLL_HUP; + + /* +- * Race between perf_event_set_output() and perf_poll(): perf_poll() +- * grabs the rb reference but perf_event_set_output() overrides it. +- * Here is the timeline for two threads T1, T2: +- * t0: T1, rb = rcu_dereference(event->rb) +- * t1: T2, old_rb = event->rb +- * t2: T2, event->rb = new rb +- * t3: T2, ring_buffer_detach(old_rb) +- * t4: T1, ring_buffer_attach(rb1) +- * t5: T1, poll_wait(event->waitq) +- * +- * To avoid this problem, we grab mmap_mutex in perf_poll() +- * thereby ensuring that the assignment of the new ring buffer +- * and the detachment of the old buffer appear atomic to perf_poll() ++ * Pin the event->rb by taking event->mmap_mutex; otherwise ++ * perf_event_set_output() can swizzle our rb and make us miss wakeups. + */ + mutex_lock(&event->mmap_mutex); +- +- rcu_read_lock(); +- rb = rcu_dereference(event->rb); +- if (rb) { +- ring_buffer_attach(event, rb); ++ rb = event->rb; ++ if (rb) + events = atomic_xchg(&rb->poll, 0); +- } +- rcu_read_unlock(); +- + mutex_unlock(&event->mmap_mutex); + + poll_wait(file, &event->waitq, wait); +@@ -3459,16 +3455,12 @@ static void ring_buffer_attach(struct pe + return; + + spin_lock_irqsave(&rb->event_lock, flags); +- if (!list_empty(&event->rb_entry)) +- goto unlock; +- +- list_add(&event->rb_entry, &rb->event_list); +-unlock: ++ if (list_empty(&event->rb_entry)) ++ list_add(&event->rb_entry, &rb->event_list); + spin_unlock_irqrestore(&rb->event_lock, flags); + } + +-static void ring_buffer_detach(struct perf_event *event, +- struct ring_buffer *rb) ++static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb) + { + unsigned long flags; + +@@ -3487,13 +3479,10 @@ static void ring_buffer_wakeup(struct pe + + rcu_read_lock(); + rb = rcu_dereference(event->rb); +- if (!rb) +- goto unlock; +- +- list_for_each_entry_rcu(event, &rb->event_list, rb_entry) +- wake_up_all(&event->waitq); +- +-unlock: ++ if (rb) { ++ list_for_each_entry_rcu(event, &rb->event_list, rb_entry) ++ wake_up_all(&event->waitq); ++ } + rcu_read_unlock(); + } + +@@ -3520,23 +3509,14 @@ static struct ring_buffer *ring_buffer_g + return rb; + } + +-static bool ring_buffer_put(struct ring_buffer *rb) ++static void ring_buffer_put(struct ring_buffer *rb) + { +- struct perf_event *event, *n; +- unsigned long flags; +- + if (!atomic_dec_and_test(&rb->refcount)) +- return false; ++ return; + +- spin_lock_irqsave(&rb->event_lock, flags); +- list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) { +- list_del_init(&event->rb_entry); +- wake_up_all(&event->waitq); +- } +- spin_unlock_irqrestore(&rb->event_lock, flags); ++ WARN_ON_ONCE(!list_empty(&rb->event_list)); + + call_rcu(&rb->rcu_head, rb_free_rcu); +- return true; + } + + static void perf_mmap_open(struct vm_area_struct *vma) +@@ -3544,28 +3524,100 @@ static void perf_mmap_open(struct vm_are + struct perf_event *event = vma->vm_file->private_data; + + atomic_inc(&event->mmap_count); ++ atomic_inc(&event->rb->mmap_count); + } + ++/* ++ * A buffer can be mmap()ed multiple times; either directly through the same ++ * event, or through other events by use of perf_event_set_output(). 
++ * ++ * In order to undo the VM accounting done by perf_mmap() we need to destroy ++ * the buffer here, where we still have a VM context. This means we need ++ * to detach all events redirecting to us. ++ */ + static void perf_mmap_close(struct vm_area_struct *vma) + { + struct perf_event *event = vma->vm_file->private_data; + +- if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { +- struct ring_buffer *rb = event->rb; +- struct user_struct *mmap_user = rb->mmap_user; +- int mmap_locked = rb->mmap_locked; +- unsigned long size = perf_data_size(rb); ++ struct ring_buffer *rb = event->rb; ++ struct user_struct *mmap_user = rb->mmap_user; ++ int mmap_locked = rb->mmap_locked; ++ unsigned long size = perf_data_size(rb); + +- rcu_assign_pointer(event->rb, NULL); +- ring_buffer_detach(event, rb); +- mutex_unlock(&event->mmap_mutex); ++ atomic_dec(&rb->mmap_count); ++ ++ if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) ++ return; ++ ++ /* Detach current event from the buffer. */ ++ rcu_assign_pointer(event->rb, NULL); ++ ring_buffer_detach(event, rb); ++ mutex_unlock(&event->mmap_mutex); ++ ++ /* If there's still other mmap()s of this buffer, we're done. */ ++ if (atomic_read(&rb->mmap_count)) { ++ ring_buffer_put(rb); /* can't be last */ ++ return; ++ } + +- if (ring_buffer_put(rb)) { +- atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm); +- vma->vm_mm->pinned_vm -= mmap_locked; +- free_uid(mmap_user); ++ /* ++ * No other mmap()s, detach from all other events that might redirect ++ * into the now unreachable buffer. Somewhat complicated by the ++ * fact that rb::event_lock otherwise nests inside mmap_mutex. ++ */ ++again: ++ rcu_read_lock(); ++ list_for_each_entry_rcu(event, &rb->event_list, rb_entry) { ++ if (!atomic_long_inc_not_zero(&event->refcount)) { ++ /* ++ * This event is en-route to free_event() which will ++ * detach it and remove it from the list. ++ */ ++ continue; + } ++ rcu_read_unlock(); ++ ++ mutex_lock(&event->mmap_mutex); ++ /* ++ * Check we didn't race with perf_event_set_output() which can ++ * swizzle the rb from under us while we were waiting to ++ * acquire mmap_mutex. ++ * ++ * If we find a different rb; ignore this event, a next ++ * iteration will no longer find it on the list. We have to ++ * still restart the iteration to make sure we're not now ++ * iterating the wrong list. ++ */ ++ if (event->rb == rb) { ++ rcu_assign_pointer(event->rb, NULL); ++ ring_buffer_detach(event, rb); ++ ring_buffer_put(rb); /* can't be last, we still have one */ ++ } ++ mutex_unlock(&event->mmap_mutex); ++ put_event(event); ++ ++ /* ++ * Restart the iteration; either we're on the wrong list or ++ * destroyed its integrity by doing a deletion. ++ */ ++ goto again; + } ++ rcu_read_unlock(); ++ ++ /* ++ * It could be there's still a few 0-ref events on the list; they'll ++ * get cleaned up by free_event() -- they'll also still have their ++ * ref on the rb and will free it whenever they are done with it. ++ * ++ * Aside from that, this buffer is 'fully' detached and unmapped, ++ * undo the VM accounting. 
++ */ ++ ++ atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm); ++ vma->vm_mm->pinned_vm -= mmap_locked; ++ free_uid(mmap_user); ++ ++ ring_buffer_put(rb); /* could be last */ + } + + static const struct vm_operations_struct perf_mmap_vmops = { +@@ -3615,10 +3667,24 @@ static int perf_mmap(struct file *file, + return -EINVAL; + + WARN_ON_ONCE(event->ctx->parent_ctx); ++again: + mutex_lock(&event->mmap_mutex); + if (event->rb) { +- if (event->rb->nr_pages != nr_pages) ++ if (event->rb->nr_pages != nr_pages) { + ret = -EINVAL; ++ goto unlock; ++ } ++ ++ if (!atomic_inc_not_zero(&event->rb->mmap_count)) { ++ /* ++ * Raced against perf_mmap_close() through ++ * perf_event_set_output(). Try again, hope for better ++ * luck. ++ */ ++ mutex_unlock(&event->mmap_mutex); ++ goto again; ++ } ++ + goto unlock; + } + +@@ -3660,12 +3726,14 @@ static int perf_mmap(struct file *file, + goto unlock; + } + ++ atomic_set(&rb->mmap_count, 1); + rb->mmap_locked = extra; + rb->mmap_user = get_current_user(); + + atomic_long_add(user_extra, &user->locked_vm); + vma->vm_mm->pinned_vm += extra; + ++ ring_buffer_attach(event, rb); + rcu_assign_pointer(event->rb, rb); + + perf_event_update_userpage(event); +@@ -3675,6 +3743,10 @@ unlock: + atomic_inc(&event->mmap_count); + mutex_unlock(&event->mmap_mutex); + ++ /* ++ * Since pinned accounting is per vm we cannot allow fork() to copy our ++ * vma. ++ */ + vma->vm_flags |= VM_DONTCOPY | VM_RESERVED; + vma->vm_ops = &perf_mmap_vmops; + +@@ -6164,6 +6236,8 @@ set: + if (atomic_read(&event->mmap_count)) + goto unlock; + ++ old_rb = event->rb; ++ + if (output_event) { + /* get the rb we want to redirect to */ + rb = ring_buffer_get(output_event); +@@ -6171,16 +6245,28 @@ set: + goto unlock; + } + +- old_rb = event->rb; +- rcu_assign_pointer(event->rb, rb); + if (old_rb) + ring_buffer_detach(event, old_rb); ++ ++ if (rb) ++ ring_buffer_attach(event, rb); ++ ++ rcu_assign_pointer(event->rb, rb); ++ ++ if (old_rb) { ++ ring_buffer_put(old_rb); ++ /* ++ * Since we detached before setting the new rb, so that we ++ * could attach the new rb, we could have missed a wakeup. ++ * Provide it now. ++ */ ++ wake_up_all(&event->waitq); ++ } ++ + ret = 0; + unlock: + mutex_unlock(&event->mmap_mutex); + +- if (old_rb) +- ring_buffer_put(old_rb); + out: + return ret; + } +--- a/kernel/events/internal.h ++++ b/kernel/events/internal.h +@@ -30,7 +30,8 @@ struct ring_buffer { + spinlock_t event_lock; + struct list_head event_list; + +- int mmap_locked; ++ atomic_t mmap_count; ++ unsigned long mmap_locked; + struct user_struct *mmap_user; + + struct perf_event_mmap_page *user_page; diff --git a/queue-3.4/perf-fix-perf-mmap-bugs.patch b/queue-3.4/perf-fix-perf-mmap-bugs.patch new file mode 100644 index 00000000000..596d80d8f4d --- /dev/null +++ b/queue-3.4/perf-fix-perf-mmap-bugs.patch @@ -0,0 +1,180 @@ +From 26cb63ad11e04047a64309362674bcbbd6a6f246 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Tue, 28 May 2013 10:55:48 +0200 +Subject: perf: Fix perf mmap bugs + +From: Peter Zijlstra + +commit 26cb63ad11e04047a64309362674bcbbd6a6f246 upstream. + +Vince reported a problem found by his perf specific trinity +fuzzer. + +Al noticed 2 problems with perf's mmap(): + + - it has issues against fork() since we use vma->vm_mm for accounting. + - it has an rb refcount leak on double mmap(). + +We fix the issues against fork() by using VM_DONTCOPY; I don't +think there's code out there that uses this; we didn't hear +about weird accounting problems/crashes. 
If we do need this to +work, the previously proposed VM_PINNED could make this work. + +Aside from the rb reference leak spotted by Al, Vince's example +prog was indeed doing a double mmap() through the use of +perf_event_set_output(). + +This exposes another problem, since we now have 2 events with +one buffer, the accounting gets screwy because we account per +event. Fix this by making the buffer responsible for its own +accounting. + +[Backporting for 3.4-stable. +VM_RESERVED flag was replaced with pair 'VM_DONTEXPAND | VM_DONTDUMP' in +314e51b9 since 3.7.0-rc1, and 314e51b9 comes from a big patchset, we didn't +backport the patchset, so I restored 'VM_DNOTEXPAND | VM_DONTDUMP' as before: +- vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; ++ vma->vm_flags |= VM_DONTCOPY | VM_RESERVED; + -- zliu] + +Reported-by: Vince Weaver +Signed-off-by: Peter Zijlstra +Cc: Al Viro +Cc: Paul Mackerras +Cc: Arnaldo Carvalho de Melo +Link: http://lkml.kernel.org/r/20130528085548.GA12193@twins.programming.kicks-ass.net +Signed-off-by: Ingo Molnar +Signed-off-by: Zhouping Liu +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/perf_event.h | 3 +-- + kernel/events/core.c | 37 ++++++++++++++++++++----------------- + kernel/events/internal.h | 3 +++ + 3 files changed, 24 insertions(+), 19 deletions(-) + +--- a/include/linux/perf_event.h ++++ b/include/linux/perf_event.h +@@ -950,8 +950,7 @@ struct perf_event { + /* mmap bits */ + struct mutex mmap_mutex; + atomic_t mmap_count; +- int mmap_locked; +- struct user_struct *mmap_user; ++ + struct ring_buffer *rb; + struct list_head rb_entry; + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -2848,7 +2848,7 @@ static void free_event_rcu(struct rcu_he + kfree(event); + } + +-static void ring_buffer_put(struct ring_buffer *rb); ++static bool ring_buffer_put(struct ring_buffer *rb); + + static void free_event(struct perf_event *event) + { +@@ -3520,13 +3520,13 @@ static struct ring_buffer *ring_buffer_g + return rb; + } + +-static void ring_buffer_put(struct ring_buffer *rb) ++static bool ring_buffer_put(struct ring_buffer *rb) + { + struct perf_event *event, *n; + unsigned long flags; + + if (!atomic_dec_and_test(&rb->refcount)) +- return; ++ return false; + + spin_lock_irqsave(&rb->event_lock, flags); + list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) { +@@ -3536,6 +3536,7 @@ static void ring_buffer_put(struct ring_ + spin_unlock_irqrestore(&rb->event_lock, flags); + + call_rcu(&rb->rcu_head, rb_free_rcu); ++ return true; + } + + static void perf_mmap_open(struct vm_area_struct *vma) +@@ -3550,18 +3551,20 @@ static void perf_mmap_close(struct vm_ar + struct perf_event *event = vma->vm_file->private_data; + + if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { +- unsigned long size = perf_data_size(event->rb); +- struct user_struct *user = event->mmap_user; + struct ring_buffer *rb = event->rb; ++ struct user_struct *mmap_user = rb->mmap_user; ++ int mmap_locked = rb->mmap_locked; ++ unsigned long size = perf_data_size(rb); + +- atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); +- vma->vm_mm->pinned_vm -= event->mmap_locked; + rcu_assign_pointer(event->rb, NULL); + ring_buffer_detach(event, rb); + mutex_unlock(&event->mmap_mutex); + +- ring_buffer_put(rb); +- free_uid(user); ++ if (ring_buffer_put(rb)) { ++ atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm); ++ vma->vm_mm->pinned_vm -= mmap_locked; ++ free_uid(mmap_user); ++ } + } + } + +@@ -3614,9 +3617,7 @@ static int 
perf_mmap(struct file *file, + WARN_ON_ONCE(event->ctx->parent_ctx); + mutex_lock(&event->mmap_mutex); + if (event->rb) { +- if (event->rb->nr_pages == nr_pages) +- atomic_inc(&event->rb->refcount); +- else ++ if (event->rb->nr_pages != nr_pages) + ret = -EINVAL; + goto unlock; + } +@@ -3658,12 +3659,14 @@ static int perf_mmap(struct file *file, + ret = -ENOMEM; + goto unlock; + } +- rcu_assign_pointer(event->rb, rb); ++ ++ rb->mmap_locked = extra; ++ rb->mmap_user = get_current_user(); + + atomic_long_add(user_extra, &user->locked_vm); +- event->mmap_locked = extra; +- event->mmap_user = get_current_user(); +- vma->vm_mm->pinned_vm += event->mmap_locked; ++ vma->vm_mm->pinned_vm += extra; ++ ++ rcu_assign_pointer(event->rb, rb); + + perf_event_update_userpage(event); + +@@ -3672,7 +3675,7 @@ unlock: + atomic_inc(&event->mmap_count); + mutex_unlock(&event->mmap_mutex); + +- vma->vm_flags |= VM_RESERVED; ++ vma->vm_flags |= VM_DONTCOPY | VM_RESERVED; + vma->vm_ops = &perf_mmap_vmops; + + return ret; +--- a/kernel/events/internal.h ++++ b/kernel/events/internal.h +@@ -30,6 +30,9 @@ struct ring_buffer { + spinlock_t event_lock; + struct list_head event_list; + ++ int mmap_locked; ++ struct user_struct *mmap_user; ++ + struct perf_event_mmap_page *user_page; + void *data_pages[0]; + }; diff --git a/queue-3.4/series b/queue-3.4/series index bb75ddc3df4..4e14248d76a 100644 --- a/queue-3.4/series +++ b/queue-3.4/series @@ -9,3 +9,5 @@ perf-disable-monitoring-on-setuid-processes-for-regular-users.patch ubifs-prepare-to-fix-a-horrid-bug.patch ubifs-fix-a-horrid-bug.patch pch_uart-fix-a-deadlock-when-pch_uart-as-console.patch +perf-fix-perf-mmap-bugs.patch +perf-fix-mmap-accounting-hole.patch
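The two patches queued above close the same class of hole: a perf ring buffer shared between events via redirected output, torn down from a context that no longer has the mmap()ing task's vm. For reference, that situation can be set up from userspace roughly as sketched below: event A is mmap()ed, event B redirects its output into A's buffer with PERF_EVENT_IOC_SET_OUTPUT, and the descriptors are then released so that the final buffer reference is held by an event that was never mmap()ed itself. This is only an illustrative sketch (the software event type, sample period and 8-page data area are arbitrary choices); it is not part of the queued patches and not the fuzzer reproducer referenced in the changelogs.

/* Illustrative sketch only -- not part of the patches above. */
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	long page_size = sysconf(_SC_PAGESIZE);
	size_t map_size = (1 + 8) * page_size;	/* metadata page + 2^3 data pages */
	void *base;
	int fd_a, fd_b;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.sample_period = 100000;
	attr.disabled = 1;
	attr.exclude_kernel = 1;	/* self-monitoring only */
	attr.exclude_hv = 1;

	/* Two events on the calling task. */
	fd_a = perf_event_open(&attr, 0, -1, -1, 0);
	fd_b = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd_a < 0 || fd_b < 0) {
		perror("perf_event_open");
		return 1;
	}

	/* mmap() event A's buffer; this charges locked_vm/pinned_vm. */
	base = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_a, 0);
	if (base == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Redirect event B's output into A's buffer; B is never mmap()ed. */
	if (ioctl(fd_b, PERF_EVENT_IOC_SET_OUTPUT, fd_a))
		perror("PERF_EVENT_IOC_SET_OUTPUT");

	/*
	 * Without the fixes above, tearing these down could leave the
	 * buffer's final reference with an event that was never mmap()ed,
	 * so the locked-page accounting charged at mmap() time was not
	 * reliably undone.  With both patches applied, the buffer is
	 * destroyed on the last munmap(), where vma->vm_mm is still
	 * available, and redirected events are detached at that point.
	 */
	munmap(base, map_size);
	close(fd_a);
	close(fd_b);

	return 0;
}

With both patches applied, repeating such a sequence should leave the user's locked_vm and the mm's pinned_vm balanced, since the accounting is now undone on the final munmap() rather than at an arbitrary last ring_buffer_put().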