--- /dev/null
+From 9bb5d40cd93c9dd4be74834b1dcb1ba03629716b Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 4 Jun 2013 10:44:21 +0200
+Subject: perf: Fix mmap() accounting hole
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 9bb5d40cd93c9dd4be74834b1dcb1ba03629716b upstream.
+
+Vince's fuzzer once again found holes. This time it spotted a leak in
+the locked page accounting.
+
+When an event had redirected output and its close() dropped the last
+reference to the buffer, we didn't have a VM context in which to undo
+the accounting.
+
+Change the code to destroy the buffer on the last munmap() and detach
+all redirected events at that time. This provides us with the right
+context in which to undo the VM accounting.
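+
+As a quick illustration of that scheme, here is a minimal userspace
+sketch (an analogy only; struct buf, do_mmap(), buf_put() and friends
+are made-up names, not the kernel API): the buffer carries a lifetime
+refcount plus a separate count of live mappings, the locked-page
+accounting is undone by the last munmap(), and an event that merely
+redirects output can drop its reference later without touching the
+accounting.
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    struct buf {
+        int refcount;    /* lifetime: free the buffer when this hits 0 */
+        int mmap_count;  /* live mappings: undo accounting when 0      */
+        long locked;     /* pages charged to the mmap()ing user        */
+    };
+
+    static long user_locked_vm;  /* stand-in for user->locked_vm */
+
+    static void buf_put(struct buf *rb)
+    {
+        if (--rb->refcount == 0)
+            free(rb);
+    }
+
+    /* "perf_mmap()": charge the user, take one mapping and one ref */
+    static struct buf *do_mmap(long pages)
+    {
+        struct buf *rb = calloc(1, sizeof(*rb));
+
+        rb->refcount = 1;
+        rb->mmap_count = 1;
+        rb->locked = pages;
+        user_locked_vm += pages;
+        return rb;
+    }
+
+    /* an event redirecting its output only takes a reference */
+    static void redirect_output(struct buf *rb)
+    {
+        rb->refcount++;
+    }
+
+    /*
+     * "perf_mmap_close()": only the last munmap() undoes the
+     * accounting, because only here do we still know who was charged;
+     * a later buf_put() from a redirected event must not touch it.
+     */
+    static void do_munmap(struct buf *rb)
+    {
+        if (--rb->mmap_count == 0)
+            user_locked_vm -= rb->locked;
+        buf_put(rb);
+    }
+
+    int main(void)
+    {
+        struct buf *rb = do_mmap(8);
+
+        redirect_output(rb);  /* a second event redirects into rb      */
+        do_munmap(rb);        /* last munmap(): accounting undone here */
+        buf_put(rb);          /* redirected event closes later: frees  */
+        printf("locked_vm = %ld\n", user_locked_vm);  /* prints 0 */
+        return 0;
+    }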
+
+[Backport note for 3.4-stable:
+The VM_RESERVED flag was replaced by the pair 'VM_DONTEXPAND | VM_DONTDUMP'
+in commit 314e51b9 (since 3.7.0-rc1). As 314e51b9 is part of a larger
+patchset that we did not backport, VM_RESERVED is restored here in place of
+'VM_DONTEXPAND | VM_DONTDUMP':
+- vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
++ vma->vm_flags |= VM_DONTCOPY | VM_RESERVED;
+ -- zliu]
+
+Reported-and-tested-by: Vince Weaver <vincent.weaver@maine.edu>
+Signed-off-by: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/20130604084421.GI8923@twins.programming.kicks-ass.net
+Cc: <stable@kernel.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Zhouping Liu <zliu@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/events/core.c | 228 ++++++++++++++++++++++++++++++++---------------
+ kernel/events/internal.h | 3
+ 2 files changed, 159 insertions(+), 72 deletions(-)
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -193,9 +193,6 @@ static void cpu_ctx_sched_in(struct perf
+ static void update_context_time(struct perf_event_context *ctx);
+ static u64 perf_event_time(struct perf_event *event);
+
+-static void ring_buffer_attach(struct perf_event *event,
+- struct ring_buffer *rb);
+-
+ void __weak perf_event_print_debug(void) { }
+
+ extern __weak const char *perf_pmu_name(void)
+@@ -2848,7 +2845,8 @@ static void free_event_rcu(struct rcu_he
+ kfree(event);
+ }
+
+-static bool ring_buffer_put(struct ring_buffer *rb);
++static void ring_buffer_put(struct ring_buffer *rb);
++static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);
+
+ static void free_event(struct perf_event *event)
+ {
+@@ -2873,15 +2871,30 @@ static void free_event(struct perf_event
+ if (has_branch_stack(event)) {
+ static_key_slow_dec_deferred(&perf_sched_events);
+ /* is system-wide event */
+- if (!(event->attach_state & PERF_ATTACH_TASK))
++ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ atomic_dec(&per_cpu(perf_branch_stack_events,
+ event->cpu));
++ }
+ }
+ }
+
+ if (event->rb) {
+- ring_buffer_put(event->rb);
+- event->rb = NULL;
++ struct ring_buffer *rb;
++
++ /*
++ * Can happen when we close an event with re-directed output.
++ *
++ * Since we have a 0 refcount, perf_mmap_close() will skip
++ * over us; possibly making our ring_buffer_put() the last.
++ */
++ mutex_lock(&event->mmap_mutex);
++ rb = event->rb;
++ if (rb) {
++ rcu_assign_pointer(event->rb, NULL);
++ ring_buffer_detach(event, rb);
++ ring_buffer_put(rb); /* could be last */
++ }
++ mutex_unlock(&event->mmap_mutex);
+ }
+
+ if (is_cgroup_event(event))
+@@ -3119,30 +3132,13 @@ static unsigned int perf_poll(struct fil
+ unsigned int events = POLL_HUP;
+
+ /*
+- * Race between perf_event_set_output() and perf_poll(): perf_poll()
+- * grabs the rb reference but perf_event_set_output() overrides it.
+- * Here is the timeline for two threads T1, T2:
+- * t0: T1, rb = rcu_dereference(event->rb)
+- * t1: T2, old_rb = event->rb
+- * t2: T2, event->rb = new rb
+- * t3: T2, ring_buffer_detach(old_rb)
+- * t4: T1, ring_buffer_attach(rb1)
+- * t5: T1, poll_wait(event->waitq)
+- *
+- * To avoid this problem, we grab mmap_mutex in perf_poll()
+- * thereby ensuring that the assignment of the new ring buffer
+- * and the detachment of the old buffer appear atomic to perf_poll()
++ * Pin the event->rb by taking event->mmap_mutex; otherwise
++ * perf_event_set_output() can swizzle our rb and make us miss wakeups.
+ */
+ mutex_lock(&event->mmap_mutex);
+-
+- rcu_read_lock();
+- rb = rcu_dereference(event->rb);
+- if (rb) {
+- ring_buffer_attach(event, rb);
++ rb = event->rb;
++ if (rb)
+ events = atomic_xchg(&rb->poll, 0);
+- }
+- rcu_read_unlock();
+-
+ mutex_unlock(&event->mmap_mutex);
+
+ poll_wait(file, &event->waitq, wait);
+@@ -3459,16 +3455,12 @@ static void ring_buffer_attach(struct pe
+ return;
+
+ spin_lock_irqsave(&rb->event_lock, flags);
+- if (!list_empty(&event->rb_entry))
+- goto unlock;
+-
+- list_add(&event->rb_entry, &rb->event_list);
+-unlock:
++ if (list_empty(&event->rb_entry))
++ list_add(&event->rb_entry, &rb->event_list);
+ spin_unlock_irqrestore(&rb->event_lock, flags);
+ }
+
+-static void ring_buffer_detach(struct perf_event *event,
+- struct ring_buffer *rb)
++static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb)
+ {
+ unsigned long flags;
+
+@@ -3487,13 +3479,10 @@ static void ring_buffer_wakeup(struct pe
+
+ rcu_read_lock();
+ rb = rcu_dereference(event->rb);
+- if (!rb)
+- goto unlock;
+-
+- list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
+- wake_up_all(&event->waitq);
+-
+-unlock:
++ if (rb) {
++ list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
++ wake_up_all(&event->waitq);
++ }
+ rcu_read_unlock();
+ }
+
+@@ -3520,23 +3509,14 @@ static struct ring_buffer *ring_buffer_g
+ return rb;
+ }
+
+-static bool ring_buffer_put(struct ring_buffer *rb)
++static void ring_buffer_put(struct ring_buffer *rb)
+ {
+- struct perf_event *event, *n;
+- unsigned long flags;
+-
+ if (!atomic_dec_and_test(&rb->refcount))
+- return false;
++ return;
+
+- spin_lock_irqsave(&rb->event_lock, flags);
+- list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
+- list_del_init(&event->rb_entry);
+- wake_up_all(&event->waitq);
+- }
+- spin_unlock_irqrestore(&rb->event_lock, flags);
++ WARN_ON_ONCE(!list_empty(&rb->event_list));
+
+ call_rcu(&rb->rcu_head, rb_free_rcu);
+- return true;
+ }
+
+ static void perf_mmap_open(struct vm_area_struct *vma)
+@@ -3544,28 +3524,100 @@ static void perf_mmap_open(struct vm_are
+ struct perf_event *event = vma->vm_file->private_data;
+
+ atomic_inc(&event->mmap_count);
++ atomic_inc(&event->rb->mmap_count);
+ }
+
++/*
++ * A buffer can be mmap()ed multiple times; either directly through the same
++ * event, or through other events by use of perf_event_set_output().
++ *
++ * In order to undo the VM accounting done by perf_mmap() we need to destroy
++ * the buffer here, where we still have a VM context. This means we need
++ * to detach all events redirecting to us.
++ */
+ static void perf_mmap_close(struct vm_area_struct *vma)
+ {
+ struct perf_event *event = vma->vm_file->private_data;
+
+- if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
+- struct ring_buffer *rb = event->rb;
+- struct user_struct *mmap_user = rb->mmap_user;
+- int mmap_locked = rb->mmap_locked;
+- unsigned long size = perf_data_size(rb);
++ struct ring_buffer *rb = event->rb;
++ struct user_struct *mmap_user = rb->mmap_user;
++ int mmap_locked = rb->mmap_locked;
++ unsigned long size = perf_data_size(rb);
+
+- rcu_assign_pointer(event->rb, NULL);
+- ring_buffer_detach(event, rb);
+- mutex_unlock(&event->mmap_mutex);
++ atomic_dec(&rb->mmap_count);
++
++ if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
++ return;
++
++ /* Detach current event from the buffer. */
++ rcu_assign_pointer(event->rb, NULL);
++ ring_buffer_detach(event, rb);
++ mutex_unlock(&event->mmap_mutex);
++
++ /* If there's still other mmap()s of this buffer, we're done. */
++ if (atomic_read(&rb->mmap_count)) {
++ ring_buffer_put(rb); /* can't be last */
++ return;
++ }
+
+- if (ring_buffer_put(rb)) {
+- atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
+- vma->vm_mm->pinned_vm -= mmap_locked;
+- free_uid(mmap_user);
++ /*
++ * No other mmap()s, detach from all other events that might redirect
++ * into the now unreachable buffer. Somewhat complicated by the
++ * fact that rb::event_lock otherwise nests inside mmap_mutex.
++ */
++again:
++ rcu_read_lock();
++ list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
++ if (!atomic_long_inc_not_zero(&event->refcount)) {
++ /*
++ * This event is en-route to free_event() which will
++ * detach it and remove it from the list.
++ */
++ continue;
+ }
++ rcu_read_unlock();
++
++ mutex_lock(&event->mmap_mutex);
++ /*
++ * Check we didn't race with perf_event_set_output() which can
++ * swizzle the rb from under us while we were waiting to
++ * acquire mmap_mutex.
++ *
++ * If we find a different rb; ignore this event, a next
++ * iteration will no longer find it on the list. We have to
++ * still restart the iteration to make sure we're not now
++ * iterating the wrong list.
++ */
++ if (event->rb == rb) {
++ rcu_assign_pointer(event->rb, NULL);
++ ring_buffer_detach(event, rb);
++ ring_buffer_put(rb); /* can't be last, we still have one */
++ }
++ mutex_unlock(&event->mmap_mutex);
++ put_event(event);
++
++ /*
++ * Restart the iteration; either we're on the wrong list or
++ * destroyed its integrity by doing a deletion.
++ */
++ goto again;
+ }
++ rcu_read_unlock();
++
++ /*
++ * It could be there's still a few 0-ref events on the list; they'll
++ * get cleaned up by free_event() -- they'll also still have their
++ * ref on the rb and will free it whenever they are done with it.
++ *
++ * Aside from that, this buffer is 'fully' detached and unmapped,
++ * undo the VM accounting.
++ */
++
++ atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
++ vma->vm_mm->pinned_vm -= mmap_locked;
++ free_uid(mmap_user);
++
++ ring_buffer_put(rb); /* could be last */
+ }
+
+ static const struct vm_operations_struct perf_mmap_vmops = {
+@@ -3615,10 +3667,24 @@ static int perf_mmap(struct file *file,
+ return -EINVAL;
+
+ WARN_ON_ONCE(event->ctx->parent_ctx);
++again:
+ mutex_lock(&event->mmap_mutex);
+ if (event->rb) {
+- if (event->rb->nr_pages != nr_pages)
++ if (event->rb->nr_pages != nr_pages) {
+ ret = -EINVAL;
++ goto unlock;
++ }
++
++ if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
++ /*
++ * Raced against perf_mmap_close() through
++ * perf_event_set_output(). Try again, hope for better
++ * luck.
++ */
++ mutex_unlock(&event->mmap_mutex);
++ goto again;
++ }
++
+ goto unlock;
+ }
+
+@@ -3660,12 +3726,14 @@ static int perf_mmap(struct file *file,
+ goto unlock;
+ }
+
++ atomic_set(&rb->mmap_count, 1);
+ rb->mmap_locked = extra;
+ rb->mmap_user = get_current_user();
+
+ atomic_long_add(user_extra, &user->locked_vm);
+ vma->vm_mm->pinned_vm += extra;
+
++ ring_buffer_attach(event, rb);
+ rcu_assign_pointer(event->rb, rb);
+
+ perf_event_update_userpage(event);
+@@ -3675,6 +3743,10 @@ unlock:
+ atomic_inc(&event->mmap_count);
+ mutex_unlock(&event->mmap_mutex);
+
++ /*
++ * Since pinned accounting is per vm we cannot allow fork() to copy our
++ * vma.
++ */
+ vma->vm_flags |= VM_DONTCOPY | VM_RESERVED;
+ vma->vm_ops = &perf_mmap_vmops;
+
+@@ -6164,6 +6236,8 @@ set:
+ if (atomic_read(&event->mmap_count))
+ goto unlock;
+
++ old_rb = event->rb;
++
+ if (output_event) {
+ /* get the rb we want to redirect to */
+ rb = ring_buffer_get(output_event);
+@@ -6171,16 +6245,28 @@ set:
+ goto unlock;
+ }
+
+- old_rb = event->rb;
+- rcu_assign_pointer(event->rb, rb);
+ if (old_rb)
+ ring_buffer_detach(event, old_rb);
++
++ if (rb)
++ ring_buffer_attach(event, rb);
++
++ rcu_assign_pointer(event->rb, rb);
++
++ if (old_rb) {
++ ring_buffer_put(old_rb);
++ /*
++ * Since we detached before setting the new rb, so that we
++ * could attach the new rb, we could have missed a wakeup.
++ * Provide it now.
++ */
++ wake_up_all(&event->waitq);
++ }
++
+ ret = 0;
+ unlock:
+ mutex_unlock(&event->mmap_mutex);
+
+- if (old_rb)
+- ring_buffer_put(old_rb);
+ out:
+ return ret;
+ }
+--- a/kernel/events/internal.h
++++ b/kernel/events/internal.h
+@@ -30,7 +30,8 @@ struct ring_buffer {
+ spinlock_t event_lock;
+ struct list_head event_list;
+
+- int mmap_locked;
++ atomic_t mmap_count;
++ unsigned long mmap_locked;
+ struct user_struct *mmap_user;
+
+ struct perf_event_mmap_page *user_page;
--- /dev/null
+From 26cb63ad11e04047a64309362674bcbbd6a6f246 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 28 May 2013 10:55:48 +0200
+Subject: perf: Fix perf mmap bugs
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 26cb63ad11e04047a64309362674bcbbd6a6f246 upstream.
+
+Vince reported a problem found by his perf-specific trinity
+fuzzer.
+
+Al noticed 2 problems with perf's mmap():
+
+ - it has issues against fork() since we use vma->vm_mm for accounting.
+ - it has an rb refcount leak on double mmap().
+
+We fix the issues against fork() by using VM_DONTCOPY; I don't
+think there is code out there that relies on fork() copying a
+perf mmap()ed VMA, since we never heard about weird accounting
+problems/crashes. If we do need this to work, the previously
+proposed VM_PINNED could make it work.
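+
+For what it's worth, VM_DONTCOPY is the vma flag behind
+madvise(MADV_DONTFORK), so its effect on fork() can be demonstrated
+from plain userspace. The snippet below is only an illustration of
+the flag's semantics, not the kernel change itself:
+
+    #include <stdio.h>
+    #include <string.h>
+    #include <sys/mman.h>
+    #include <sys/wait.h>
+    #include <unistd.h>
+
+    int main(void)
+    {
+        size_t len = 4096;
+        char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
+                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+        if (p == MAP_FAILED)
+            return 1;
+        madvise(p, len, MADV_DONTFORK);  /* sets VM_DONTCOPY on the vma */
+
+        if (fork() == 0) {
+            /* fails with ENOMEM: the vma was not copied to the child */
+            if (msync(p, len, MS_ASYNC) < 0)
+                perror("child msync");
+            _exit(0);
+        }
+        wait(NULL);
+        strcpy(p, "still mapped in the parent");
+        puts(p);
+        return 0;
+    }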
+
+Aside from the rb reference leak spotted by Al, Vince's example
+prog was indeed doing a double mmap() through the use of
+perf_event_set_output().
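+
+A toy model of that reference leak (made-up names; the removed lines
+in the diff below show the real thing): every additional mmap() of an
+already-mapped event used to take an extra buffer reference, while
+only the final munmap() dropped one, so re-mmap()ing leaked
+references.
+
+    #include <stdio.h>
+
+    struct buf { int refcount; };
+
+    static int mmap_count;
+
+    static void do_mmap(struct buf *rb, int old_behaviour)
+    {
+        if (!mmap_count)
+            rb->refcount++;     /* first mapping takes the only ref */
+        else if (old_behaviour)
+            rb->refcount++;     /* the reference that gets leaked   */
+        mmap_count++;
+    }
+
+    static void do_munmap(struct buf *rb)
+    {
+        if (--mmap_count == 0)
+            rb->refcount--;     /* only the last munmap() puts a ref */
+    }
+
+    int main(void)
+    {
+        struct buf rb = { 0 };
+
+        do_mmap(&rb, 1);
+        do_mmap(&rb, 1);        /* double mmap(), old behaviour */
+        do_munmap(&rb);
+        do_munmap(&rb);
+        printf("old: leaked refs = %d\n", rb.refcount);  /* 1 */
+
+        rb.refcount = 0;
+        do_mmap(&rb, 0);
+        do_mmap(&rb, 0);        /* double mmap(), fixed behaviour */
+        do_munmap(&rb);
+        do_munmap(&rb);
+        printf("new: leaked refs = %d\n", rb.refcount);  /* 0 */
+        return 0;
+    }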
+
+This exposes another problem: now that two events share one
+buffer, the accounting gets screwy because we account per
+event. Fix this by making the buffer responsible for its own
+accounting.
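+
+In data-structure terms (a tiny sketch with invented type names that
+mirrors the hunks below, not the kernel structs themselves): the
+fields recording how much was charged, and against whom, move from
+the per-event structure into the shared buffer, so a buffer reached
+by several events is charged and credited exactly once.
+
+    #include <stdio.h>
+
+    struct user { long locked_vm; };
+
+    struct ring_buffer_sk {
+        long mmap_locked;           /* pages charged for this buffer...   */
+        struct user *mmap_user;     /* ...and against whom; was per-event */
+    };
+
+    struct perf_event_sk {
+        struct ring_buffer_sk *rb;  /* possibly shared via set_output()   */
+        /* no mmap_locked / mmap_user here any more */
+    };
+
+    int main(void)
+    {
+        struct user u = { 0 };
+        struct ring_buffer_sk rb = { 8, &u };
+        struct perf_event_sk a = { &rb }, b = { &rb }; /* two events, one rb */
+
+        u.locked_vm += rb.mmap_locked;   /* charged once, on first mmap()   */
+        u.locked_vm -= rb.mmap_locked;   /* credited once, on last munmap() */
+        printf("shared rb: %d, locked_vm = %ld\n",
+               a.rb == b.rb, u.locked_vm);  /* 1, 0 */
+        return 0;
+    }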
+
+[Backport note for 3.4-stable:
+The VM_RESERVED flag was replaced by the pair 'VM_DONTEXPAND | VM_DONTDUMP'
+in commit 314e51b9 (since 3.7.0-rc1). As 314e51b9 is part of a larger
+patchset that we did not backport, VM_RESERVED is restored here in place of
+'VM_DONTEXPAND | VM_DONTDUMP':
+- vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
++ vma->vm_flags |= VM_DONTCOPY | VM_RESERVED;
+ -- zliu]
+
+Reported-by: Vince Weaver <vincent.weaver@maine.edu>
+Signed-off-by: Peter Zijlstra <peterz@infradead.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
+Link: http://lkml.kernel.org/r/20130528085548.GA12193@twins.programming.kicks-ass.net
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Zhouping Liu <zliu@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/perf_event.h | 3 +--
+ kernel/events/core.c | 37 ++++++++++++++++++++-----------------
+ kernel/events/internal.h | 3 +++
+ 3 files changed, 24 insertions(+), 19 deletions(-)
+
+--- a/include/linux/perf_event.h
++++ b/include/linux/perf_event.h
+@@ -950,8 +950,7 @@ struct perf_event {
+ /* mmap bits */
+ struct mutex mmap_mutex;
+ atomic_t mmap_count;
+- int mmap_locked;
+- struct user_struct *mmap_user;
++
+ struct ring_buffer *rb;
+ struct list_head rb_entry;
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -2848,7 +2848,7 @@ static void free_event_rcu(struct rcu_he
+ kfree(event);
+ }
+
+-static void ring_buffer_put(struct ring_buffer *rb);
++static bool ring_buffer_put(struct ring_buffer *rb);
+
+ static void free_event(struct perf_event *event)
+ {
+@@ -3520,13 +3520,13 @@ static struct ring_buffer *ring_buffer_g
+ return rb;
+ }
+
+-static void ring_buffer_put(struct ring_buffer *rb)
++static bool ring_buffer_put(struct ring_buffer *rb)
+ {
+ struct perf_event *event, *n;
+ unsigned long flags;
+
+ if (!atomic_dec_and_test(&rb->refcount))
+- return;
++ return false;
+
+ spin_lock_irqsave(&rb->event_lock, flags);
+ list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
+@@ -3536,6 +3536,7 @@ static void ring_buffer_put(struct ring_
+ spin_unlock_irqrestore(&rb->event_lock, flags);
+
+ call_rcu(&rb->rcu_head, rb_free_rcu);
++ return true;
+ }
+
+ static void perf_mmap_open(struct vm_area_struct *vma)
+@@ -3550,18 +3551,20 @@ static void perf_mmap_close(struct vm_ar
+ struct perf_event *event = vma->vm_file->private_data;
+
+ if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
+- unsigned long size = perf_data_size(event->rb);
+- struct user_struct *user = event->mmap_user;
+ struct ring_buffer *rb = event->rb;
++ struct user_struct *mmap_user = rb->mmap_user;
++ int mmap_locked = rb->mmap_locked;
++ unsigned long size = perf_data_size(rb);
+
+- atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
+- vma->vm_mm->pinned_vm -= event->mmap_locked;
+ rcu_assign_pointer(event->rb, NULL);
+ ring_buffer_detach(event, rb);
+ mutex_unlock(&event->mmap_mutex);
+
+- ring_buffer_put(rb);
+- free_uid(user);
++ if (ring_buffer_put(rb)) {
++ atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
++ vma->vm_mm->pinned_vm -= mmap_locked;
++ free_uid(mmap_user);
++ }
+ }
+ }
+
+@@ -3614,9 +3617,7 @@ static int perf_mmap(struct file *file,
+ WARN_ON_ONCE(event->ctx->parent_ctx);
+ mutex_lock(&event->mmap_mutex);
+ if (event->rb) {
+- if (event->rb->nr_pages == nr_pages)
+- atomic_inc(&event->rb->refcount);
+- else
++ if (event->rb->nr_pages != nr_pages)
+ ret = -EINVAL;
+ goto unlock;
+ }
+@@ -3658,12 +3659,14 @@ static int perf_mmap(struct file *file,
+ ret = -ENOMEM;
+ goto unlock;
+ }
+- rcu_assign_pointer(event->rb, rb);
++
++ rb->mmap_locked = extra;
++ rb->mmap_user = get_current_user();
+
+ atomic_long_add(user_extra, &user->locked_vm);
+- event->mmap_locked = extra;
+- event->mmap_user = get_current_user();
+- vma->vm_mm->pinned_vm += event->mmap_locked;
++ vma->vm_mm->pinned_vm += extra;
++
++ rcu_assign_pointer(event->rb, rb);
+
+ perf_event_update_userpage(event);
+
+@@ -3672,7 +3675,7 @@ unlock:
+ atomic_inc(&event->mmap_count);
+ mutex_unlock(&event->mmap_mutex);
+
+- vma->vm_flags |= VM_RESERVED;
++ vma->vm_flags |= VM_DONTCOPY | VM_RESERVED;
+ vma->vm_ops = &perf_mmap_vmops;
+
+ return ret;
+--- a/kernel/events/internal.h
++++ b/kernel/events/internal.h
+@@ -30,6 +30,9 @@ struct ring_buffer {
+ spinlock_t event_lock;
+ struct list_head event_list;
+
++ int mmap_locked;
++ struct user_struct *mmap_user;
++
+ struct perf_event_mmap_page *user_page;
+ void *data_pages[0];
+ };