--- /dev/null
+From 8fff105e13041e49b82f92eef034f363a6b1c071 Mon Sep 17 00:00:00 2001
+From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
+Date: Tue, 17 Mar 2015 18:14:59 +0000
+Subject: arm64: perf: reject groups spanning multiple HW PMUs
+
+From: Suzuki K. Poulose <suzuki.poulose@arm.com>
+
+commit 8fff105e13041e49b82f92eef034f363a6b1c071 upstream.
+
+The perf core implicitly rejects events spanning multiple HW PMUs, as in
+these cases the event->ctx will differ. However this validation is
+performed after pmu::event_init() is called in perf_init_event(), and
+thus pmu::event_init() may be called with a group leader from a
+different HW PMU.
+
+The ARM64 PMU driver does not take this fact into account, and when
+validating groups assumes that it can call to_arm_pmu(event->pmu) for
+any HW event. When the event in question is from another HW PMU this is
+wrong, and results in dereferencing garbage.
+
+This patch updates the ARM64 PMU driver to first test for and reject
+events from other PMUs, moving the to_arm_pmu and related logic after
+this test. Fixes a crash triggered by perf_fuzzer on Linux-4.0-rc2, with
+a CCI PMU present:
+
+Bad mode in Synchronous Abort handler detected, code 0x86000006 -- IABT (current EL)
+CPU: 0 PID: 1371 Comm: perf_fuzzer Not tainted 3.19.0+ #249
+Hardware name: V2F-1XV7 Cortex-A53x2 SMM (DT)
+task: ffffffc07c73a280 ti: ffffffc07b0a0000 task.ti: ffffffc07b0a0000
+PC is at 0x0
+LR is at validate_event+0x90/0xa8
+pc : [<0000000000000000>] lr : [<ffffffc000090228>] pstate: 00000145
+sp : ffffffc07b0a3ba0
+
+[< (null)>] (null)
+[<ffffffc0000907d8>] armpmu_event_init+0x174/0x3cc
+[<ffffffc00015d870>] perf_try_init_event+0x34/0x70
+[<ffffffc000164094>] perf_init_event+0xe0/0x10c
+[<ffffffc000164348>] perf_event_alloc+0x288/0x358
+[<ffffffc000164c5c>] SyS_perf_event_open+0x464/0x98c
+Code: bad PC value
+
+Also cleans up the code to use the arm_pmu only when we know
+that we are dealing with an arm_pmu event.
+
+Cc: Will Deacon <will.deacon@arm.com>
+Acked-by: Mark Rutland <mark.rutland@arm.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kernel/perf_event.c | 21 +++++++++++++++------
+ 1 file changed, 15 insertions(+), 6 deletions(-)
+
+--- a/arch/arm64/kernel/perf_event.c
++++ b/arch/arm64/kernel/perf_event.c
+@@ -316,22 +316,31 @@ out:
+ }
+
+ static int
+-validate_event(struct pmu_hw_events *hw_events,
+- struct perf_event *event)
++validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
++ struct perf_event *event)
+ {
+- struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
++ struct arm_pmu *armpmu;
+ struct hw_perf_event fake_event = event->hw;
+ struct pmu *leader_pmu = event->group_leader->pmu;
+
+ if (is_software_event(event))
+ return 1;
+
++ /*
++ * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
++ * core perf code won't check that the pmu->ctx == leader->ctx
++ * until after pmu->event_init(event).
++ */
++ if (event->pmu != pmu)
++ return 0;
++
+ if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
+ return 1;
+
+ if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
+ return 1;
+
++ armpmu = to_arm_pmu(event->pmu);
+ return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
+ }
+
+@@ -349,15 +358,15 @@ validate_group(struct perf_event *event)
+ memset(fake_used_mask, 0, sizeof(fake_used_mask));
+ fake_pmu.used_mask = fake_used_mask;
+
+- if (!validate_event(&fake_pmu, leader))
++ if (!validate_event(event->pmu, &fake_pmu, leader))
+ return -EINVAL;
+
+ list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+- if (!validate_event(&fake_pmu, sibling))
++ if (!validate_event(event->pmu, &fake_pmu, sibling))
+ return -EINVAL;
+ }
+
+- if (!validate_event(&fake_pmu, event))
++ if (!validate_event(event->pmu, &fake_pmu, event))
+ return -EINVAL;
+
+ return 0;
--- /dev/null
+From 0d62e9dd6da45bbf0f33a8617afc5fe774c8f45f Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Wed, 5 Aug 2015 12:54:46 +0100
+Subject: ASN.1: Fix non-match detection failure on data overrun
+
+From: David Howells <dhowells@redhat.com>
+
+commit 0d62e9dd6da45bbf0f33a8617afc5fe774c8f45f upstream.
+
+If the ASN.1 decoder is asked to parse a sequence of objects, non-optional
+matches get skipped if there's no more data to be had, rather than a
+data-overrun error being reported.
+
+This is because the code segment that decides whether to skip optional
+matches (i.e. matches that could get ignored because an element is marked
+OPTIONAL in the grammar) due to a lack of data also skips non-optional
+elements if the data pointer has reached the end of the buffer.
+
+This can be tested with the data decoder for the new RSA akcipher algorithm
+that takes three non-optional integers. Currently, it skips the last
+integer if there is insufficient data.
+
+Without the fix, #defining DEBUG in asn1_decoder.c will show something
+like:
+
+ next_op: pc=0/13 dp=0/270 C=0 J=0
+ - match? 30 30 00
+ - TAG: 30 266 CONS
+ next_op: pc=2/13 dp=4/270 C=1 J=0
+ - match? 02 02 00
+ - TAG: 02 257
+ - LEAF: 257
+ next_op: pc=5/13 dp=265/270 C=1 J=0
+ - match? 02 02 00
+ - TAG: 02 3
+ - LEAF: 3
+ next_op: pc=8/13 dp=270/270 C=1 J=0
+ next_op: pc=11/13 dp=270/270 C=1 J=0
+ - end cons t=4 dp=270 l=270/270
+
+The next_op line for pc=8/13 should be followed by a match line.
+
+This is not exploitable for X.509 certificates by means of shortening the
+message and fixing up the ASN.1 CONS tags because:
+
+ (1) The relevant records being built up are cleared before use.
+
+ (2) If the message is shortened sufficiently to remove the public key, the
+ ASN.1 parse of the RSA key will fail quickly due to a lack of data.
+
+ (3) Extracted signature data is either turned into MPIs (which cope with a
+ 0 length) or consists of simpler integers specifying algorithms and suchlike
+ (which can validly be 0); and
+
+ (4) The AKID and SKID extensions are optional and their removal is handled
+ without risking passing a NULL to asymmetric_key_generate_id().
+
+ (5) If the certificate is truncated sufficiently to remove the subject,
+ issuer or serialNumber then the ASN.1 decoder will fail with a 'Cons
+ stack underflow' return.
+
+This is not exploitable for PKCS#7 messages by removing elements from the
+tail end of a sequence in such a message:
+
+ (1) Any shortened X.509 certs embedded in the PKCS#7 message are survivable
+ as detailed above.
+
+ (2) The message digest content isn't used if it shows a NULL pointer;
+ similarly, the authattrs aren't used if they show a NULL pointer.
+
+ (3) A missing signature results in a NULL MPI - which the MPI routines deal
+ with.
+
+ (4) If data is NULL, it is expected that the message has detached content and
+ that is handled appropriately.
+
+ (5) If the serialNumber is excised, the unconditional action associated
+ with it will pick up the containing SEQUENCE instead, so no NULL
+ pointer will be seen here.
+
+ If both the issuer and the serialNumber are excised, the ASN.1 decode
+ will fail with an 'Unexpected tag' return.
+
+ In either case, there's no way to get to asymmetric_key_generate_id()
+ with a NULL pointer.
+
+ (6) Other fields are decoded to simple integers. Shortening the message
+ to omit an algorithm ID field will cause checks on this to fail early
+ in the verification process.
+
+
+This can also be tested by snipping objects off the end of the ASN.1 stream
+such that mandatory tags are removed - or even off the end of internal
+SEQUENCEs. If any mandatory tag is missing, the error EBADMSG *should* be
+produced. Without this patch, ERANGE or ENOPKG might be produced, or the parse
+may apparently succeed, perhaps with ENOKEY or EKEYREJECTED being produced
+later, depending on what gets snipped.
+
+Just snipping off the final BIT_STRING or OCTET_STRING from either sample
+should be a start, since both are mandatory and neither will cause an EBADMSG
+without the patches.
+
+Reported-by: Marcel Holtmann <marcel@holtmann.org>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Tested-by: Marcel Holtmann <marcel@holtmann.org>
+Reviewed-by: David Woodhouse <David.Woodhouse@intel.com>
+Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/asn1_decoder.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/lib/asn1_decoder.c
++++ b/lib/asn1_decoder.c
+@@ -208,9 +208,8 @@ next_op:
+ unsigned char tmp;
+
+ /* Skip conditional matches if possible */
+- if ((op & ASN1_OP_MATCH__COND &&
+- flags & FLAG_MATCHED) ||
+- dp == datalen) {
++ if ((op & ASN1_OP_MATCH__COND && flags & FLAG_MATCHED) ||
++ (op & ASN1_OP_MATCH__SKIP && dp == datalen)) {
+ pc += asn1_op_lengths[op];
+ goto next_op;
+ }
--- /dev/null
+From f63a8daa5812afef4f06c962351687e1ff9ccb2b Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 23 Jan 2015 12:24:14 +0100
+Subject: perf: Fix event->ctx locking
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit f63a8daa5812afef4f06c962351687e1ff9ccb2b upstream.
+
+There have been a few reported issues wrt. the lack of locking around
+changing event->ctx. This patch tries to address those.
+
+It avoids the whole rwsem thing; and while it appears to work, please
+give it some thought in review.
+
+What I did fail at is sensible runtime checks on the use of
+event->ctx; the RCU use makes it very hard.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: http://lkml.kernel.org/r/20150123125834.209535886@infradead.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/events/core.c | 244 +++++++++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 207 insertions(+), 37 deletions(-)
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -907,6 +907,77 @@ static void put_ctx(struct perf_event_co
+ }
+
+ /*
++ * Because of perf_event::ctx migration in sys_perf_event_open::move_group and
++ * perf_pmu_migrate_context() we need some magic.
++ *
++ * Those places that change perf_event::ctx will hold both
++ * perf_event_ctx::mutex of the 'old' and 'new' ctx value.
++ *
++ * Lock ordering is by mutex address. There is one other site where
++ * perf_event_context::mutex nests and that is put_event(). But remember that
++ * that is a parent<->child context relation, and migration does not affect
++ * children, therefore these two orderings should not interact.
++ *
++ * The change in perf_event::ctx does not affect children (as claimed above)
++ * because the sys_perf_event_open() case will install a new event and break
++ * the ctx parent<->child relation, and perf_pmu_migrate_context() is only
++ * concerned with cpuctx and that doesn't have children.
++ *
++ * The places that change perf_event::ctx will issue:
++ *
++ * perf_remove_from_context();
++ * synchronize_rcu();
++ * perf_install_in_context();
++ *
++ * to affect the change. The remove_from_context() + synchronize_rcu() should
++ * quiesce the event, after which we can install it in the new location. This
++ * means that only external vectors (perf_fops, prctl) can perturb the event
++ * while in transit. Therefore all such accessors should also acquire
++ * perf_event_context::mutex to serialize against this.
++ *
++ * However; because event->ctx can change while we're waiting to acquire
++ * ctx->mutex we must be careful and use the below perf_event_ctx_lock()
++ * function.
++ *
++ * Lock order:
++ * task_struct::perf_event_mutex
++ * perf_event_context::mutex
++ * perf_event_context::lock
++ * perf_event::child_mutex;
++ * perf_event::mmap_mutex
++ * mmap_sem
++ */
++static struct perf_event_context *perf_event_ctx_lock(struct perf_event *event)
++{
++ struct perf_event_context *ctx;
++
++again:
++ rcu_read_lock();
++ ctx = ACCESS_ONCE(event->ctx);
++ if (!atomic_inc_not_zero(&ctx->refcount)) {
++ rcu_read_unlock();
++ goto again;
++ }
++ rcu_read_unlock();
++
++ mutex_lock(&ctx->mutex);
++ if (event->ctx != ctx) {
++ mutex_unlock(&ctx->mutex);
++ put_ctx(ctx);
++ goto again;
++ }
++
++ return ctx;
++}
++
++static void perf_event_ctx_unlock(struct perf_event *event,
++ struct perf_event_context *ctx)
++{
++ mutex_unlock(&ctx->mutex);
++ put_ctx(ctx);
++}
++
++/*
+ * This must be done under the ctx->lock, such as to serialize against
+ * context_equiv(), therefore we cannot call put_ctx() since that might end up
+ * calling scheduler related locks and ctx->lock nests inside those.
+@@ -1654,7 +1725,7 @@ int __perf_event_disable(void *info)
+ * is the current context on this CPU and preemption is disabled,
+ * hence we can't get into perf_event_task_sched_out for this context.
+ */
+-void perf_event_disable(struct perf_event *event)
++static void _perf_event_disable(struct perf_event *event)
+ {
+ struct perf_event_context *ctx = event->ctx;
+ struct task_struct *task = ctx->task;
+@@ -1695,6 +1766,19 @@ retry:
+ }
+ raw_spin_unlock_irq(&ctx->lock);
+ }
++
++/*
++ * Strictly speaking kernel users cannot create groups and therefore this
++ * interface does not need the perf_event_ctx_lock() magic.
++ */
++void perf_event_disable(struct perf_event *event)
++{
++ struct perf_event_context *ctx;
++
++ ctx = perf_event_ctx_lock(event);
++ _perf_event_disable(event);
++ perf_event_ctx_unlock(event, ctx);
++}
+ EXPORT_SYMBOL_GPL(perf_event_disable);
+
+ static void perf_set_shadow_time(struct perf_event *event,
+@@ -2158,7 +2242,7 @@ unlock:
+ * perf_event_for_each_child or perf_event_for_each as described
+ * for perf_event_disable.
+ */
+-void perf_event_enable(struct perf_event *event)
++static void _perf_event_enable(struct perf_event *event)
+ {
+ struct perf_event_context *ctx = event->ctx;
+ struct task_struct *task = ctx->task;
+@@ -2214,9 +2298,21 @@ retry:
+ out:
+ raw_spin_unlock_irq(&ctx->lock);
+ }
++
++/*
++ * See perf_event_disable();
++ */
++void perf_event_enable(struct perf_event *event)
++{
++ struct perf_event_context *ctx;
++
++ ctx = perf_event_ctx_lock(event);
++ _perf_event_enable(event);
++ perf_event_ctx_unlock(event, ctx);
++}
+ EXPORT_SYMBOL_GPL(perf_event_enable);
+
+-int perf_event_refresh(struct perf_event *event, int refresh)
++static int _perf_event_refresh(struct perf_event *event, int refresh)
+ {
+ /*
+ * not supported on inherited events
+@@ -2225,10 +2321,25 @@ int perf_event_refresh(struct perf_event
+ return -EINVAL;
+
+ atomic_add(refresh, &event->event_limit);
+- perf_event_enable(event);
++ _perf_event_enable(event);
+
+ return 0;
+ }
++
++/*
++ * See perf_event_disable()
++ */
++int perf_event_refresh(struct perf_event *event, int refresh)
++{
++ struct perf_event_context *ctx;
++ int ret;
++
++ ctx = perf_event_ctx_lock(event);
++ ret = _perf_event_refresh(event, refresh);
++ perf_event_ctx_unlock(event, ctx);
++
++ return ret;
++}
+ EXPORT_SYMBOL_GPL(perf_event_refresh);
+
+ static void ctx_sched_out(struct perf_event_context *ctx,
+@@ -3421,7 +3532,16 @@ static void perf_remove_from_owner(struc
+ rcu_read_unlock();
+
+ if (owner) {
+- mutex_lock(&owner->perf_event_mutex);
++ /*
++ * If we're here through perf_event_exit_task() we're already
++ * holding ctx->mutex which would be an inversion wrt. the
++ * normal lock order.
++ *
++ * However we can safely take this lock because its the child
++ * ctx->mutex.
++ */
++ mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING);
++
+ /*
+ * We have to re-check the event->owner field, if it is cleared
+ * we raced with perf_event_exit_task(), acquiring the mutex
+@@ -3547,12 +3667,13 @@ static int perf_event_read_group(struct
+ u64 read_format, char __user *buf)
+ {
+ struct perf_event *leader = event->group_leader, *sub;
+- int n = 0, size = 0, ret = -EFAULT;
+ struct perf_event_context *ctx = leader->ctx;
+- u64 values[5];
++ int n = 0, size = 0, ret;
+ u64 count, enabled, running;
++ u64 values[5];
++
++ lockdep_assert_held(&ctx->mutex);
+
+- mutex_lock(&ctx->mutex);
+ count = perf_event_read_value(leader, &enabled, &running);
+
+ values[n++] = 1 + leader->nr_siblings;
+@@ -3567,7 +3688,7 @@ static int perf_event_read_group(struct
+ size = n * sizeof(u64);
+
+ if (copy_to_user(buf, values, size))
+- goto unlock;
++ return -EFAULT;
+
+ ret = size;
+
+@@ -3581,14 +3702,11 @@ static int perf_event_read_group(struct
+ size = n * sizeof(u64);
+
+ if (copy_to_user(buf + ret, values, size)) {
+- ret = -EFAULT;
+- goto unlock;
++ return -EFAULT;
+ }
+
+ ret += size;
+ }
+-unlock:
+- mutex_unlock(&ctx->mutex);
+
+ return ret;
+ }
+@@ -3660,8 +3778,14 @@ static ssize_t
+ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+ {
+ struct perf_event *event = file->private_data;
++ struct perf_event_context *ctx;
++ int ret;
+
+- return perf_read_hw(event, buf, count);
++ ctx = perf_event_ctx_lock(event);
++ ret = perf_read_hw(event, buf, count);
++ perf_event_ctx_unlock(event, ctx);
++
++ return ret;
+ }
+
+ static unsigned int perf_poll(struct file *file, poll_table *wait)
+@@ -3687,7 +3811,7 @@ static unsigned int perf_poll(struct fil
+ return events;
+ }
+
+-static void perf_event_reset(struct perf_event *event)
++static void _perf_event_reset(struct perf_event *event)
+ {
+ (void)perf_event_read(event);
+ local64_set(&event->count, 0);
+@@ -3706,6 +3830,7 @@ static void perf_event_for_each_child(st
+ struct perf_event *child;
+
+ WARN_ON_ONCE(event->ctx->parent_ctx);
++
+ mutex_lock(&event->child_mutex);
+ func(event);
+ list_for_each_entry(child, &event->child_list, child_list)
+@@ -3719,14 +3844,13 @@ static void perf_event_for_each(struct p
+ struct perf_event_context *ctx = event->ctx;
+ struct perf_event *sibling;
+
+- WARN_ON_ONCE(ctx->parent_ctx);
+- mutex_lock(&ctx->mutex);
++ lockdep_assert_held(&ctx->mutex);
++
+ event = event->group_leader;
+
+ perf_event_for_each_child(event, func);
+ list_for_each_entry(sibling, &event->sibling_list, group_entry)
+ perf_event_for_each_child(sibling, func);
+- mutex_unlock(&ctx->mutex);
+ }
+
+ struct period_event {
+@@ -3831,25 +3955,24 @@ static int perf_event_set_output(struct
+ struct perf_event *output_event);
+ static int perf_event_set_filter(struct perf_event *event, void __user *arg);
+
+-static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
++static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
+ {
+- struct perf_event *event = file->private_data;
+ void (*func)(struct perf_event *);
+ u32 flags = arg;
+
+ switch (cmd) {
+ case PERF_EVENT_IOC_ENABLE:
+- func = perf_event_enable;
++ func = _perf_event_enable;
+ break;
+ case PERF_EVENT_IOC_DISABLE:
+- func = perf_event_disable;
++ func = _perf_event_disable;
+ break;
+ case PERF_EVENT_IOC_RESET:
+- func = perf_event_reset;
++ func = _perf_event_reset;
+ break;
+
+ case PERF_EVENT_IOC_REFRESH:
+- return perf_event_refresh(event, arg);
++ return _perf_event_refresh(event, arg);
+
+ case PERF_EVENT_IOC_PERIOD:
+ return perf_event_period(event, (u64 __user *)arg);
+@@ -3896,6 +4019,19 @@ static long perf_ioctl(struct file *file
+ return 0;
+ }
+
++static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
++{
++ struct perf_event *event = file->private_data;
++ struct perf_event_context *ctx;
++ long ret;
++
++ ctx = perf_event_ctx_lock(event);
++ ret = _perf_ioctl(event, cmd, arg);
++ perf_event_ctx_unlock(event, ctx);
++
++ return ret;
++}
++
+ #ifdef CONFIG_COMPAT
+ static long perf_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+@@ -3918,11 +4054,15 @@ static long perf_compat_ioctl(struct fil
+
+ int perf_event_task_enable(void)
+ {
++ struct perf_event_context *ctx;
+ struct perf_event *event;
+
+ mutex_lock(&current->perf_event_mutex);
+- list_for_each_entry(event, &current->perf_event_list, owner_entry)
+- perf_event_for_each_child(event, perf_event_enable);
++ list_for_each_entry(event, &current->perf_event_list, owner_entry) {
++ ctx = perf_event_ctx_lock(event);
++ perf_event_for_each_child(event, _perf_event_enable);
++ perf_event_ctx_unlock(event, ctx);
++ }
+ mutex_unlock(&current->perf_event_mutex);
+
+ return 0;
+@@ -3930,11 +4070,15 @@ int perf_event_task_enable(void)
+
+ int perf_event_task_disable(void)
+ {
++ struct perf_event_context *ctx;
+ struct perf_event *event;
+
+ mutex_lock(&current->perf_event_mutex);
+- list_for_each_entry(event, &current->perf_event_list, owner_entry)
+- perf_event_for_each_child(event, perf_event_disable);
++ list_for_each_entry(event, &current->perf_event_list, owner_entry) {
++ ctx = perf_event_ctx_lock(event);
++ perf_event_for_each_child(event, _perf_event_disable);
++ perf_event_ctx_unlock(event, ctx);
++ }
+ mutex_unlock(&current->perf_event_mutex);
+
+ return 0;
+@@ -7271,6 +7415,15 @@ out:
+ return ret;
+ }
+
++static void mutex_lock_double(struct mutex *a, struct mutex *b)
++{
++ if (b < a)
++ swap(a, b);
++
++ mutex_lock(a);
++ mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
++}
++
+ /**
+ * sys_perf_event_open - open a performance event, associate it to a task/cpu
+ *
+@@ -7286,7 +7439,7 @@ SYSCALL_DEFINE5(perf_event_open,
+ struct perf_event *group_leader = NULL, *output_event = NULL;
+ struct perf_event *event, *sibling;
+ struct perf_event_attr attr;
+- struct perf_event_context *ctx;
++ struct perf_event_context *ctx, *uninitialized_var(gctx);
+ struct file *event_file = NULL;
+ struct fd group = {NULL, 0};
+ struct task_struct *task = NULL;
+@@ -7484,9 +7637,14 @@ SYSCALL_DEFINE5(perf_event_open,
+ }
+
+ if (move_group) {
+- struct perf_event_context *gctx = group_leader->ctx;
++ gctx = group_leader->ctx;
++
++ /*
++ * See perf_event_ctx_lock() for comments on the details
++ * of swizzling perf_event::ctx.
++ */
++ mutex_lock_double(&gctx->mutex, &ctx->mutex);
+
+- mutex_lock(&gctx->mutex);
+ perf_remove_from_context(group_leader, false);
+
+ /*
+@@ -7501,15 +7659,19 @@ SYSCALL_DEFINE5(perf_event_open,
+ perf_event__state_init(sibling);
+ put_ctx(gctx);
+ }
+- mutex_unlock(&gctx->mutex);
+- put_ctx(gctx);
++ } else {
++ mutex_lock(&ctx->mutex);
+ }
+
+ WARN_ON_ONCE(ctx->parent_ctx);
+- mutex_lock(&ctx->mutex);
+
+ if (move_group) {
++ /*
++ * Wait for everybody to stop referencing the events through
++ * the old lists, before installing it on new lists.
++ */
+ synchronize_rcu();
++
+ perf_install_in_context(ctx, group_leader, group_leader->cpu);
+ get_ctx(ctx);
+ list_for_each_entry(sibling, &group_leader->sibling_list,
+@@ -7521,6 +7683,11 @@ SYSCALL_DEFINE5(perf_event_open,
+
+ perf_install_in_context(ctx, event, event->cpu);
+ perf_unpin_context(ctx);
++
++ if (move_group) {
++ mutex_unlock(&gctx->mutex);
++ put_ctx(gctx);
++ }
+ mutex_unlock(&ctx->mutex);
+
+ put_online_cpus();
+@@ -7628,7 +7795,11 @@ void perf_pmu_migrate_context(struct pmu
+ src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
+ dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
+
+- mutex_lock(&src_ctx->mutex);
++ /*
++ * See perf_event_ctx_lock() for comments on the details
++ * of swizzling perf_event::ctx.
++ */
++ mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
+ list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
+ event_entry) {
+ perf_remove_from_context(event, false);
+@@ -7636,11 +7807,9 @@ void perf_pmu_migrate_context(struct pmu
+ put_ctx(src_ctx);
+ list_add(&event->migrate_entry, &events);
+ }
+- mutex_unlock(&src_ctx->mutex);
+
+ synchronize_rcu();
+
+- mutex_lock(&dst_ctx->mutex);
+ list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
+ list_del(&event->migrate_entry);
+ if (event->state >= PERF_EVENT_STATE_OFF)
+@@ -7650,6 +7819,7 @@ void perf_pmu_migrate_context(struct pmu
+ get_ctx(dst_ctx);
+ }
+ mutex_unlock(&dst_ctx->mutex);
++ mutex_unlock(&src_ctx->mutex);
+ }
+ EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
+
--- /dev/null
+From 12ca6ad2e3a896256f086497a7c7406a547ee373 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 15 Dec 2015 13:49:05 +0100
+Subject: perf: Fix race in swevent hash
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 12ca6ad2e3a896256f086497a7c7406a547ee373 upstream.
+
+There's a race on CPU unplug where we free the swevent hash array
+while it can still have events on. This will result in a
+use-after-free which is BAD.
+
+Simply do not free the hash array on unplug. This leaves the thing
+around and no use-after-free takes place.
+
+When the last swevent dies, we do a for_each_possible_cpu() iteration
+anyway to clean these up, at which time we'll free it, so no leakage
+will occur.
+
+Reported-by: Sasha Levin <sasha.levin@oracle.com>
+Tested-by: Sasha Levin <sasha.levin@oracle.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Frederic Weisbecker <fweisbec@gmail.com>
+Cc: Jiri Olsa <jolsa@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Stephane Eranian <eranian@google.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vince Weaver <vincent.weaver@maine.edu>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/events/core.c | 20 +-------------------
+ 1 file changed, 1 insertion(+), 19 deletions(-)
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -5851,9 +5851,6 @@ struct swevent_htable {
+
+ /* Recursion avoidance in each contexts */
+ int recursion[PERF_NR_CONTEXTS];
+-
+- /* Keeps track of cpu being initialized/exited */
+- bool online;
+ };
+
+ static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
+@@ -6111,14 +6108,8 @@ static int perf_swevent_add(struct perf_
+ hwc->state = !(flags & PERF_EF_START);
+
+ head = find_swevent_head(swhash, event);
+- if (!head) {
+- /*
+- * We can race with cpu hotplug code. Do not
+- * WARN if the cpu just got unplugged.
+- */
+- WARN_ON_ONCE(swhash->online);
++ if (WARN_ON_ONCE(!head))
+ return -EINVAL;
+- }
+
+ hlist_add_head_rcu(&event->hlist_entry, head);
+
+@@ -6185,7 +6176,6 @@ static int swevent_hlist_get_cpu(struct
+ int err = 0;
+
+ mutex_lock(&swhash->hlist_mutex);
+-
+ if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
+ struct swevent_hlist *hlist;
+
+@@ -8342,7 +8332,6 @@ static void perf_event_init_cpu(int cpu)
+ struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
+
+ mutex_lock(&swhash->hlist_mutex);
+- swhash->online = true;
+ if (swhash->hlist_refcount > 0) {
+ struct swevent_hlist *hlist;
+
+@@ -8395,14 +8384,7 @@ static void perf_event_exit_cpu_context(
+
+ static void perf_event_exit_cpu(int cpu)
+ {
+- struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
+-
+ perf_event_exit_cpu_context(cpu);
+-
+- mutex_lock(&swhash->hlist_mutex);
+- swhash->online = false;
+- swevent_hlist_release(swhash);
+- mutex_unlock(&swhash->hlist_mutex);
+ }
+ #else
+ static inline void perf_event_exit_cpu(int cpu) { }