--- /dev/null
+From d53ca5639d9100477bd8f6a9d0ef65e1cf638f7b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 May 2020 17:03:18 +0900
+Subject: kretprobe: Prevent triggering kretprobe from within kprobe_flush_task
+
+From: Jiri Olsa <jolsa@redhat.com>
+
+[ Upstream commit 9b38cc704e844e41d9cf74e647bff1d249512cb3 ]
+
+Ziqian reported a lockup when adding a retprobe on
+_raw_spin_lock_irqsave. My test was also able to trigger the lockdep
+output:
+
+ ============================================
+ WARNING: possible recursive locking detected
+ 5.6.0-rc6+ #6 Not tainted
+ --------------------------------------------
+ sched-messaging/2767 is trying to acquire lock:
+ ffffffff9a492798 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_hash_lock+0x52/0xa0
+
+ but task is already holding lock:
+ ffffffff9a491a18 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_trampoline+0x0/0x50
+
+ other info that might help us debug this:
+ Possible unsafe locking scenario:
+
+ CPU0
+ ----
+ lock(&(kretprobe_table_locks[i].lock));
+ lock(&(kretprobe_table_locks[i].lock));
+
+ *** DEADLOCK ***
+
+ May be due to missing lock nesting notation
+
+ 1 lock held by sched-messaging/2767:
+ #0: ffffffff9a491a18 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_trampoline+0x0/0x50
+
+ stack backtrace:
+ CPU: 3 PID: 2767 Comm: sched-messaging Not tainted 5.6.0-rc6+ #6
+ Call Trace:
+ dump_stack+0x96/0xe0
+ __lock_acquire.cold.57+0x173/0x2b7
+ ? native_queued_spin_lock_slowpath+0x42b/0x9e0
+ ? lockdep_hardirqs_on+0x590/0x590
+ ? __lock_acquire+0xf63/0x4030
+ lock_acquire+0x15a/0x3d0
+ ? kretprobe_hash_lock+0x52/0xa0
+ _raw_spin_lock_irqsave+0x36/0x70
+ ? kretprobe_hash_lock+0x52/0xa0
+ kretprobe_hash_lock+0x52/0xa0
+ trampoline_handler+0xf8/0x940
+ ? kprobe_fault_handler+0x380/0x380
+ ? find_held_lock+0x3a/0x1c0
+ kretprobe_trampoline+0x25/0x50
+ ? lock_acquired+0x392/0xbc0
+ ? _raw_spin_lock_irqsave+0x50/0x70
+ ? __get_valid_kprobe+0x1f0/0x1f0
+ ? _raw_spin_unlock_irqrestore+0x3b/0x40
+ ? finish_task_switch+0x4b9/0x6d0
+ ? __switch_to_asm+0x34/0x70
+ ? __switch_to_asm+0x40/0x70
+
+The code within the kretprobe handler checks for probe reentrancy,
+so the _raw_spin_lock_irqsave probe will not trigger again inside
+the handler itself.
+
+The problem is outside the handler, in kprobe_flush_task, where we call:
+
+ kprobe_flush_task
+ kretprobe_table_lock
+ raw_spin_lock_irqsave
+ _raw_spin_lock_irqsave
+
+where _raw_spin_lock_irqsave triggers the kretprobe and installs the
+kretprobe_trampoline handler on _raw_spin_lock_irqsave's return.
+
+The kretprobe_trampoline handler is then executed with
+kretprobe_table_locks already held, and the first thing it does is
+take kretprobe_table_locks again ;-) The whole lockup path looks
+like:
+
+ kprobe_flush_task
+ kretprobe_table_lock
+ raw_spin_lock_irqsave
+ _raw_spin_lock_irqsave ---> probe triggered, kretprobe_trampoline installed
+
+ ---> kretprobe_table_locks locked
+
+ kretprobe_trampoline
+ trampoline_handler
+ kretprobe_hash_lock(current, &head, &flags); <--- deadlock
+
+Add kprobe_busy_begin/end helpers that mark a region of code as
+having a fake probe installed, preventing any other kprobe from
+triggering within that region.
+
+Use these helpers in kprobe_flush_task, so the probe recursion
+protection check is hit, the probe is never installed, and the
+lockup above is prevented.
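+
+As a minimal userspace sketch of the guard pattern (the names
+probe_busy, probe_handler and instrumented_lock are illustrative,
+not kernel symbols; only the "mark a fake probe active, handler
+bails on reentry" idea mirrors the change):
+
+  #include <stdio.h>
+  #include <stdbool.h>
+
+  static _Thread_local bool probe_busy;   /* analog of current_kprobe */
+
+  static void probe_handler(const char *site)
+  {
+          if (probe_busy)                 /* reentrancy check */
+                  return;                 /* fake probe set -> skip */
+          probe_busy = true;
+          printf("probe hit at %s\n", site);
+          probe_busy = false;
+  }
+
+  static void instrumented_lock(void)
+  {
+          probe_handler("instrumented_lock"); /* probe fires on entry */
+  }
+
+  int main(void)
+  {
+          instrumented_lock();            /* probe fires normally */
+
+          probe_busy = true;              /* kprobe_busy_begin() analog */
+          instrumented_lock();            /* probe suppressed, no recursion */
+          probe_busy = false;             /* kprobe_busy_end() analog */
+          return 0;
+  }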
+
+Link: http://lkml.kernel.org/r/158927059835.27680.7011202830041561604.stgit@devnote2
+
+Fixes: ef53d9c5e4da ("kprobes: improve kretprobe scalability with hashed locking")
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: "Gustavo A . R . Silva" <gustavoars@kernel.org>
+Cc: Anders Roxell <anders.roxell@linaro.org>
+Cc: "Naveen N . Rao" <naveen.n.rao@linux.ibm.com>
+Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+Cc: David Miller <davem@davemloft.net>
+Cc: Ingo Molnar <mingo@elte.hu>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Reported-by: "Ziqian SUN (Zamir)" <zsun@redhat.com>
+Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Jiri Olsa <jolsa@kernel.org>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/kprobes/core.c | 16 +++-------------
+ include/linux/kprobes.h | 4 ++++
+ kernel/kprobes.c | 24 ++++++++++++++++++++++++
+ 3 files changed, 31 insertions(+), 13 deletions(-)
+
+diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
+index 9d7bb8de2917e..02665ffef0506 100644
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -744,16 +744,11 @@ asm(
+ NOKPROBE_SYMBOL(kretprobe_trampoline);
+ STACK_FRAME_NON_STANDARD(kretprobe_trampoline);
+
+-static struct kprobe kretprobe_kprobe = {
+- .addr = (void *)kretprobe_trampoline,
+-};
+-
+ /*
+ * Called from kretprobe_trampoline
+ */
+ __visible __used void *trampoline_handler(struct pt_regs *regs)
+ {
+- struct kprobe_ctlblk *kcb;
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *tmp;
+@@ -763,16 +758,12 @@ __visible __used void *trampoline_handler(struct pt_regs *regs)
+ void *frame_pointer;
+ bool skipped = false;
+
+- preempt_disable();
+-
+ /*
+ * Set a dummy kprobe for avoiding kretprobe recursion.
+ * Since kretprobe never run in kprobe handler, kprobe must not
+ * be running at this point.
+ */
+- kcb = get_kprobe_ctlblk();
+- __this_cpu_write(current_kprobe, &kretprobe_kprobe);
+- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
++ kprobe_busy_begin();
+
+ INIT_HLIST_HEAD(&empty_rp);
+ kretprobe_hash_lock(current, &head, &flags);
+@@ -851,7 +842,7 @@ __visible __used void *trampoline_handler(struct pt_regs *regs)
+ __this_cpu_write(current_kprobe, &ri->rp->kp);
+ ri->ret_addr = correct_ret_addr;
+ ri->rp->handler(ri, regs);
+- __this_cpu_write(current_kprobe, &kretprobe_kprobe);
++ __this_cpu_write(current_kprobe, &kprobe_busy);
+ }
+
+ recycle_rp_inst(ri, &empty_rp);
+@@ -867,8 +858,7 @@ __visible __used void *trampoline_handler(struct pt_regs *regs)
+
+ kretprobe_hash_unlock(current, &flags);
+
+- __this_cpu_write(current_kprobe, NULL);
+- preempt_enable();
++ kprobe_busy_end();
+
+ hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
+index 520702b821340..be7a49f437ea3 100644
+--- a/include/linux/kprobes.h
++++ b/include/linux/kprobes.h
+@@ -385,6 +385,10 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void)
+ return this_cpu_ptr(&kprobe_ctlblk);
+ }
+
++extern struct kprobe kprobe_busy;
++void kprobe_busy_begin(void);
++void kprobe_busy_end(void);
++
+ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset);
+ int register_kprobe(struct kprobe *p);
+ void unregister_kprobe(struct kprobe *p);
+diff --git a/kernel/kprobes.c b/kernel/kprobes.c
+index 484670370a6a1..f2d2194b51ca3 100644
+--- a/kernel/kprobes.c
++++ b/kernel/kprobes.c
+@@ -1218,6 +1218,26 @@ __releases(hlist_lock)
+ }
+ NOKPROBE_SYMBOL(kretprobe_table_unlock);
+
++struct kprobe kprobe_busy = {
++ .addr = (void *) get_kprobe,
++};
++
++void kprobe_busy_begin(void)
++{
++ struct kprobe_ctlblk *kcb;
++
++ preempt_disable();
++ __this_cpu_write(current_kprobe, &kprobe_busy);
++ kcb = get_kprobe_ctlblk();
++ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
++}
++
++void kprobe_busy_end(void)
++{
++ __this_cpu_write(current_kprobe, NULL);
++ preempt_enable();
++}
++
+ /*
+ * This function is called from finish_task_switch when task tk becomes dead,
+ * so that we can recycle any function-return probe instances associated
+@@ -1235,6 +1255,8 @@ void kprobe_flush_task(struct task_struct *tk)
+ /* Early boot. kretprobe_table_locks not yet initialized. */
+ return;
+
++ kprobe_busy_begin();
++
+ INIT_HLIST_HEAD(&empty_rp);
+ hash = hash_ptr(tk, KPROBE_HASH_BITS);
+ head = &kretprobe_inst_table[hash];
+@@ -1248,6 +1270,8 @@ void kprobe_flush_task(struct task_struct *tk)
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
++
++ kprobe_busy_end();
+ }
+ NOKPROBE_SYMBOL(kprobe_flush_task);
+
+--
+2.25.1
+
--- /dev/null
+From 4a1ae6014f34302f72a8df98e8005788be133e92 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jun 2020 16:49:44 +0200
+Subject: net: core: device_rename: Use rwsem instead of a seqcount
+
+From: Ahmed S. Darwish <a.darwish@linutronix.de>
+
+[ Upstream commit 11d6011c2cf29f7c8181ebde6c8bc0c4d83adcd7 ]
+
+Sequence counter write paths are critical sections that must never be
+preempted, and blocking there is not allowed even with
+CONFIG_PREEMPTION=n.
+
+Commit 5dbe7c178d3f ("net: fix kernel deadlock with interface rename and
+netdev name retrieval.") handled a deadlock, observed with
+CONFIG_PREEMPTION=n, where the devnet_rename seqcount read side was
+infinitely spinning: it got scheduled after the seqcount write side
+blocked inside its own critical section.
+
+To fix that deadlock, among other issues, the commit added a
+cond_resched() inside the read side section. While this eventually
+gets the non-preemptible kernel unstuck, the seqcount reader still
+fully exhausts its time slice just spinning until TIF_NEED_RESCHED
+is set.
+
+That fix is also still broken: if the seqcount reader runs under a
+real-time scheduling policy, it can spin forever and the kernel will
+livelock.
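+
+The read side described above boils down to a seqlock-style retry
+loop. A hedged userspace analog with C11 atomics (rename_seq,
+dev_name and both functions are illustrative stand-ins, not the
+net/core code):
+
+  #include <stdatomic.h>
+  #include <stdio.h>
+  #include <string.h>
+
+  static _Atomic unsigned int rename_seq; /* even = stable, odd = writer inside */
+  static char dev_name[16] = "eth0";
+
+  static void read_name(char out[16])
+  {
+          unsigned int seq;
+
+          do {
+                  /* spin until the sequence is even (no writer)... */
+                  while ((seq = atomic_load(&rename_seq)) & 1)
+                          ;  /* an RT reader can spin here forever */
+                  memcpy(out, dev_name, 16);
+                  /* ...and retry if the writer ran meanwhile */
+          } while (atomic_load(&rename_seq) != seq);
+  }
+
+  static void write_name(const char *newname)
+  {
+          atomic_fetch_add(&rename_seq, 1); /* begin: sequence goes odd */
+          /* sleeping here (GFP_KERNEL, mutexes) strands spinning readers */
+          strncpy(dev_name, newname, 15);
+          atomic_fetch_add(&rename_seq, 1); /* end: sequence even again */
+  }
+
+  int main(void)
+  {
+          char buf[16];
+
+          write_name("eth1");
+          read_name(buf);
+          printf("%s\n", buf);
+          return 0;
+  }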
+
+Disabling preemption over the seqcount write side critical section
+will not work either: that section performs GFP_KERNEL allocations
+and takes mutexes via the drivers/base device_rename() call chain.
+
+Given all the above, replace the seqcount with an rwsem.
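+
+A minimal userspace analog of the fix, with a pthread rwlock standing
+in for the kernel rwsem (get_name/change_name are illustrative
+stand-ins for netdev_get_name()/dev_change_name()):
+
+  #include <pthread.h>
+  #include <stdio.h>
+  #include <string.h>
+
+  static pthread_rwlock_t rename_sem = PTHREAD_RWLOCK_INITIALIZER;
+  static char dev_name[16] = "eth0";
+
+  static void get_name(char out[16])     /* netdev_get_name() analog */
+  {
+          pthread_rwlock_rdlock(&rename_sem); /* sleeps, never spins */
+          memcpy(out, dev_name, 16);
+          pthread_rwlock_unlock(&rename_sem);
+  }
+
+  static void change_name(const char *newname) /* dev_change_name() analog */
+  {
+          pthread_rwlock_wrlock(&rename_sem);
+          /* blocking here is now safe: readers sleep on the rwsem */
+          strncpy(dev_name, newname, 15);
+          pthread_rwlock_unlock(&rename_sem);
+  }
+
+  int main(void)
+  {
+          char buf[16];
+
+          change_name("eth1");
+          get_name(buf);
+          printf("%s\n", buf);
+          return 0;
+  }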
+
+Fixes: 5dbe7c178d3f ("net: fix kernel deadlock with interface rename and netdev name retrieval.")
+Fixes: 30e6c9fa93cf ("net: devnet_rename_seq should be a seqcount")
+Fixes: c91f6df2db49 ("sockopt: Change getsockopt() of SO_BINDTODEVICE to return an interface name")
+Cc: <stable@vger.kernel.org>
+Reported-by: kbuild test robot <lkp@intel.com> [ v1 missing up_read() on error exit ]
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com> [ v1 missing up_read() on error exit ]
+Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
+Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/dev.c | 40 ++++++++++++++++++----------------------
+ 1 file changed, 18 insertions(+), 22 deletions(-)
+
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 9e4a00462f5c7..0aaa1426450fa 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -83,6 +83,7 @@
+ #include <linux/sched.h>
+ #include <linux/sched/mm.h>
+ #include <linux/mutex.h>
++#include <linux/rwsem.h>
+ #include <linux/string.h>
+ #include <linux/mm.h>
+ #include <linux/socket.h>
+@@ -194,7 +195,7 @@ static DEFINE_SPINLOCK(napi_hash_lock);
+ static unsigned int napi_gen_id = NR_CPUS;
+ static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
+
+-static seqcount_t devnet_rename_seq;
++static DECLARE_RWSEM(devnet_rename_sem);
+
+ static inline void dev_base_seq_inc(struct net *net)
+ {
+@@ -898,33 +899,28 @@ EXPORT_SYMBOL(dev_get_by_napi_id);
+ * @net: network namespace
+ * @name: a pointer to the buffer where the name will be stored.
+ * @ifindex: the ifindex of the interface to get the name from.
+- *
+- * The use of raw_seqcount_begin() and cond_resched() before
+- * retrying is required as we want to give the writers a chance
+- * to complete when CONFIG_PREEMPTION is not set.
+ */
+ int netdev_get_name(struct net *net, char *name, int ifindex)
+ {
+ struct net_device *dev;
+- unsigned int seq;
++ int ret;
+
+-retry:
+- seq = raw_seqcount_begin(&devnet_rename_seq);
++ down_read(&devnet_rename_sem);
+ rcu_read_lock();
++
+ dev = dev_get_by_index_rcu(net, ifindex);
+ if (!dev) {
+- rcu_read_unlock();
+- return -ENODEV;
++ ret = -ENODEV;
++ goto out;
+ }
+
+ strcpy(name, dev->name);
+- rcu_read_unlock();
+- if (read_seqcount_retry(&devnet_rename_seq, seq)) {
+- cond_resched();
+- goto retry;
+- }
+
+- return 0;
++ ret = 0;
++out:
++ rcu_read_unlock();
++ up_read(&devnet_rename_sem);
++ return ret;
+ }
+
+ /**
+@@ -1189,10 +1185,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
+ if (dev->flags & IFF_UP)
+ return -EBUSY;
+
+- write_seqcount_begin(&devnet_rename_seq);
++ down_write(&devnet_rename_sem);
+
+ if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
+- write_seqcount_end(&devnet_rename_seq);
++ up_write(&devnet_rename_sem);
+ return 0;
+ }
+
+@@ -1200,7 +1196,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
+
+ err = dev_get_valid_name(net, dev, newname);
+ if (err < 0) {
+- write_seqcount_end(&devnet_rename_seq);
++ up_write(&devnet_rename_sem);
+ return err;
+ }
+
+@@ -1215,11 +1211,11 @@ rollback:
+ if (ret) {
+ memcpy(dev->name, oldname, IFNAMSIZ);
+ dev->name_assign_type = old_assign_type;
+- write_seqcount_end(&devnet_rename_seq);
++ up_write(&devnet_rename_sem);
+ return ret;
+ }
+
+- write_seqcount_end(&devnet_rename_seq);
++ up_write(&devnet_rename_sem);
+
+ netdev_adjacent_rename_links(dev, oldname);
+
+@@ -1240,7 +1236,7 @@ rollback:
+ /* err >= 0 after dev_alloc_name() or stores the first errno */
+ if (err >= 0) {
+ err = ret;
+- write_seqcount_begin(&devnet_rename_seq);
++ down_write(&devnet_rename_sem);
+ memcpy(dev->name, oldname, IFNAMSIZ);
+ memcpy(oldname, newname, IFNAMSIZ);
+ dev->name_assign_type = old_assign_type;
+--
+2.25.1
+