--- /dev/null
+From 73a822880c775230d03565de9e6e3c4d92b812ce Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Sun, 28 Apr 2019 17:25:38 -0400
+Subject: locking/rwsem: Prevent decrement of reader count before increment
+
+[ Upstream commit a9e9bcb45b1525ba7aea26ed9441e8632aeeda58 ]
+
+During my rwsem testing, it was found that after a down_read(), the
+reader count may occasionally become 0 or even negative. Consequently,
+a writer may steal the lock at that point and execute in parallel
+with the reader, thus breaking the mutual exclusion guarantee of the
+write lock. In other words, both readers and a writer can become
+rwsem owners simultaneously.
+
+The current reader wakeup code does everything in one pass: it clears
+waiter->task and puts the waiters into wake_q before fully incrementing
+the reader count. Once waiter->task is cleared, the corresponding
+reader may see it, finish its critical section and do an unlock that
+decrements the count before the count has been incremented. This is
+not a problem if there is only one reader to wake up, as the count has
+been pre-incremented by 1. It is a problem if there is more than one
+reader to be woken up, because a writer can then steal the lock.
+
+The wakeup was actually done in 2 passes before the following v4.9 commit:
+
+ 70800c3c0cc5 ("locking/rwsem: Scan the wait_list for readers only once")
+
+To fix this problem, the wakeup is now done in two passes again. In
+the first pass, we collect the readers and count them. The reader
+count is then fully incremented. In the second pass, waiter->task is
+cleared for each waiter and the waiters are put into wake_q to be
+woken up later.
+
+Signed-off-by: Waiman Long <longman@redhat.com>
+Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Will Deacon <will.deacon@arm.com>
+Cc: huang ying <huang.ying.caritas@gmail.com>
+Fixes: 70800c3c0cc5 ("locking/rwsem: Scan the wait_list for readers only once")
+Link: http://lkml.kernel.org/r/20190428212557.13482-2-longman@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/locking/rwsem-xadd.c | 44 +++++++++++++++++++++++++------------
+ 1 file changed, 30 insertions(+), 14 deletions(-)
+
+diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
+index be06c45cbe4f9..0cdbb636e3163 100644
+--- a/kernel/locking/rwsem-xadd.c
++++ b/kernel/locking/rwsem-xadd.c
+@@ -127,6 +127,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
+ {
+ struct rwsem_waiter *waiter, *tmp;
+ long oldcount, woken = 0, adjustment = 0;
++ struct list_head wlist;
+
+ /*
+ * Take a peek at the queue head waiter such that we can determine
+@@ -185,18 +186,42 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
+ * of the queue. We know that woken will be at least 1 as we accounted
+ * for above. Note we increment the 'active part' of the count by the
+ * number of readers before waking any processes up.
++ *
++ * We have to do wakeup in 2 passes to prevent the possibility that
++ * the reader count may be decremented before it is incremented. This
++ * is because the to-be-woken waiter may not have slept yet. So it
++ * may see that waiter->task has been cleared, finish its critical
++ * section and do an unlock before the reader count increment.
++ *
++ * 1) Collect the read-waiters in a separate list, count them and
++ * fully increment the reader count in rwsem.
++ * 2) For each waiter in the new list, clear waiter->task and
++ * put them into wake_q to be woken up later.
+ */
+- list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
+- struct task_struct *tsk;
+-
++ list_for_each_entry(waiter, &sem->wait_list, list) {
+ if (waiter->type == RWSEM_WAITING_FOR_WRITE)
+ break;
+
+ woken++;
+- tsk = waiter->task;
++ }
++ list_cut_before(&wlist, &sem->wait_list, &waiter->list);
++
++ adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
++ if (list_empty(&sem->wait_list)) {
++ /* hit end of list above */
++ adjustment -= RWSEM_WAITING_BIAS;
++ }
++
++ if (adjustment)
++ atomic_long_add(adjustment, &sem->count);
++
++ /* 2nd pass */
++ list_for_each_entry_safe(waiter, tmp, &wlist, list) {
++ struct task_struct *tsk;
+
++ tsk = waiter->task;
+ get_task_struct(tsk);
+- list_del(&waiter->list);
++
+ /*
+ * Ensure calling get_task_struct() before setting the reader
+ * waiter to nil such that rwsem_down_read_failed() cannot
+@@ -212,15 +237,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
+ /* wake_q_add() already takes the task ref */
+ put_task_struct(tsk);
+ }
+-
+- adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+- if (list_empty(&sem->wait_list)) {
+- /* hit end of list above */
+- adjustment -= RWSEM_WAITING_BIAS;
+- }
+-
+- if (adjustment)
+- atomic_long_add(adjustment, &sem->count);
+ }
+
+ /*
+--
+2.20.1
+
--- /dev/null
+From cc83886d9a61b4303a66f562b56e7447627d4992 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 May 2019 21:30:49 -0400
+Subject: net: core: another layer of lists, around PF_MEMALLOC skb handling
+
+[ Upstream commit 78ed8cc25986ac5c21762eeddc1e86e94d422e36 ]
+
+This is the first example of a layer splitting the list (rather than
+merely taking individual packets off it).
+It involves a new list.h function, list_cut_before(), which is like
+list_cut_position() but cuts on the other side of the given entry.
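+
+A hedged usage sketch (the "pending"/"batch" list names and the ->ready
+flag are invented for illustration and are not part of this patch):
+
+	struct item {
+		struct list_head list;
+		bool ready;
+	};
+
+	LIST_HEAD(pending);
+	LIST_HEAD(batch);
+	struct item *it;
+
+	/* find the first entry that must stay on @pending */
+	list_for_each_entry(it, &pending, list)
+		if (!it->ready)
+			break;
+
+	/* move the leading ready entries to @batch; if the loop ran
+	 * off the end, the whole of @pending moves over
+	 */
+	list_cut_before(&batch, &pending, &it->list);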
+
+Signed-off-by: Edward Cree <ecree@solarflare.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[sl: cut out non list.h bits, we only want list_cut_before]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/list.h | 30 ++++++++++++++++++++++++++++++
+ 1 file changed, 30 insertions(+)
+
+diff --git a/include/linux/list.h b/include/linux/list.h
+index 5809e9a2de5b2..6f935018ea056 100644
+--- a/include/linux/list.h
++++ b/include/linux/list.h
+@@ -271,6 +271,36 @@ static inline void list_cut_position(struct list_head *list,
+ __list_cut_position(list, head, entry);
+ }
+
++/**
++ * list_cut_before - cut a list into two, before given entry
++ * @list: a new list to add all removed entries
++ * @head: a list with entries
++ * @entry: an entry within head, could be the head itself
++ *
++ * This helper moves the initial part of @head, up to but
++ * excluding @entry, from @head to @list. You should pass
++ * in @entry an element you know is on @head. @list should
++ * be an empty list or a list whose data you do not care
++ * about losing.
++ * If @entry == @head, all entries on @head are moved to
++ * @list.
++ */
++static inline void list_cut_before(struct list_head *list,
++ struct list_head *head,
++ struct list_head *entry)
++{
++ if (head->next == entry) {
++ INIT_LIST_HEAD(list);
++ return;
++ }
++ list->next = head->next;
++ list->next->prev = list;
++ list->prev = entry->prev;
++ list->prev->next = list;
++ head->next = entry;
++ entry->prev = head;
++}
++
+ static inline void __list_splice(const struct list_head *list,
+ struct list_head *prev,
+ struct list_head *next)
+--
+2.20.1
+
--- /dev/null
+From f8b31684437ba3bfcf62a8ce9c845d4ff370095d Mon Sep 17 00:00:00 2001
+From: Dexuan Cui <decui@microsoft.com>
+Date: Wed, 15 May 2019 15:42:07 -0700
+Subject: PCI: hv: Fix a memory leak in hv_eject_device_work()
+
+[ Upstream commit 05f151a73ec2b23ffbff706e5203e729a995cdc2 ]
+
+When a device is created in new_pcichild_device(), hpdev->refs is set
+to 2 (i.e. the initial value of 1 plus the get_pcichild()).
+
+When we hot remove the device from the host, in a Linux VM we first
+call hv_pci_eject_device(), which increments hpdev->refs via
+get_pcichild() and then schedules the hv_eject_device_work() work
+item, so hpdev->refs becomes 3 (let's ignore the paired
+get/put_pcichild() in other places). But hv_eject_device_work()
+currently calls put_pcichild() only twice, meaning the 'hpdev' struct
+can never be freed in put_pcichild().
+
+Add one put_pcichild() to fix the memory leak.
+
+The device can also be removed when we run "rmmod pci-hyperv". On this
+path (hv_pci_remove() -> hv_pci_bus_exit() -> hv_pci_devices_present()),
+hpdev->refs is 2, and we do correctly call put_pcichild() twice in
+pci_devices_present_work().
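+
+A toy model of the eject-path refcounting (the counts come from the
+description above; this is an illustration, not the driver's code):
+
+	#include <assert.h>
+
+	int main(void)
+	{
+		int refs = 1;	/* new_pcichild_device(): initial ref */
+		refs++;		/* new_pcichild_device(): get_pcichild() */
+		refs++;		/* hv_pci_eject_device(): get_pcichild() */
+
+		refs -= 2;	/* hv_eject_device_work() before the fix */
+		assert(refs == 1);	/* stuck at 1: 'hpdev' is leaked */
+
+		refs--;		/* the put_pcichild() added by this patch */
+		assert(refs == 0);	/* now the device can be freed */
+		return 0;
+	}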
+
+Fixes: 4daace0d8ce8 ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs")
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
+[lorenzo.pieralisi@arm.com: commit log rework]
+Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>
+Reviewed-by: Michael Kelley <mikelley@microsoft.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pci/host/pci-hyperv.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c
+index b4d8ccfd9f7c2..200b415765264 100644
+--- a/drivers/pci/host/pci-hyperv.c
++++ b/drivers/pci/host/pci-hyperv.c
+@@ -1620,6 +1620,7 @@ static void hv_eject_device_work(struct work_struct *work)
+ spin_unlock_irqrestore(&hpdev->hbus->device_list_lock, flags);
+
+ put_pcichild(hpdev, hv_pcidev_ref_childlist);
++ put_pcichild(hpdev, hv_pcidev_ref_initial);
+ put_pcichild(hpdev, hv_pcidev_ref_pnp);
+ put_hvpcibus(hpdev->hbus);
+ }
+--
+2.20.1
+