--- /dev/null
+From 613b8b49ab902e1c338c78698e5d7810ac11be65 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Oct 2019 08:34:59 -0700
+Subject: e1000e: Drop unnecessary __E1000_DOWN bit twiddling
+
+From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
+
+[ Upstream commit daee5598e491d8d3979bd4ad6c447d89ce57b446 ]
+
+Since we no longer check for __E1000_DOWN in e1000e_close we can drop the
+spot where we were restoring the bit. This saves us a bit of unnecessary
+complexity.
+
+Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
+Tested-by: Aaron Brown <aaron.f.brown@intel.com>
+Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/e1000e/netdev.c | 7 +------
+ 1 file changed, 1 insertion(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
+index d7d56e42a6aac..aa9fdda839148 100644
+--- a/drivers/net/ethernet/intel/e1000e/netdev.c
++++ b/drivers/net/ethernet/intel/e1000e/netdev.c
+@@ -7407,15 +7407,13 @@ static void e1000_remove(struct pci_dev *pdev)
+ {
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct e1000_adapter *adapter = netdev_priv(netdev);
+- bool down = test_bit(__E1000_DOWN, &adapter->state);
+
+ e1000e_ptp_remove(adapter);
+
+ /* The timers may be rescheduled, so explicitly disable them
+ * from being rescheduled.
+ */
+- if (!down)
+- set_bit(__E1000_DOWN, &adapter->state);
++ set_bit(__E1000_DOWN, &adapter->state);
+ del_timer_sync(&adapter->phy_info_timer);
+
+ cancel_work_sync(&adapter->reset_task);
+@@ -7435,9 +7433,6 @@ static void e1000_remove(struct pci_dev *pdev)
+ }
+ }
+
+- /* Don't lie to e1000_close() down the road. */
+- if (!down)
+- clear_bit(__E1000_DOWN, &adapter->state);
+ unregister_netdev(netdev);
+
+ if (pci_dev_run_wake(pdev))
+--
+2.20.1
+
--- /dev/null
+From d84eb43b7150df147cba96b52fb9952232ef4477 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 4 Jan 2020 23:29:22 -0800
+Subject: e1000e: Revert "e1000e: Make watchdog use delayed work"
+
+From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
+
+[ Upstream commit d5ad7a6a7f3c87b278d7e4973b65682be4e588dd ]
+
+This reverts commit 59653e6497d16f7ac1d9db088f3959f57ee8c3db.
+
+This is due to this commit causing driver crashes and connections to
+reset unexpectedly.
+
+Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
+Tested-by: Aaron Brown <aaron.f.brown@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/e1000e/e1000.h | 5 +-
+ drivers/net/ethernet/intel/e1000e/netdev.c | 54 ++++++++++------------
+ 2 files changed, 27 insertions(+), 32 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
+index 6c51b1bad8c42..37a2314d3e6b1 100644
+--- a/drivers/net/ethernet/intel/e1000e/e1000.h
++++ b/drivers/net/ethernet/intel/e1000e/e1000.h
+@@ -185,13 +185,12 @@ struct e1000_phy_regs {
+
+ /* board specific private data structure */
+ struct e1000_adapter {
++ struct timer_list watchdog_timer;
+ struct timer_list phy_info_timer;
+ struct timer_list blink_timer;
+
+ struct work_struct reset_task;
+- struct delayed_work watchdog_task;
+-
+- struct workqueue_struct *e1000_workqueue;
++ struct work_struct watchdog_task;
+
+ const struct e1000_info *ei;
+
+diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
+index aa9fdda839148..c27ed7363768c 100644
+--- a/drivers/net/ethernet/intel/e1000e/netdev.c
++++ b/drivers/net/ethernet/intel/e1000e/netdev.c
+@@ -1780,8 +1780,7 @@ static irqreturn_t e1000_intr_msi(int __always_unused irq, void *data)
+ }
+ /* guard against interrupt when we're going down */
+ if (!test_bit(__E1000_DOWN, &adapter->state))
+- mod_delayed_work(adapter->e1000_workqueue,
+- &adapter->watchdog_task, HZ);
++ mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ }
+
+ /* Reset on uncorrectable ECC error */
+@@ -1861,8 +1860,7 @@ static irqreturn_t e1000_intr(int __always_unused irq, void *data)
+ }
+ /* guard against interrupt when we're going down */
+ if (!test_bit(__E1000_DOWN, &adapter->state))
+- mod_delayed_work(adapter->e1000_workqueue,
+- &adapter->watchdog_task, HZ);
++ mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ }
+
+ /* Reset on uncorrectable ECC error */
+@@ -1907,8 +1905,7 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
+ hw->mac.get_link_status = true;
+ /* guard against interrupt when we're going down */
+ if (!test_bit(__E1000_DOWN, &adapter->state))
+- mod_delayed_work(adapter->e1000_workqueue,
+- &adapter->watchdog_task, HZ);
++ mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ }
+
+ if (!test_bit(__E1000_DOWN, &adapter->state))
+@@ -4281,6 +4278,7 @@ void e1000e_down(struct e1000_adapter *adapter, bool reset)
+
+ napi_synchronize(&adapter->napi);
+
++ del_timer_sync(&adapter->watchdog_timer);
+ del_timer_sync(&adapter->phy_info_timer);
+
+ spin_lock(&adapter->stats64_lock);
+@@ -5152,11 +5150,25 @@ static void e1000e_check_82574_phy_workaround(struct e1000_adapter *adapter)
+ }
+ }
+
++/**
++ * e1000_watchdog - Timer Call-back
++ * @t: timer_list embedded in the adapter as watchdog_timer
++ **/
++static void e1000_watchdog(struct timer_list *t)
++{
++ struct e1000_adapter *adapter = from_timer(adapter, t, watchdog_timer);
++
++ /* Do the rest outside of interrupt context */
++ schedule_work(&adapter->watchdog_task);
++
++ /* TODO: make this use queue_delayed_work() */
++}
++
+ static void e1000_watchdog_task(struct work_struct *work)
+ {
+ struct e1000_adapter *adapter = container_of(work,
+ struct e1000_adapter,
+- watchdog_task.work);
++ watchdog_task);
+ struct net_device *netdev = adapter->netdev;
+ struct e1000_mac_info *mac = &adapter->hw.mac;
+ struct e1000_phy_info *phy = &adapter->hw.phy;
+@@ -5404,9 +5416,8 @@ static void e1000_watchdog_task(struct work_struct *work)
+
+ /* Reset the timer */
+ if (!test_bit(__E1000_DOWN, &adapter->state))
+- queue_delayed_work(adapter->e1000_workqueue,
+- &adapter->watchdog_task,
+- round_jiffies(2 * HZ));
++ mod_timer(&adapter->watchdog_timer,
++ round_jiffies(jiffies + 2 * HZ));
+ }
+
+ #define E1000_TX_FLAGS_CSUM 0x00000001
+@@ -7259,21 +7270,11 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ goto err_eeprom;
+ }
+
+- adapter->e1000_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
+- e1000e_driver_name);
+-
+- if (!adapter->e1000_workqueue) {
+- err = -ENOMEM;
+- goto err_workqueue;
+- }
+-
+- INIT_DELAYED_WORK(&adapter->watchdog_task, e1000_watchdog_task);
+- queue_delayed_work(adapter->e1000_workqueue, &adapter->watchdog_task,
+- 0);
+-
++ timer_setup(&adapter->watchdog_timer, e1000_watchdog, 0);
+ timer_setup(&adapter->phy_info_timer, e1000_update_phy_info, 0);
+
+ INIT_WORK(&adapter->reset_task, e1000_reset_task);
++ INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task);
+ INIT_WORK(&adapter->downshift_task, e1000e_downshift_workaround);
+ INIT_WORK(&adapter->update_phy_task, e1000e_update_phy_task);
+ INIT_WORK(&adapter->print_hang_task, e1000_print_hw_hang);
+@@ -7367,9 +7368,6 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ return 0;
+
+ err_register:
+- flush_workqueue(adapter->e1000_workqueue);
+- destroy_workqueue(adapter->e1000_workqueue);
+-err_workqueue:
+ if (!(adapter->flags & FLAG_HAS_AMT))
+ e1000e_release_hw_control(adapter);
+ err_eeprom:
+@@ -7414,17 +7412,15 @@ static void e1000_remove(struct pci_dev *pdev)
+ * from being rescheduled.
+ */
+ set_bit(__E1000_DOWN, &adapter->state);
++ del_timer_sync(&adapter->watchdog_timer);
+ del_timer_sync(&adapter->phy_info_timer);
+
+ cancel_work_sync(&adapter->reset_task);
++ cancel_work_sync(&adapter->watchdog_task);
+ cancel_work_sync(&adapter->downshift_task);
+ cancel_work_sync(&adapter->update_phy_task);
+ cancel_work_sync(&adapter->print_hang_task);
+
+- cancel_delayed_work(&adapter->watchdog_task);
+- flush_workqueue(adapter->e1000_workqueue);
+- destroy_workqueue(adapter->e1000_workqueue);
+-
+ if (adapter->flags & FLAG_HAS_HW_TIMESTAMP) {
+ cancel_work_sync(&adapter->tx_hwtstamp_work);
+ if (adapter->tx_hwtstamp_skb) {
+--
+2.20.1
+
vfs-fix-do_last-regression.patch
cifs-fix-soft-mounts-hanging-in-the-reconnect-code.patch
+x86-resctrl-fix-a-deadlock-due-to-inaccurate-referen.patch
+x86-resctrl-fix-use-after-free-when-deleting-resourc.patch
+x86-resctrl-fix-use-after-free-due-to-inaccurate-ref.patch
+e1000e-drop-unnecessary-__e1000_down-bit-twiddling.patch
+e1000e-revert-e1000e-make-watchdog-use-delayed-work.patch
--- /dev/null
+From 45b1dca1b89b2e2ac862d2e57616677d0a10acd1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Jan 2020 00:28:05 +0800
+Subject: x86/resctrl: Fix a deadlock due to inaccurate reference
+
+From: Xiaochen Shen <xiaochen.shen@intel.com>
+
+[ Upstream commit 334b0f4e9b1b4a1d475f803419d202f6c5e4d18e ]
+
+There is a race condition which results in a deadlock when rmdir and
+mkdir execute concurrently:
+
+$ ls /sys/fs/resctrl/c1/mon_groups/m1/
+cpus cpus_list mon_data tasks
+
+Thread 1: rmdir /sys/fs/resctrl/c1
+Thread 2: mkdir /sys/fs/resctrl/c1/mon_groups/m1
+
+3 locks held by mkdir/48649:
+ #0: (sb_writers#17){.+.+}, at: [<ffffffffb4ca2aa0>] mnt_want_write+0x20/0x50
+ #1: (&type->i_mutex_dir_key#8/1){+.+.}, at: [<ffffffffb4c8c13b>] filename_create+0x7b/0x170
+ #2: (rdtgroup_mutex){+.+.}, at: [<ffffffffb4a4389d>] rdtgroup_kn_lock_live+0x3d/0x70
+
+4 locks held by rmdir/48652:
+ #0: (sb_writers#17){.+.+}, at: [<ffffffffb4ca2aa0>] mnt_want_write+0x20/0x50
+ #1: (&type->i_mutex_dir_key#8/1){+.+.}, at: [<ffffffffb4c8c3cf>] do_rmdir+0x13f/0x1e0
+ #2: (&type->i_mutex_dir_key#8){++++}, at: [<ffffffffb4c86d5d>] vfs_rmdir+0x4d/0x120
+ #3: (rdtgroup_mutex){+.+.}, at: [<ffffffffb4a4389d>] rdtgroup_kn_lock_live+0x3d/0x70
+
+Thread 1 is deleting control group "c1". Holding rdtgroup_mutex,
+kernfs_remove() removes all kernfs nodes under directory "c1"
+recursively, then waits for sub kernfs node "mon_groups" to drop active
+reference.
+
+Thread 2 is trying to create a subdirectory "m1" in the "mon_groups"
+directory. The wrapper kernfs_iop_mkdir() takes an active reference to
+the "mon_groups" directory but the code drops the active reference to
+the parent directory "c1" instead.
+
+As a result, Thread 1 is blocked waiting for the active reference to drop
+and never releases rdtgroup_mutex, while Thread 2 is also blocked
+trying to get rdtgroup_mutex.
+
+Thread 1 (rdtgroup_rmdir) Thread 2 (rdtgroup_mkdir)
+(rmdir /sys/fs/resctrl/c1) (mkdir /sys/fs/resctrl/c1/mon_groups/m1)
+------------------------- -------------------------
+ kernfs_iop_mkdir
+ /*
+ * kn: "m1", parent_kn: "mon_groups",
+ * prgrp_kn: parent_kn->parent: "c1",
+ *
+ * "mon_groups", parent_kn->active++: 1
+ */
+ kernfs_get_active(parent_kn)
+kernfs_iop_rmdir
+ /* "c1", kn->active++ */
+ kernfs_get_active(kn)
+
+ rdtgroup_kn_lock_live
+ atomic_inc(&rdtgrp->waitcount)
+ /* "c1", kn->active-- */
+ kernfs_break_active_protection(kn)
+ mutex_lock
+
+ rdtgroup_rmdir_ctrl
+ free_all_child_rdtgrp
+ sentry->flags = RDT_DELETED
+
+ rdtgroup_ctrl_remove
+ rdtgrp->flags = RDT_DELETED
+ kernfs_get(kn)
+ kernfs_remove(rdtgrp->kn)
+ __kernfs_remove
+ /* "mon_groups", sub_kn */
+ atomic_add(KN_DEACTIVATED_BIAS, &sub_kn->active)
+ kernfs_drain(sub_kn)
+ /*
+ * sub_kn->active == KN_DEACTIVATED_BIAS + 1,
+ * waiting on sub_kn->active to drop, but it
+ * never drops in Thread 2 which is blocked
+ * on getting rdtgroup_mutex.
+ */
+Thread 1 hangs here ---->
+ wait_event(sub_kn->active == KN_DEACTIVATED_BIAS)
+ ...
+ rdtgroup_mkdir
+ rdtgroup_mkdir_mon(parent_kn, prgrp_kn)
+ mkdir_rdt_prepare(parent_kn, prgrp_kn)
+ rdtgroup_kn_lock_live(prgrp_kn)
+ atomic_inc(&rdtgrp->waitcount)
+ /*
+ * "c1", prgrp_kn->active--
+ *
+ * The active reference on "c1" is
+ * dropped, but not matching the
+ * actual active reference taken
+ * on "mon_groups", thus causing
+ * Thread 1 to wait forever while
+ * holding rdtgroup_mutex.
+ */
+ kernfs_break_active_protection(
+ prgrp_kn)
+ /*
+ * Trying to get rdtgroup_mutex
+ * which is held by Thread 1.
+ */
+Thread 2 hangs here ----> mutex_lock
+ ...
+
+The problem is that the creation of a subdirectory in the "mon_groups"
+directory incorrectly releases the active protection of its parent
+directory instead of itself before it starts waiting for rdtgroup_mutex.
+This is triggered by the rdtgroup_mkdir() flow calling
+rdtgroup_kn_lock_live()/rdtgroup_kn_unlock() with kernfs node of the
+parent control group ("c1") as argument. It should be called with kernfs
+node "mon_groups" instead. What is currently missing is that the
+kn->priv of "mon_groups" is NULL instead of pointing to the rdtgrp.
+
+Fix it by pointing kn->priv to rdtgrp when "mon_groups" is created. Then
+it could be passed to rdtgroup_kn_lock_live()/rdtgroup_kn_unlock()
+instead. And then it operates on the same rdtgroup structure but handles
+the active reference of kernfs node "mon_groups" to prevent deadlock.
+The same changes are also made to the "mon_data" directories.
+
+This results in some unused function parameters that will be cleaned up
+in follow-up patch as the focus here is on the fix only in support of
+backporting efforts.
+
+Fixes: c7d9aac61311 ("x86/intel_rdt/cqm: Add mkdir support for RDT monitoring")
+Suggested-by: Reinette Chatre <reinette.chatre@intel.com>
+Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/1578500886-21771-4-git-send-email-xiaochen.shen@intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/resctrl/rdtgroup.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+index dac7209a07084..e4da26325e3ea 100644
+--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+@@ -1970,7 +1970,7 @@ static int rdt_get_tree(struct fs_context *fc)
+
+ if (rdt_mon_capable) {
+ ret = mongroup_create_dir(rdtgroup_default.kn,
+- NULL, "mon_groups",
++ &rdtgroup_default, "mon_groups",
+ &kn_mongrp);
+ if (ret < 0)
+ goto out_info;
+@@ -2446,7 +2446,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
+ /*
+ * Create the mon_data directory first.
+ */
+- ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
++ ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
+ if (ret)
+ return ret;
+
+@@ -2645,7 +2645,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
+ uint files = 0;
+ int ret;
+
+- prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
++ prdtgrp = rdtgroup_kn_lock_live(parent_kn);
+ if (!prdtgrp) {
+ ret = -ENODEV;
+ goto out_unlock;
+@@ -2718,7 +2718,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
+ kernfs_activate(kn);
+
+ /*
+- * The caller unlocks the prgrp_kn upon success.
++ * The caller unlocks the parent_kn upon success.
+ */
+ return 0;
+
+@@ -2729,7 +2729,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
+ out_free_rgrp:
+ kfree(rdtgrp);
+ out_unlock:
+- rdtgroup_kn_unlock(prgrp_kn);
++ rdtgroup_kn_unlock(parent_kn);
+ return ret;
+ }
+
+@@ -2767,7 +2767,7 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
+ */
+ list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
+
+- rdtgroup_kn_unlock(prgrp_kn);
++ rdtgroup_kn_unlock(parent_kn);
+ return ret;
+ }
+
+@@ -2810,7 +2810,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
+ * Create an empty mon_groups directory to hold the subset
+ * of tasks and cpus to monitor.
+ */
+- ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
++ ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
+ if (ret) {
+ rdt_last_cmd_puts("kernfs subdir error\n");
+ goto out_del_list;
+@@ -2826,7 +2826,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
+ out_common_fail:
+ mkdir_rdt_prepare_clean(rdtgrp);
+ out_unlock:
+- rdtgroup_kn_unlock(prgrp_kn);
++ rdtgroup_kn_unlock(parent_kn);
+ return ret;
+ }
+
+--
+2.20.1
+
--- /dev/null
+From c78ba78f3b5b50eecaefbc66780643af964dbbba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Jan 2020 00:28:04 +0800
+Subject: x86/resctrl: Fix use-after-free due to inaccurate refcount of
+ rdtgroup
+
+From: Xiaochen Shen <xiaochen.shen@intel.com>
+
+[ Upstream commit 074fadee59ee7a9d2b216e9854bd4efb5dad679f ]
+
+There is a race condition in the following scenario which results in a
+use-after-free issue when reading a monitoring file and deleting the
+parent ctrl_mon group concurrently:
+
+Thread 1 calls atomic_inc() to take refcount of rdtgrp and then calls
+kernfs_break_active_protection() to drop the active reference of kernfs
+node in rdtgroup_kn_lock_live().
+
+In Thread 2, kernfs_remove() is a blocking routine. It waits on all sub
+kernfs nodes to drop the active reference when removing all subtree
+kernfs nodes recursively. Thread 2 could block on kernfs_remove() until
+Thread 1 calls kernfs_break_active_protection(). Only after
+kernfs_remove() completes the refcount of rdtgrp could be trusted.
+
+Before Thread 1 calls atomic_inc() and kernfs_break_active_protection(),
+Thread 2 could call kfree() when the refcount of rdtgrp (sentry) is 0
+instead of 1 due to the race.
+
+In Thread 1, in rdtgroup_kn_unlock(), referring to earlier rdtgrp memory
+(rdtgrp->waitcount) which was already freed in Thread 2 results in
+use-after-free issue.
+
+Thread 1 (rdtgroup_mondata_show) Thread 2 (rdtgroup_rmdir)
+-------------------------------- -------------------------
+rdtgroup_kn_lock_live
+ /*
+ * kn active protection until
+ * kernfs_break_active_protection(kn)
+ */
+ rdtgrp = kernfs_to_rdtgroup(kn)
+ rdtgroup_kn_lock_live
+ atomic_inc(&rdtgrp->waitcount)
+ mutex_lock
+ rdtgroup_rmdir_ctrl
+ free_all_child_rdtgrp
+ /*
+ * sentry->waitcount should be 1
+ * but is 0 now due to the race.
+ */
+ kfree(sentry)*[1]
+ /*
+ * Only after kernfs_remove()
+ * completes, the refcount of
+ * rdtgrp could be trusted.
+ */
+ atomic_inc(&rdtgrp->waitcount)
+ /* kn->active-- */
+ kernfs_break_active_protection(kn)
+ rdtgroup_ctrl_remove
+ rdtgrp->flags = RDT_DELETED
+ /*
+ * Blocking routine, wait for
+ * all sub kernfs nodes to drop
+ * active reference in
+ * kernfs_break_active_protection.
+ */
+ kernfs_remove(rdtgrp->kn)
+ rdtgroup_kn_unlock
+ mutex_unlock
+ atomic_dec_and_test(
+ &rdtgrp->waitcount)
+ && (flags & RDT_DELETED)
+ kernfs_unbreak_active_protection(kn)
+ kfree(rdtgrp)
+ mutex_lock
+mon_event_read
+rdtgroup_kn_unlock
+ mutex_unlock
+ /*
+ * Use-after-free: refer to earlier rdtgrp
+ * memory which was freed in [1].
+ */
+ atomic_dec_and_test(&rdtgrp->waitcount)
+ && (flags & RDT_DELETED)
+ /* kn->active++ */
+ kernfs_unbreak_active_protection(kn)
+ kfree(rdtgrp)
+
+Fix it by moving free_all_child_rdtgrp() to after kernfs_remove() in
+rdtgroup_rmdir_ctrl() to ensure it has the accurate refcount of rdtgrp.
+
+Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support")
+Suggested-by: Reinette Chatre <reinette.chatre@intel.com>
+Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/1578500886-21771-3-git-send-email-xiaochen.shen@intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/resctrl/rdtgroup.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+index c7564294a12a8..954fd048ad9bd 100644
+--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+@@ -2960,13 +2960,13 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
+ closid_free(rdtgrp->closid);
+ free_rmid(rdtgrp->mon.rmid);
+
++ rdtgroup_ctrl_remove(kn, rdtgrp);
++
+ /*
+ * Free all the child monitor group rmids.
+ */
+ free_all_child_rdtgrp(rdtgrp);
+
+- rdtgroup_ctrl_remove(kn, rdtgrp);
+-
+ return 0;
+ }
+
+--
+2.20.1
+
--- /dev/null
+From 545e5454ad863083e84ad46372551f81b93f47f8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Jan 2020 00:28:03 +0800
+Subject: x86/resctrl: Fix use-after-free when deleting resource groups
+
+From: Xiaochen Shen <xiaochen.shen@intel.com>
+
+[ Upstream commit b8511ccc75c033f6d54188ea4df7bf1e85778740 ]
+
+A resource group (rdtgrp) contains a reference count (rdtgrp->waitcount)
+that indicates how many waiters expect this rdtgrp to exist. Waiters
+could be waiting on rdtgroup_mutex or some work sitting on a task's
+workqueue for when the task returns from kernel mode or exits.
+
+The deletion of a rdtgrp is intended to have two phases:
+
+ (1) while holding rdtgroup_mutex the necessary cleanup is done and
+ rdtgrp->flags is set to RDT_DELETED,
+
+ (2) after releasing the rdtgroup_mutex, the rdtgrp structure is freed
+ only if there are no waiters and its flag is set to RDT_DELETED. Upon
+ gaining access to rdtgroup_mutex or rdtgrp, a waiter is required to check
+ for the RDT_DELETED flag.
+
+When unmounting the resctrl file system or deleting ctrl_mon groups,
+all of the subdirectories are removed and the data structure of rdtgrp
+is forcibly freed without checking rdtgrp->waitcount. If at this point
+there was a waiter on rdtgrp then a use-after-free issue occurs when the
+waiter starts running and accesses the rdtgrp structure it was waiting
+on.
+
+See kfree() calls in [1], [2] and [3] in these two call paths in the
+following scenarios:
+(1) rdt_kill_sb() -> rmdir_all_sub() -> free_all_child_rdtgrp()
+(2) rdtgroup_rmdir() -> rdtgroup_rmdir_ctrl() -> free_all_child_rdtgrp()
+
+Several scenarios result in a use-after-free issue, as described
+below:
+
+Scenario 1:
+-----------
+In Thread 1, rdtgroup_tasks_write() adds a task_work callback
+move_myself(). If move_myself() is scheduled to execute after Thread 2
+rdt_kill_sb() is finished, referring to earlier rdtgrp memory
+(rdtgrp->waitcount) which was already freed in Thread 2 results in
+use-after-free issue.
+
+Thread 1 (rdtgroup_tasks_write) Thread 2 (rdt_kill_sb)
+------------------------------- ----------------------
+rdtgroup_kn_lock_live
+ atomic_inc(&rdtgrp->waitcount)
+ mutex_lock
+rdtgroup_move_task
+ __rdtgroup_move_task
+ /*
+ * Take an extra refcount, so rdtgrp cannot be freed
+ * before the call back move_myself has been invoked
+ */
+ atomic_inc(&rdtgrp->waitcount)
+ /* Callback move_myself will be scheduled for later */
+ task_work_add(move_myself)
+rdtgroup_kn_unlock
+ mutex_unlock
+ atomic_dec_and_test(&rdtgrp->waitcount)
+ && (flags & RDT_DELETED)
+ mutex_lock
+ rmdir_all_sub
+ /*
+ * sentry and rdtgrp are freed
+ * without checking refcount
+ */
+ free_all_child_rdtgrp
+ kfree(sentry)*[1]
+ kfree(rdtgrp)*[2]
+ mutex_unlock
+/*
+ * Callback is scheduled to execute
+ * after rdt_kill_sb is finished
+ */
+move_myself
+ /*
+ * Use-after-free: refer to earlier rdtgrp
+ * memory which was freed in [1] or [2].
+ */
+ atomic_dec_and_test(&rdtgrp->waitcount)
+ && (flags & RDT_DELETED)
+ kfree(rdtgrp)
+
+Scenario 2:
+-----------
+In Thread 1, rdtgroup_tasks_write() adds a task_work callback
+move_myself(). If move_myself() is scheduled to execute after Thread 2
+rdtgroup_rmdir() is finished, referring to earlier rdtgrp memory
+(rdtgrp->waitcount) which was already freed in Thread 2 results in
+use-after-free issue.
+
+Thread 1 (rdtgroup_tasks_write) Thread 2 (rdtgroup_rmdir)
+------------------------------- -------------------------
+rdtgroup_kn_lock_live
+ atomic_inc(&rdtgrp->waitcount)
+ mutex_lock
+rdtgroup_move_task
+ __rdtgroup_move_task
+ /*
+ * Take an extra refcount, so rdtgrp cannot be freed
+ * before the call back move_myself has been invoked
+ */
+ atomic_inc(&rdtgrp->waitcount)
+ /* Callback move_myself will be scheduled for later */
+ task_work_add(move_myself)
+rdtgroup_kn_unlock
+ mutex_unlock
+ atomic_dec_and_test(&rdtgrp->waitcount)
+ && (flags & RDT_DELETED)
+ rdtgroup_kn_lock_live
+ atomic_inc(&rdtgrp->waitcount)
+ mutex_lock
+ rdtgroup_rmdir_ctrl
+ free_all_child_rdtgrp
+ /*
+ * sentry is freed without
+ * checking refcount
+ */
+ kfree(sentry)*[3]
+ rdtgroup_ctrl_remove
+ rdtgrp->flags = RDT_DELETED
+ rdtgroup_kn_unlock
+ mutex_unlock
+ atomic_dec_and_test(
+ &rdtgrp->waitcount)
+ && (flags & RDT_DELETED)
+ kfree(rdtgrp)
+/*
+ * Callback is scheduled to execute
+ * after rdtgroup_rmdir is finished
+ */
+move_myself
+ /*
+ * Use-after-free: refer to earlier rdtgrp
+ * memory which was freed in [3].
+ */
+ atomic_dec_and_test(&rdtgrp->waitcount)
+ && (flags & RDT_DELETED)
+ kfree(rdtgrp)
+
+If CONFIG_DEBUG_SLAB=y, Slab corruption on kmalloc-2k can be observed
+as shown below. Note that "0x6b" is POISON_FREE after kfree(). The
+corrupted bits "0x6a", "0x64" at offset 0x424 correspond to the
+waitcount member of struct rdtgroup which was freed:
+
+ Slab corruption (Not tainted): kmalloc-2k start=ffff9504c5b0d000, len=2048
+ 420: 6b 6b 6b 6b 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkjkkkkkkkkkkk
+ Single bit error detected. Probably bad RAM.
+ Run memtest86+ or a similar memory test tool.
+ Next obj: start=ffff9504c5b0d800, len=2048
+ 000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
+ 010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
+
+ Slab corruption (Not tainted): kmalloc-2k start=ffff9504c58ab800, len=2048
+ 420: 6b 6b 6b 6b 64 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkdkkkkkkkkkkk
+ Prev obj: start=ffff9504c58ab000, len=2048
+ 000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
+ 010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
+
+Fix this by taking reference count (waitcount) of rdtgrp into account in
+the two call paths that currently do not do so. Instead of always
+freeing the resource group it will only be freed if there are no waiters
+on it. If there are waiters, the resource group will have its flags set
+to RDT_DELETED.
+
+It will be left to the waiter to free the resource group when it starts
+running and finds that it was the last waiter and the resource group has
+since been removed (rdtgrp->flags & RDT_DELETED). The two call paths are:
+(1) rdt_kill_sb() -> rmdir_all_sub() -> free_all_child_rdtgrp()
+(2) rdtgroup_rmdir() -> rdtgroup_rmdir_ctrl() -> free_all_child_rdtgrp()
+
+Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support")
+Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system")
+Suggested-by: Reinette Chatre <reinette.chatre@intel.com>
+Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/1578500886-21771-2-git-send-email-xiaochen.shen@intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/resctrl/rdtgroup.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+index e4da26325e3ea..c7564294a12a8 100644
+--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+@@ -2205,7 +2205,11 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
+ list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
+ free_rmid(sentry->mon.rmid);
+ list_del(&sentry->mon.crdtgrp_list);
+- kfree(sentry);
++
++ if (atomic_read(&sentry->waitcount) != 0)
++ sentry->flags = RDT_DELETED;
++ else
++ kfree(sentry);
+ }
+ }
+
+@@ -2243,7 +2247,11 @@ static void rmdir_all_sub(void)
+
+ kernfs_remove(rdtgrp->kn);
+ list_del(&rdtgrp->rdtgroup_list);
+- kfree(rdtgrp);
++
++ if (atomic_read(&rdtgrp->waitcount) != 0)
++ rdtgrp->flags = RDT_DELETED;
++ else
++ kfree(rdtgrp);
+ }
+ /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
+ update_closid_rmid(cpu_online_mask, &rdtgroup_default);
+--
+2.20.1
+