From ae1017130d89f0f90ca8555a1b4c4333e4dcf27f Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 2 Feb 2020 22:01:13 -0500 Subject: [PATCH] fixes for 4.14 Signed-off-by: Sasha Levin --- ...fix-user-after-free-on-module-unload.patch | 43 ++++ queue-4.14/series | 4 + ...a-deadlock-due-to-inaccurate-referen.patch | 230 +++++++++++++++++ ...use-after-free-due-to-inaccurate-ref.patch | 151 +++++++++++ ...use-after-free-when-deleting-resourc.patch | 234 ++++++++++++++++++ 5 files changed, 662 insertions(+) create mode 100644 queue-4.14/crypto-pcrypt-fix-user-after-free-on-module-unload.patch create mode 100644 queue-4.14/x86-resctrl-fix-a-deadlock-due-to-inaccurate-referen.patch create mode 100644 queue-4.14/x86-resctrl-fix-use-after-free-due-to-inaccurate-ref.patch create mode 100644 queue-4.14/x86-resctrl-fix-use-after-free-when-deleting-resourc.patch diff --git a/queue-4.14/crypto-pcrypt-fix-user-after-free-on-module-unload.patch b/queue-4.14/crypto-pcrypt-fix-user-after-free-on-module-unload.patch new file mode 100644 index 00000000000..7179d67e507 --- /dev/null +++ b/queue-4.14/crypto-pcrypt-fix-user-after-free-on-module-unload.patch @@ -0,0 +1,43 @@ +From 4a8451e641a15b114220eded090b5b2726f8f3d9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Nov 2019 17:41:31 +0800 +Subject: crypto: pcrypt - Fix user-after-free on module unload + +From: Herbert Xu + +[ Upstream commit 07bfd9bdf568a38d9440c607b72342036011f727 ] + +On module unload of pcrypt we must unregister the crypto algorithms +first and then tear down the padata structure. As otherwise the +crypto algorithms are still alive and can be used while the padata +structure is being freed. 
+ +Fixes: 5068c7a883d1 ("crypto: pcrypt - Add pcrypt crypto...") +Cc: +Signed-off-by: Herbert Xu +Signed-off-by: Sasha Levin +--- + crypto/pcrypt.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c +index a5718c0a3dc4e..1348541da463a 100644 +--- a/crypto/pcrypt.c ++++ b/crypto/pcrypt.c +@@ -505,11 +505,12 @@ static int __init pcrypt_init(void) + + static void __exit pcrypt_exit(void) + { ++ crypto_unregister_template(&pcrypt_tmpl); ++ + pcrypt_fini_padata(&pencrypt); + pcrypt_fini_padata(&pdecrypt); + + kset_unregister(pcrypt_kset); +- crypto_unregister_template(&pcrypt_tmpl); + } + + module_init(pcrypt_init); +-- +2.20.1 + diff --git a/queue-4.14/series b/queue-4.14/series index bcf75462f4d..43804d29398 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -31,3 +31,7 @@ net_sched-ematch-reject-invalid-tcf_em_simple.patch rsi-fix-use-after-free-on-probe-errors.patch crypto-af_alg-use-bh_lock_sock-in-sk_destruct.patch vfs-fix-do_last-regression.patch +x86-resctrl-fix-use-after-free-when-deleting-resourc.patch +x86-resctrl-fix-use-after-free-due-to-inaccurate-ref.patch +x86-resctrl-fix-a-deadlock-due-to-inaccurate-referen.patch +crypto-pcrypt-fix-user-after-free-on-module-unload.patch diff --git a/queue-4.14/x86-resctrl-fix-a-deadlock-due-to-inaccurate-referen.patch b/queue-4.14/x86-resctrl-fix-a-deadlock-due-to-inaccurate-referen.patch new file mode 100644 index 00000000000..507de442df9 --- /dev/null +++ b/queue-4.14/x86-resctrl-fix-a-deadlock-due-to-inaccurate-referen.patch @@ -0,0 +1,230 @@ +From 04dcdc19c1ea8238a0bebb35ad5429c368cc2f1d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 2 Feb 2020 06:00:53 +0800 +Subject: x86/resctrl: Fix a deadlock due to inaccurate reference + +From: Xiaochen Shen + +commit 334b0f4e9b1b4a1d475f803419d202f6c5e4d18e upstream. 
+ +There is a race condition which results in a deadlock when rmdir and +mkdir execute concurrently: + +$ ls /sys/fs/resctrl/c1/mon_groups/m1/ +cpus cpus_list mon_data tasks + +Thread 1: rmdir /sys/fs/resctrl/c1 +Thread 2: mkdir /sys/fs/resctrl/c1/mon_groups/m1 + +3 locks held by mkdir/48649: + #0: (sb_writers#17){.+.+}, at: [] mnt_want_write+0x20/0x50 + #1: (&type->i_mutex_dir_key#8/1){+.+.}, at: [] filename_create+0x7b/0x170 + #2: (rdtgroup_mutex){+.+.}, at: [] rdtgroup_kn_lock_live+0x3d/0x70 + +4 locks held by rmdir/48652: + #0: (sb_writers#17){.+.+}, at: [] mnt_want_write+0x20/0x50 + #1: (&type->i_mutex_dir_key#8/1){+.+.}, at: [] do_rmdir+0x13f/0x1e0 + #2: (&type->i_mutex_dir_key#8){++++}, at: [] vfs_rmdir+0x4d/0x120 + #3: (rdtgroup_mutex){+.+.}, at: [] rdtgroup_kn_lock_live+0x3d/0x70 + +Thread 1 is deleting control group "c1". Holding rdtgroup_mutex, +kernfs_remove() removes all kernfs nodes under directory "c1" +recursively, then waits for sub kernfs node "mon_groups" to drop active +reference. + +Thread 2 is trying to create a subdirectory "m1" in the "mon_groups" +directory. The wrapper kernfs_iop_mkdir() takes an active reference to +the "mon_groups" directory but the code drops the active reference to +the parent directory "c1" instead. + +As a result, Thread 1 is blocked on waiting for active reference to drop +and never release rdtgroup_mutex, while Thread 2 is also blocked on +trying to get rdtgroup_mutex. 
+ +Thread 1 (rdtgroup_rmdir) Thread 2 (rdtgroup_mkdir) +(rmdir /sys/fs/resctrl/c1) (mkdir /sys/fs/resctrl/c1/mon_groups/m1) +------------------------- ------------------------- + kernfs_iop_mkdir + /* + * kn: "m1", parent_kn: "mon_groups", + * prgrp_kn: parent_kn->parent: "c1", + * + * "mon_groups", parent_kn->active++: 1 + */ + kernfs_get_active(parent_kn) +kernfs_iop_rmdir + /* "c1", kn->active++ */ + kernfs_get_active(kn) + + rdtgroup_kn_lock_live + atomic_inc(&rdtgrp->waitcount) + /* "c1", kn->active-- */ + kernfs_break_active_protection(kn) + mutex_lock + + rdtgroup_rmdir_ctrl + free_all_child_rdtgrp + sentry->flags = RDT_DELETED + + rdtgroup_ctrl_remove + rdtgrp->flags = RDT_DELETED + kernfs_get(kn) + kernfs_remove(rdtgrp->kn) + __kernfs_remove + /* "mon_groups", sub_kn */ + atomic_add(KN_DEACTIVATED_BIAS, &sub_kn->active) + kernfs_drain(sub_kn) + /* + * sub_kn->active == KN_DEACTIVATED_BIAS + 1, + * waiting on sub_kn->active to drop, but it + * never drops in Thread 2 which is blocked + * on getting rdtgroup_mutex. + */ +Thread 1 hangs here ----> + wait_event(sub_kn->active == KN_DEACTIVATED_BIAS) + ... + rdtgroup_mkdir + rdtgroup_mkdir_mon(parent_kn, prgrp_kn) + mkdir_rdt_prepare(parent_kn, prgrp_kn) + rdtgroup_kn_lock_live(prgrp_kn) + atomic_inc(&rdtgrp->waitcount) + /* + * "c1", prgrp_kn->active-- + * + * The active reference on "c1" is + * dropped, but not matching the + * actual active reference taken + * on "mon_groups", thus causing + * Thread 1 to wait forever while + * holding rdtgroup_mutex. + */ + kernfs_break_active_protection( + prgrp_kn) + /* + * Trying to get rdtgroup_mutex + * which is held by Thread 1. + */ +Thread 2 hangs here ----> mutex_lock + ... + +The problem is that the creation of a subdirectory in the "mon_groups" +directory incorrectly releases the active protection of its parent +directory instead of itself before it starts waiting for rdtgroup_mutex. 
+This is triggered by the rdtgroup_mkdir() flow calling +rdtgroup_kn_lock_live()/rdtgroup_kn_unlock() with kernfs node of the +parent control group ("c1") as argument. It should be called with kernfs +node "mon_groups" instead. What is currently missing is that the +kn->priv of "mon_groups" is NULL instead of pointing to the rdtgrp. + +Fix it by pointing kn->priv to rdtgrp when "mon_groups" is created. Then +it could be passed to rdtgroup_kn_lock_live()/rdtgroup_kn_unlock() +instead. And then it operates on the same rdtgroup structure but handles +the active reference of kernfs node "mon_groups" to prevent deadlock. +The same changes are also made to the "mon_data" directories. + +This results in some unused function parameters that will be cleaned up +in follow-up patch as the focus here is on the fix only in support of +backporting efforts. + +Backporting notes: + +Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt +files to a separate directory"), the file +arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to +arch/x86/kernel/cpu/resctrl/rdtgroup.c. +Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +for older stable trees. 
+ +Fixes: c7d9aac61311 ("x86/intel_rdt/cqm: Add mkdir support for RDT monitoring") +Suggested-by: Reinette Chatre +Signed-off-by: Xiaochen Shen +Signed-off-by: Borislav Petkov +Reviewed-by: Reinette Chatre +Reviewed-by: Tony Luck +Acked-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/1578500886-21771-4-git-send-email-xiaochen.shen@intel.com +Signed-off-by: Sasha Levin +--- + arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +index 01574966d91fd..0ec30b2384c05 100644 +--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c ++++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +@@ -1107,7 +1107,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, + + if (rdt_mon_capable) { + ret = mongroup_create_dir(rdtgroup_default.kn, +- NULL, "mon_groups", ++ &rdtgroup_default, "mon_groups", + &kn_mongrp); + if (ret) { + dentry = ERR_PTR(ret); +@@ -1499,7 +1499,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, + /* + * Create the mon_data directory first. + */ +- ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn); ++ ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn); + if (ret) + return ret; + +@@ -1533,7 +1533,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, + uint files = 0; + int ret; + +- prdtgrp = rdtgroup_kn_lock_live(prgrp_kn); ++ prdtgrp = rdtgroup_kn_lock_live(parent_kn); + if (!prdtgrp) { + ret = -ENODEV; + goto out_unlock; +@@ -1589,7 +1589,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, + kernfs_activate(kn); + + /* +- * The caller unlocks the prgrp_kn upon success. ++ * The caller unlocks the parent_kn upon success. 
+ */ + return 0; + +@@ -1600,7 +1600,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, + out_free_rgrp: + kfree(rdtgrp); + out_unlock: +- rdtgroup_kn_unlock(prgrp_kn); ++ rdtgroup_kn_unlock(parent_kn); + return ret; + } + +@@ -1638,7 +1638,7 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, + */ + list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list); + +- rdtgroup_kn_unlock(prgrp_kn); ++ rdtgroup_kn_unlock(parent_kn); + return ret; + } + +@@ -1675,7 +1675,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, + * Create an empty mon_groups directory to hold the subset + * of tasks and cpus to monitor. + */ +- ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL); ++ ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL); + if (ret) + goto out_id_free; + } +@@ -1688,7 +1688,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, + out_common_fail: + mkdir_rdt_prepare_clean(rdtgrp); + out_unlock: +- rdtgroup_kn_unlock(prgrp_kn); ++ rdtgroup_kn_unlock(parent_kn); + return ret; + } + +-- +2.20.1 + diff --git a/queue-4.14/x86-resctrl-fix-use-after-free-due-to-inaccurate-ref.patch b/queue-4.14/x86-resctrl-fix-use-after-free-due-to-inaccurate-ref.patch new file mode 100644 index 00000000000..cfed4231399 --- /dev/null +++ b/queue-4.14/x86-resctrl-fix-use-after-free-due-to-inaccurate-ref.patch @@ -0,0 +1,151 @@ +From a19f577a20377a3bafd8d58b29800f8f4bf32818 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 2 Feb 2020 06:00:23 +0800 +Subject: x86/resctrl: Fix use-after-free due to inaccurate refcount of + rdtgroup + +From: Xiaochen Shen + +commit 074fadee59ee7a9d2b216e9854bd4efb5dad679f upstream. 
+ +There is a race condition in the following scenario which results in an +use-after-free issue when reading a monitoring file and deleting the +parent ctrl_mon group concurrently: + +Thread 1 calls atomic_inc() to take refcount of rdtgrp and then calls +kernfs_break_active_protection() to drop the active reference of kernfs +node in rdtgroup_kn_lock_live(). + +In Thread 2, kernfs_remove() is a blocking routine. It waits on all sub +kernfs nodes to drop the active reference when removing all subtree +kernfs nodes recursively. Thread 2 could block on kernfs_remove() until +Thread 1 calls kernfs_break_active_protection(). Only after +kernfs_remove() completes the refcount of rdtgrp could be trusted. + +Before Thread 1 calls atomic_inc() and kernfs_break_active_protection(), +Thread 2 could call kfree() when the refcount of rdtgrp (sentry) is 0 +instead of 1 due to the race. + +In Thread 1, in rdtgroup_kn_unlock(), referring to earlier rdtgrp memory +(rdtgrp->waitcount) which was already freed in Thread 2 results in +use-after-free issue. + +Thread 1 (rdtgroup_mondata_show) Thread 2 (rdtgroup_rmdir) +-------------------------------- ------------------------- +rdtgroup_kn_lock_live + /* + * kn active protection until + * kernfs_break_active_protection(kn) + */ + rdtgrp = kernfs_to_rdtgroup(kn) + rdtgroup_kn_lock_live + atomic_inc(&rdtgrp->waitcount) + mutex_lock + rdtgroup_rmdir_ctrl + free_all_child_rdtgrp + /* + * sentry->waitcount should be 1 + * but is 0 now due to the race. + */ + kfree(sentry)*[1] + /* + * Only after kernfs_remove() + * completes, the refcount of + * rdtgrp could be trusted. + */ + atomic_inc(&rdtgrp->waitcount) + /* kn->active-- */ + kernfs_break_active_protection(kn) + rdtgroup_ctrl_remove + rdtgrp->flags = RDT_DELETED + /* + * Blocking routine, wait for + * all sub kernfs nodes to drop + * active reference in + * kernfs_break_active_protection. 
+ */ + kernfs_remove(rdtgrp->kn) + rdtgroup_kn_unlock + mutex_unlock + atomic_dec_and_test( + &rdtgrp->waitcount) + && (flags & RDT_DELETED) + kernfs_unbreak_active_protection(kn) + kfree(rdtgrp) + mutex_lock +mon_event_read +rdtgroup_kn_unlock + mutex_unlock + /* + * Use-after-free: refer to earlier rdtgrp + * memory which was freed in [1]. + */ + atomic_dec_and_test(&rdtgrp->waitcount) + && (flags & RDT_DELETED) + /* kn->active++ */ + kernfs_unbreak_active_protection(kn) + kfree(rdtgrp) + +Fix it by moving free_all_child_rdtgrp() to after kernfs_remove() in +rdtgroup_rmdir_ctrl() to ensure it has the accurate refcount of rdtgrp. + +Backporting notes: + +Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt +files to a separate directory"), the file +arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to +arch/x86/kernel/cpu/resctrl/rdtgroup.c. +Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +for older stable trees. + +Upstream commit 17eafd076291 ("x86/intel_rdt: Split resource group +removal in two") moved part of resource group removal code from +rdtgroup_rmdir_mon() into a separate function rdtgroup_ctrl_remove(). +Apply the change against original code base of rdtgroup_rmdir_mon() for +older stable trees. 
+ +Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support") +Suggested-by: Reinette Chatre +Signed-off-by: Xiaochen Shen +Signed-off-by: Borislav Petkov +Reviewed-by: Reinette Chatre +Reviewed-by: Tony Luck +Acked-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/1578500886-21771-3-git-send-email-xiaochen.shen@intel.com +Signed-off-by: Sasha Levin +--- + arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +index 734996904dc3b..01574966d91fd 100644 +--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c ++++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +@@ -1800,11 +1800,6 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, + closid_free(rdtgrp->closid); + free_rmid(rdtgrp->mon.rmid); + +- /* +- * Free all the child monitor group rmids. +- */ +- free_all_child_rdtgrp(rdtgrp); +- + list_del(&rdtgrp->rdtgroup_list); + + /* +@@ -1814,6 +1809,11 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, + kernfs_get(kn); + kernfs_remove(rdtgrp->kn); + ++ /* ++ * Free all the child monitor group rmids. ++ */ ++ free_all_child_rdtgrp(rdtgrp); ++ + return 0; + } + +-- +2.20.1 + diff --git a/queue-4.14/x86-resctrl-fix-use-after-free-when-deleting-resourc.patch b/queue-4.14/x86-resctrl-fix-use-after-free-when-deleting-resourc.patch new file mode 100644 index 00000000000..708ced7efbe --- /dev/null +++ b/queue-4.14/x86-resctrl-fix-use-after-free-when-deleting-resourc.patch @@ -0,0 +1,234 @@ +From 2f522c1ef52e05cba8c14ce236b5b5898e6d2cef Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 2 Feb 2020 05:59:30 +0800 +Subject: x86/resctrl: Fix use-after-free when deleting resource groups + +From: Xiaochen Shen + +commit b8511ccc75c033f6d54188ea4df7bf1e85778740 upstream. 
+ +A resource group (rdtgrp) contains a reference count (rdtgrp->waitcount) +that indicates how many waiters expect this rdtgrp to exist. Waiters +could be waiting on rdtgroup_mutex or some work sitting on a task's +workqueue for when the task returns from kernel mode or exits. + +The deletion of a rdtgrp is intended to have two phases: + + (1) while holding rdtgroup_mutex the necessary cleanup is done and + rdtgrp->flags is set to RDT_DELETED, + + (2) after releasing the rdtgroup_mutex, the rdtgrp structure is freed + only if there are no waiters and its flag is set to RDT_DELETED. Upon + gaining access to rdtgroup_mutex or rdtgrp, a waiter is required to check + for the RDT_DELETED flag. + +When unmounting the resctrl file system or deleting ctrl_mon groups, +all of the subdirectories are removed and the data structure of rdtgrp +is forcibly freed without checking rdtgrp->waitcount. If at this point +there was a waiter on rdtgrp then a use-after-free issue occurs when the +waiter starts running and accesses the rdtgrp structure it was waiting +on. + +See kfree() calls in [1], [2] and [3] in these two call paths in +following scenarios: +(1) rdt_kill_sb() -> rmdir_all_sub() -> free_all_child_rdtgrp() +(2) rdtgroup_rmdir() -> rdtgroup_rmdir_ctrl() -> free_all_child_rdtgrp() + +There are several scenarios that result in use-after-free issue in +following: + +Scenario 1: +----------- +In Thread 1, rdtgroup_tasks_write() adds a task_work callback +move_myself(). If move_myself() is scheduled to execute after Thread 2 +rdt_kill_sb() is finished, referring to earlier rdtgrp memory +(rdtgrp->waitcount) which was already freed in Thread 2 results in +use-after-free issue. 
+ +Thread 1 (rdtgroup_tasks_write) Thread 2 (rdt_kill_sb) +------------------------------- ---------------------- +rdtgroup_kn_lock_live + atomic_inc(&rdtgrp->waitcount) + mutex_lock +rdtgroup_move_task + __rdtgroup_move_task + /* + * Take an extra refcount, so rdtgrp cannot be freed + * before the call back move_myself has been invoked + */ + atomic_inc(&rdtgrp->waitcount) + /* Callback move_myself will be scheduled for later */ + task_work_add(move_myself) +rdtgroup_kn_unlock + mutex_unlock + atomic_dec_and_test(&rdtgrp->waitcount) + && (flags & RDT_DELETED) + mutex_lock + rmdir_all_sub + /* + * sentry and rdtgrp are freed + * without checking refcount + */ + free_all_child_rdtgrp + kfree(sentry)*[1] + kfree(rdtgrp)*[2] + mutex_unlock +/* + * Callback is scheduled to execute + * after rdt_kill_sb is finished + */ +move_myself + /* + * Use-after-free: refer to earlier rdtgrp + * memory which was freed in [1] or [2]. + */ + atomic_dec_and_test(&rdtgrp->waitcount) + && (flags & RDT_DELETED) + kfree(rdtgrp) + +Scenario 2: +----------- +In Thread 1, rdtgroup_tasks_write() adds a task_work callback +move_myself(). If move_myself() is scheduled to execute after Thread 2 +rdtgroup_rmdir() is finished, referring to earlier rdtgrp memory +(rdtgrp->waitcount) which was already freed in Thread 2 results in +use-after-free issue. 
+ +Thread 1 (rdtgroup_tasks_write) Thread 2 (rdtgroup_rmdir) +------------------------------- ------------------------- +rdtgroup_kn_lock_live + atomic_inc(&rdtgrp->waitcount) + mutex_lock +rdtgroup_move_task + __rdtgroup_move_task + /* + * Take an extra refcount, so rdtgrp cannot be freed + * before the call back move_myself has been invoked + */ + atomic_inc(&rdtgrp->waitcount) + /* Callback move_myself will be scheduled for later */ + task_work_add(move_myself) +rdtgroup_kn_unlock + mutex_unlock + atomic_dec_and_test(&rdtgrp->waitcount) + && (flags & RDT_DELETED) + rdtgroup_kn_lock_live + atomic_inc(&rdtgrp->waitcount) + mutex_lock + rdtgroup_rmdir_ctrl + free_all_child_rdtgrp + /* + * sentry is freed without + * checking refcount + */ + kfree(sentry)*[3] + rdtgroup_ctrl_remove + rdtgrp->flags = RDT_DELETED + rdtgroup_kn_unlock + mutex_unlock + atomic_dec_and_test( + &rdtgrp->waitcount) + && (flags & RDT_DELETED) + kfree(rdtgrp) +/* + * Callback is scheduled to execute + * after rdtgroup_rmdir is finished + */ +move_myself + /* + * Use-after-free: refer to earlier rdtgrp + * memory which was freed in [3]. + */ + atomic_dec_and_test(&rdtgrp->waitcount) + && (flags & RDT_DELETED) + kfree(rdtgrp) + +If CONFIG_DEBUG_SLAB=y, Slab corruption on kmalloc-2k can be observed +like following. Note that "0x6b" is POISON_FREE after kfree(). The +corrupted bits "0x6a", "0x64" at offset 0x424 correspond to +waitcount member of struct rdtgroup which was freed: + + Slab corruption (Not tainted): kmalloc-2k start=ffff9504c5b0d000, len=2048 + 420: 6b 6b 6b 6b 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkjkkkkkkkkkkk + Single bit error detected. Probably bad RAM. + Run memtest86+ or a similar memory test tool. 
+ Next obj: start=ffff9504c5b0d800, len=2048 + 000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk + 010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk + + Slab corruption (Not tainted): kmalloc-2k start=ffff9504c58ab800, len=2048 + 420: 6b 6b 6b 6b 64 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkdkkkkkkkkkkk + Prev obj: start=ffff9504c58ab000, len=2048 + 000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk + 010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk + +Fix this by taking reference count (waitcount) of rdtgrp into account in +the two call paths that currently do not do so. Instead of always +freeing the resource group it will only be freed if there are no waiters +on it. If there are waiters, the resource group will have its flags set +to RDT_DELETED. + +It will be left to the waiter to free the resource group when it starts +running and finding that it was the last waiter and the resource group +has been removed (rdtgrp->flags & RDT_DELETED) since. (1) rdt_kill_sb() +-> rmdir_all_sub() -> free_all_child_rdtgrp() (2) rdtgroup_rmdir() -> +rdtgroup_rmdir_ctrl() -> free_all_child_rdtgrp() + +Backporting notes: + +Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt +files to a separate directory"), the file +arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to +arch/x86/kernel/cpu/resctrl/rdtgroup.c. + +Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +in older stable trees. 
+ +Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support") +Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system") +Suggested-by: Reinette Chatre +Signed-off-by: Xiaochen Shen +Signed-off-by: Borislav Petkov +Reviewed-by: Reinette Chatre +Reviewed-by: Tony Luck +Acked-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/1578500886-21771-2-git-send-email-xiaochen.shen@intel.com +Signed-off-by: Sasha Levin +--- + arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +index 2dae1b3c42fcc..734996904dc3b 100644 +--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c ++++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +@@ -1260,7 +1260,11 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) + list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { + free_rmid(sentry->mon.rmid); + list_del(&sentry->mon.crdtgrp_list); +- kfree(sentry); ++ ++ if (atomic_read(&sentry->waitcount) != 0) ++ sentry->flags = RDT_DELETED; ++ else ++ kfree(sentry); + } + } + +@@ -1294,7 +1298,11 @@ static void rmdir_all_sub(void) + + kernfs_remove(rdtgrp->kn); + list_del(&rdtgrp->rdtgroup_list); +- kfree(rdtgrp); ++ ++ if (atomic_read(&rdtgrp->waitcount) != 0) ++ rdtgrp->flags = RDT_DELETED; ++ else ++ kfree(rdtgrp); + } + /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ + update_closid_rmid(cpu_online_mask, &rdtgroup_default); +-- +2.20.1 + -- 2.47.3