From ddf43da06aa024c0f91a2c54908e8221df8b1406 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 15 Jan 2021 10:59:07 +0100
Subject: [PATCH] 5.4-stable patches

added patches:
	exfat-month-timestamp-metadata-accidentally-incremented.patch
	hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch
	iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch
	vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch
	x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch
	x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch
---
 ...mp-metadata-accidentally-incremented.patch |  40 ++++
 ...memory-leakage-caused-by-kfifo_alloc.patch | 108 +++++++++
 ..._lsm6dsx-fix-edge-trigger-interrupts.patch |  68 ++++++
 queue-5.4/series                              |   6 +
 ...h-add-pgo-and-autofdo-input-sections.patch |  86 ++++++++
 ...ve-a-task-to-the-same-resource-group.patch |  51 +++++
 ...ask_work_add-to-update-pqr_assoc-msr.patch | 206 ++++++++++++++++++
 7 files changed, 565 insertions(+)
 create mode 100644 queue-5.4/exfat-month-timestamp-metadata-accidentally-incremented.patch
 create mode 100644 queue-5.4/hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch
 create mode 100644 queue-5.4/iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch
 create mode 100644 queue-5.4/vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch
 create mode 100644 queue-5.4/x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch
 create mode 100644 queue-5.4/x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch

diff --git a/queue-5.4/exfat-month-timestamp-metadata-accidentally-incremented.patch b/queue-5.4/exfat-month-timestamp-metadata-accidentally-incremented.patch
new file mode 100644
index 00000000000..2a6ac5f5f8f
--- /dev/null
+++ b/queue-5.4/exfat-month-timestamp-metadata-accidentally-incremented.patch
@@ -0,0 +1,40 @@
+From valdis.kletnieks@vt.edu  Fri Jan 15 10:42:32 2021
+From: "Valdis KlÄtnieks" <valdis.kletnieks@vt.edu>
+Date: Fri, 15 Jan 2021 02:38:19 -0500
+Subject: exfat: Month timestamp metadata accidentally incremented
+To: Greg KH <greg@kroah.com>, arpad.mueller@uni-bonn.de
+Cc: stable@vger.kernel.org
+Message-ID: <6161.1610696299@turing-police>
+
+From: "Valdis KlÄtnieks" <valdis.kletnieks@vt.edu>
+
+The staging/exfat driver has departed, but a lot of distros are still tracking
+5.4-stable, so we should fix this.
+
+There was an 0/1 offset error in month handling for file metadata, causing
+the month to get incremented on each reference to the file.
+
+Thanks to Sebastian Gurtler for troubleshooting this, and Arpad Mueller
+for bringing it to my attention.
+
+Relevant discussions:
+https://bugzilla.kernel.org/show_bug.cgi?id=210997
+https://bugs.launchpad.net/ubuntu/+source/ubuntu-meta/+bug/1872504
+
+Signed-off-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/staging/exfat/exfat_super.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/staging/exfat/exfat_super.c
++++ b/drivers/staging/exfat/exfat_super.c
+@@ -59,7 +59,7 @@ static void exfat_write_super(struct sup
+ /* Convert a FAT time/date pair to a UNIX date (seconds since 1 1 70). */
+ static void exfat_time_fat2unix(struct timespec64 *ts, struct date_time_t *tp)
+ {
+-	ts->tv_sec = mktime64(tp->Year + 1980, tp->Month + 1, tp->Day,
++	ts->tv_sec = mktime64(tp->Year + 1980, tp->Month, tp->Day,
+ 			      tp->Hour, tp->Minute, tp->Second);
+ 
+ 	ts->tv_nsec = tp->MilliSecond * NSEC_PER_MSEC;
diff --git a/queue-5.4/hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch b/queue-5.4/hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch
new file mode 100644
index 00000000000..185c6c7cda4
--- /dev/null
+++ b/queue-5.4/hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch
@@ -0,0 +1,108 @@
+From 37309f47e2f5674f3e86cb765312ace42cfcedf5 Mon Sep 17 00:00:00 2001
+From: Ping Cheng <pinglinux@gmail.com>
+Date: Wed, 9 Dec 2020 20:52:30 -0800
+Subject: HID: wacom: Fix memory leakage caused by kfifo_alloc
+
+From: Ping Cheng <pinglinux@gmail.com>
+
+commit 37309f47e2f5674f3e86cb765312ace42cfcedf5 upstream.
+
+As reported by syzbot below, kfifo_alloc'd memory would not be freed
+if a non-zero return value is triggered in wacom_probe. This patch
+creates and uses devm_kfifo_alloc to allocate and free itself.
+
+BUG: memory leak
+unreferenced object 0xffff88810dc44a00 (size 512):
+  comm "kworker/1:2", pid 3674, jiffies 4294943617 (age 14.100s)
+  hex dump (first 32 bytes):
+   00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+   00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+  backtrace:
+   [<0000000023e1afac>] kmalloc_array include/linux/slab.h:592 [inline]
+   [<0000000023e1afac>] __kfifo_alloc+0xad/0x100 lib/kfifo.c:43
+   [<00000000c477f737>] wacom_probe+0x1a1/0x3b0 drivers/hid/wacom_sys.c:2727
+   [<00000000b3109aca>] hid_device_probe+0x16b/0x210 drivers/hid/hid-core.c:2281
+   [<00000000aff7c640>] really_probe+0x159/0x480 drivers/base/dd.c:554
+   [<00000000778d0bc3>] driver_probe_device+0x84/0x100 drivers/base/dd.c:738
+   [<000000005108dbb5>] __device_attach_driver+0xee/0x110 drivers/base/dd.c:844
+   [<00000000efb7c59e>] bus_for_each_drv+0xb7/0x100 drivers/base/bus.c:431
+   [<0000000024ab1590>] __device_attach+0x122/0x250 drivers/base/dd.c:912
+   [<000000004c7ac048>] bus_probe_device+0xc6/0xe0 drivers/base/bus.c:491
+   [<00000000b93050a3>] device_add+0x5ac/0xc30 drivers/base/core.c:2936
+   [<00000000e5b46ea5>] hid_add_device+0x151/0x390 drivers/hid/hid-core.c:2437
+   [<00000000c6add147>] usbhid_probe+0x412/0x560 drivers/hid/usbhid/hid-core.c:1407
+   [<00000000c33acdb4>] usb_probe_interface+0x177/0x370 drivers/usb/core/driver.c:396
+   [<00000000aff7c640>] really_probe+0x159/0x480 drivers/base/dd.c:554
+   [<00000000778d0bc3>] driver_probe_device+0x84/0x100 drivers/base/dd.c:738
+   [<000000005108dbb5>] __device_attach_driver+0xee/0x110 drivers/base/dd.c:844
+
+https://syzkaller.appspot.com/bug?extid=5b49c9695968d7250a26
+
+Reported-by: syzbot+5b49c9695968d7250a26@syzkaller.appspotmail.com
+Signed-off-by: Ping Cheng <ping.cheng@wacom.com>
+Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hid/wacom_sys.c |   35 ++++++++++++++++++++++++++++++++---
+ 1 file changed, 32 insertions(+), 3 deletions(-)
+
+--- a/drivers/hid/wacom_sys.c
++++ b/drivers/hid/wacom_sys.c
+@@ -1270,6 +1270,37 @@ static int wacom_devm_sysfs_create_group
+ 					       group);
+ }
+ 
++static void wacom_devm_kfifo_release(struct device *dev, void *res)
++{
++	struct kfifo_rec_ptr_2 *devres = res;
++
++	kfifo_free(devres);
++}
++
++static int wacom_devm_kfifo_alloc(struct wacom *wacom)
++{
++	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
++	struct kfifo_rec_ptr_2 *pen_fifo = &wacom_wac->pen_fifo;
++	int error;
++
++	pen_fifo = devres_alloc(wacom_devm_kfifo_release,
++			      sizeof(struct kfifo_rec_ptr_2),
++			      GFP_KERNEL);
++
++	if (!pen_fifo)
++		return -ENOMEM;
++
++	error = kfifo_alloc(pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL);
++	if (error) {
++		devres_free(pen_fifo);
++		return error;
++	}
++
++	devres_add(&wacom->hdev->dev, pen_fifo);
++
++	return 0;
++}
++
+ enum led_brightness wacom_leds_brightness_get(struct wacom_led *led)
+ {
+ 	struct wacom *wacom = led->wacom;
+@@ -2724,7 +2755,7 @@ static int wacom_probe(struct hid_device
+ 	if (features->check_for_hid_type && features->hid_type != hdev->type)
+ 		return -ENODEV;
+ 
+-	error = kfifo_alloc(&wacom_wac->pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL);
++	error = wacom_devm_kfifo_alloc(wacom);
+ 	if (error)
+ 		return error;
+ 
+@@ -2786,8 +2817,6 @@ static void wacom_remove(struct hid_devi
+ 
+ 	if (wacom->wacom_wac.features.type != REMOTE)
+ 		wacom_release_resources(wacom);
+-
+-	kfifo_free(&wacom_wac->pen_fifo);
+ }
+ 
+ #ifdef CONFIG_PM
diff --git a/queue-5.4/iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch b/queue-5.4/iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch
new file mode 100644
index 00000000000..0921cbe0df2
--- /dev/null
+++ b/queue-5.4/iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch
@@ -0,0 +1,68 @@
+From foo@baz Fri Jan 15 10:51:55 AM CET 2021
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Sat, 14 Nov 2020 19:39:05 +0100
+Subject: iio: imu: st_lsm6dsx: fix edge-trigger interrupts
+
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+
+commit 3f9bce7a22a3f8ac9d885c9d75bc45569f24ac8b upstream
+
+If we are using edge IRQs, new samples can arrive while processing
+current interrupt since there are no hw guarantees the irq line
+stays "low" long enough to properly detect the new interrupt.
+In this case the new sample will be missed.
+Polling FIFO status register in st_lsm6dsx_handler_thread routine
+allow us to read new samples even if the interrupt arrives while
+processing previous data and the timeslot where the line is "low"
+is too short to be properly detected.
+
+Fixes: 89ca88a7cdf2 ("iio: imu: st_lsm6dsx: support active-low interrupts")
+Fixes: 290a6ce11d93 ("iio: imu: add support to lsm6dsx driver")
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Link: https://lore.kernel.org/r/5e93cda7dc1e665f5685c53ad8e9ea71dbae782d.1605378871.git.lorenzo@kernel.org
+Cc: <Stable@vger.kernel.org>
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+[sudip: manual backport to old irq handler path]
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c |   26 ++++++++++++++++++++-----
+ 1 file changed, 21 insertions(+), 5 deletions(-)
+
+--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
++++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
+@@ -664,13 +664,29 @@ static irqreturn_t st_lsm6dsx_handler_ir
+ static irqreturn_t st_lsm6dsx_handler_thread(int irq, void *private)
+ {
+ 	struct st_lsm6dsx_hw *hw = private;
+-	int count;
++	int fifo_len = 0, len;
+ 
+-	mutex_lock(&hw->fifo_lock);
+-	count = hw->settings->fifo_ops.read_fifo(hw);
+-	mutex_unlock(&hw->fifo_lock);
++	/*
++	 * If we are using edge IRQs, new samples can arrive while
++	 * processing current interrupt since there are no hw
++	 * guarantees the irq line stays "low" long enough to properly
++	 * detect the new interrupt. In this case the new sample will
++	 * be missed.
++	 * Polling FIFO status register allow us to read new
++	 * samples even if the interrupt arrives while processing
++	 * previous data and the timeslot where the line is "low" is
++	 * too short to be properly detected.
++	 */
++	do {
++		mutex_lock(&hw->fifo_lock);
++		len = hw->settings->fifo_ops.read_fifo(hw);
++		mutex_unlock(&hw->fifo_lock);
+ 
+-	return count ? IRQ_HANDLED : IRQ_NONE;
++		if (len > 0)
++			fifo_len += len;
++	} while (len > 0);
++
++	return fifo_len ? IRQ_HANDLED : IRQ_NONE;
+ }
+ 
+ static int st_lsm6dsx_buffer_preenable(struct iio_dev *iio_dev)
diff --git a/queue-5.4/series b/queue-5.4/series
index f4a70a2cc98..e65ab46c973 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -24,3 +24,9 @@ chtls-fix-panic-when-route-to-peer-not-configured.patch
 chtls-replace-skb_dequeue-with-skb_peek.patch
 chtls-added-a-check-to-avoid-null-pointer-dereference.patch
 chtls-fix-chtls-resources-release-sequence.patch
+x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch
+x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch
+exfat-month-timestamp-metadata-accidentally-incremented.patch
+vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch
+iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch
+hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch
diff --git a/queue-5.4/vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch b/queue-5.4/vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch
new file mode 100644
index 00000000000..c5bf91c5958
--- /dev/null
+++ b/queue-5.4/vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch
@@ -0,0 +1,86 @@
+From foo@baz Fri Jan 15 10:46:00 AM CET 2021
+From: Nick Desaulniers <ndesaulniers@google.com>
+Date: Fri, 21 Aug 2020 12:42:47 -0700
+Subject: vmlinux.lds.h: Add PGO and AutoFDO input sections
+
+From: Nick Desaulniers <ndesaulniers@google.com>
+
+commit eff8728fe69880d3f7983bec3fb6cea4c306261f upstream.
+
+Basically, consider .text.{hot|unlikely|unknown}.* part of .text, too.
+
+When compiling with profiling information (collected via PGO
+instrumentations or AutoFDO sampling), Clang will separate code into
+.text.hot, .text.unlikely, or .text.unknown sections based on profiling
+information. After D79600 (clang-11), these sections will have a
+trailing `.` suffix, ie.  .text.hot., .text.unlikely., .text.unknown..
+
+When using -ffunction-sections together with profiling infomation,
+either explicitly (FGKASLR) or implicitly (LTO), code may be placed in
+sections following the convention:
+.text.hot.<foo>, .text.unlikely.<bar>, .text.unknown.<baz>
+where <foo>, <bar>, and <baz> are functions.  (This produces one section
+per function; we generally try to merge these all back via linker script
+so that we don't have 50k sections).
+
+For the above cases, we need to teach our linker scripts that such
+sections might exist and that we'd explicitly like them grouped
+together, otherwise we can wind up with code outside of the
+_stext/_etext boundaries that might not be mapped properly for some
+architectures, resulting in boot failures.
+
+If the linker script is not told about possible input sections, then
+where the section is placed as output is a heuristic-laiden mess that's
+non-portable between linkers (ie. BFD and LLD), and has resulted in many
+hard to debug bugs.  Kees Cook is working on cleaning this up by adding
+--orphan-handling=warn linker flag used in ARCH=powerpc to additional
+architectures. In the case of linker scripts, borrowing from the Zen of
+Python: explicit is better than implicit.
+
+Also, ld.bfd's internal linker script considers .text.hot AND
+.text.hot.* to be part of .text, as well as .text.unlikely and
+.text.unlikely.*. I didn't see support for .text.unknown.*, and didn't
+see Clang producing such code in our kernel builds, but I see code in
+LLVM that can produce such section names if profiling information is
+missing. That may point to a larger issue with generating or collecting
+profiles, but I would much rather be safe and explicit than have to
+debug yet another issue related to orphan section placement.
+
+Reported-by: Jian Cai <jiancai@google.com>
+Suggested-by: FÄng-ruÃ¬ SÃ²ng <maskray@google.com>
+Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Tested-by: Luis Lozano <llozano@google.com>
+Tested-by: Manoj Gupta <manojgupta@google.com>
+Acked-by: Kees Cook <keescook@chromium.org>
+Cc: linux-arch@vger.kernel.org
+Cc: stable@vger.kernel.org
+Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=add44f8d5c5c05e08b11e033127a744d61c26aee
+Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=1de778ed23ce7492c523d5850c6c6dbb34152655
+Link: https://reviews.llvm.org/D79600
+Link: https://bugs.chromium.org/p/chromium/issues/detail?id=1084760
+Link: https://lore.kernel.org/r/20200821194310.3089815-7-keescook@chromium.org
+
+Debugged-by: Luis Lozano <llozano@google.com>
+[nc: Resolve small conflict due to lack of NOINSTR_TEXT]
+Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/asm-generic/vmlinux.lds.h |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -520,7 +520,10 @@
+  */
+ #define TEXT_TEXT							\
+ 		ALIGN_FUNCTION();					\
+-		*(.text.hot TEXT_MAIN .text.fixup .text.unlikely)	\
++		*(.text.hot .text.hot.*)				\
++		*(TEXT_MAIN .text.fixup)				\
++		*(.text.unlikely .text.unlikely.*)			\
++		*(.text.unknown .text.unknown.*)			\
+ 		*(.text..refcount)					\
+ 		*(.ref.text)						\
+ 	MEM_KEEP(init.text*)						\
diff --git a/queue-5.4/x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch b/queue-5.4/x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch
new file mode 100644
index 00000000000..cbfbd9fbff4
--- /dev/null
+++ b/queue-5.4/x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch
@@ -0,0 +1,51 @@
+From foo@baz Fri Jan 15 10:39:41 AM CET 2021
+From: Reinette Chatre <reinette.chatre@intel.com>
+Date: Mon, 11 Jan 2021 15:21:41 -0800
+Subject: x86/resctrl: Don't move a task to the same resource group
+To: stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: Fenghua Yu <fenghua.yu@intel.com>, Shakeel Butt <shakeelb@google.com>, Reinette Chatre <reinette.chatre@intel.com>, Borislav Petkov <bp@suse.de>, Tony Luck <tony.luck@intel.com>
+Message-ID: <189c5c33fe6def640b0ac8807cf819d7541bfa46.1610394119.git.reinette.chatre@intel.com>
+
+From: Fenghua Yu <fenghua.yu@intel.com>
+
+commit a0195f314a25582b38993bf30db11c300f4f4611 upstream
+
+Shakeel Butt reported in [1] that a user can request a task to be moved
+to a resource group even if the task is already in the group. It just
+wastes time to do the move operation which could be costly to send IPI
+to a different CPU.
+
+Add a sanity check to ensure that the move operation only happens when
+the task is not already in the resource group.
+
+[1] https://lore.kernel.org/lkml/CALvZod7E9zzHwenzf7objzGKsdBmVwTgEJ0nPgs0LUFU3SN5Pw@mail.gmail.com/
+
+Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files")
+Reported-by: Shakeel Butt <shakeelb@google.com>
+Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
+Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/962ede65d8e95be793cb61102cca37f7bb018e66.1608243147.git.reinette.chatre@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/resctrl/rdtgroup.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+@@ -546,6 +546,13 @@ static void update_task_closid_rmid(stru
+ static int __rdtgroup_move_task(struct task_struct *tsk,
+ 				struct rdtgroup *rdtgrp)
+ {
++	/* If the task is already in rdtgrp, no need to move the task. */
++	if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
++	     tsk->rmid == rdtgrp->mon.rmid) ||
++	    (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
++	     tsk->closid == rdtgrp->mon.parent->closid))
++		return 0;
++
+ 	/*
+ 	 * Set the task's closid/rmid before the PQR_ASSOC MSR can be
+ 	 * updated by them.
diff --git a/queue-5.4/x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch b/queue-5.4/x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch
new file mode 100644
index 00000000000..cd6fc8258ad
--- /dev/null
+++ b/queue-5.4/x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch
@@ -0,0 +1,206 @@
+From foo@baz Fri Jan 15 10:39:41 AM CET 2021
+From: Reinette Chatre <reinette.chatre@intel.com>
+Date: Mon, 11 Jan 2021 15:21:13 -0800
+Subject: x86/resctrl: Use an IPI instead of task_work_add() to update PQR_ASSOC MSR
+To: stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: Fenghua Yu <fenghua.yu@intel.com>, Shakeel Butt <shakeelb@google.com>, Valentin Schneider <valentin.schneider@arm.com>, Reinette Chatre <reinette.chatre@intel.com>, Borislav Petkov <bp@suse.de>, Tony Luck <tony.luck@intel.com>, James Morse <james.morse@arm.com>
+Message-ID: <3a470a8ff17dfcf1a3d5afc189b74050a2634c2a.1610394119.git.reinette.chatre@intel.com>
+
+From: Fenghua Yu <fenghua.yu@intel.com>
+
+commit ae28d1aae48a1258bd09a6f707ebb4231d79a761 upstream
+
+Currently, when moving a task to a resource group the PQR_ASSOC MSR is
+updated with the new closid and rmid in an added task callback. If the
+task is running, the work is run as soon as possible. If the task is not
+running, the work is executed later in the kernel exit path when the
+kernel returns to the task again.
+
+Updating the PQR_ASSOC MSR as soon as possible on the CPU a moved task
+is running is the right thing to do. Queueing work for a task that is
+not running is unnecessary (the PQR_ASSOC MSR is already updated when
+the task is scheduled in) and causing system resource waste with the way
+in which it is implemented: Work to update the PQR_ASSOC register is
+queued every time the user writes a task id to the "tasks" file, even if
+the task already belongs to the resource group.
+
+This could result in multiple pending work items associated with a
+single task even if they are all identical and even though only a single
+update with most recent values is needed. Specifically, even if a task
+is moved between different resource groups while it is sleeping then it
+is only the last move that is relevant but yet a work item is queued
+during each move.
+
+This unnecessary queueing of work items could result in significant
+system resource waste, especially on tasks sleeping for a long time.
+For example, as demonstrated by Shakeel Butt in [1] writing the same
+task id to the "tasks" file can quickly consume significant memory. The
+same problem (wasted system resources) occurs when moving a task between
+different resource groups.
+
+As pointed out by Valentin Schneider in [2] there is an additional issue
+with the way in which the queueing of work is done in that the task_struct
+update is currently done after the work is queued, resulting in a race with
+the register update possibly done before the data needed by the update is
+available.
+
+To solve these issues, update the PQR_ASSOC MSR in a synchronous way
+right after the new closid and rmid are ready during the task movement,
+only if the task is running. If a moved task is not running nothing
+is done since the PQR_ASSOC MSR will be updated next time the task is
+scheduled. This is the same way used to update the register when tasks
+are moved as part of resource group removal.
+
+[1] https://lore.kernel.org/lkml/CALvZod7E9zzHwenzf7objzGKsdBmVwTgEJ0nPgs0LUFU3SN5Pw@mail.gmail.com/
+[2] https://lore.kernel.org/lkml/20201123022433.17905-1-valentin.schneider@arm.com
+
+ [ bp: Massage commit message and drop the two update_task_closid_rmid()
+   variants. ]
+
+Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files")
+Reported-by: Shakeel Butt <shakeelb@google.com>
+Reported-by: Valentin Schneider <valentin.schneider@arm.com>
+Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
+Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: James Morse <james.morse@arm.com>
+Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/17aa2fb38fc12ce7bb710106b3e7c7b45acb9e94.1608243147.git.reinette.chatre@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/resctrl/rdtgroup.c |  108 +++++++++++++--------------------
+ 1 file changed, 43 insertions(+), 65 deletions(-)
+
+--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+@@ -525,85 +525,63 @@ static void rdtgroup_remove(struct rdtgr
+ 	kfree(rdtgrp);
+ }
+ 
+-struct task_move_callback {
+-	struct callback_head	work;
+-	struct rdtgroup		*rdtgrp;
+-};
+-
+-static void move_myself(struct callback_head *head)
++static void _update_task_closid_rmid(void *task)
+ {
+-	struct task_move_callback *callback;
+-	struct rdtgroup *rdtgrp;
+-
+-	callback = container_of(head, struct task_move_callback, work);
+-	rdtgrp = callback->rdtgrp;
+-
+ 	/*
+-	 * If resource group was deleted before this task work callback
+-	 * was invoked, then assign the task to root group and free the
+-	 * resource group.
++	 * If the task is still current on this CPU, update PQR_ASSOC MSR.
++	 * Otherwise, the MSR is updated when the task is scheduled in.
+ 	 */
+-	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
+-	    (rdtgrp->flags & RDT_DELETED)) {
+-		current->closid = 0;
+-		current->rmid = 0;
+-		rdtgroup_remove(rdtgrp);
+-	}
+-
+-	preempt_disable();
+-	/* update PQR_ASSOC MSR to make resource group go into effect */
+-	resctrl_sched_in();
+-	preempt_enable();
++	if (task == current)
++		resctrl_sched_in();
++}
+ 
+-	kfree(callback);
++static void update_task_closid_rmid(struct task_struct *t)
++{
++	if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
++		smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
++	else
++		_update_task_closid_rmid(t);
+ }
+ 
+ static int __rdtgroup_move_task(struct task_struct *tsk,
+ 				struct rdtgroup *rdtgrp)
+ {
+-	struct task_move_callback *callback;
+-	int ret;
+-
+-	callback = kzalloc(sizeof(*callback), GFP_KERNEL);
+-	if (!callback)
+-		return -ENOMEM;
+-	callback->work.func = move_myself;
+-	callback->rdtgrp = rdtgrp;
+-
+ 	/*
+-	 * Take a refcount, so rdtgrp cannot be freed before the
+-	 * callback has been invoked.
++	 * Set the task's closid/rmid before the PQR_ASSOC MSR can be
++	 * updated by them.
++	 *
++	 * For ctrl_mon groups, move both closid and rmid.
++	 * For monitor groups, can move the tasks only from
++	 * their parent CTRL group.
+ 	 */
+-	atomic_inc(&rdtgrp->waitcount);
+-	ret = task_work_add(tsk, &callback->work, true);
+-	if (ret) {
+-		/*
+-		 * Task is exiting. Drop the refcount and free the callback.
+-		 * No need to check the refcount as the group cannot be
+-		 * deleted before the write function unlocks rdtgroup_mutex.
+-		 */
+-		atomic_dec(&rdtgrp->waitcount);
+-		kfree(callback);
+-		rdt_last_cmd_puts("Task exited\n");
+-	} else {
+-		/*
+-		 * For ctrl_mon groups move both closid and rmid.
+-		 * For monitor groups, can move the tasks only from
+-		 * their parent CTRL group.
+-		 */
+-		if (rdtgrp->type == RDTCTRL_GROUP) {
+-			tsk->closid = rdtgrp->closid;
++
++	if (rdtgrp->type == RDTCTRL_GROUP) {
++		tsk->closid = rdtgrp->closid;
++		tsk->rmid = rdtgrp->mon.rmid;
++	} else if (rdtgrp->type == RDTMON_GROUP) {
++		if (rdtgrp->mon.parent->closid == tsk->closid) {
+ 			tsk->rmid = rdtgrp->mon.rmid;
+-		} else if (rdtgrp->type == RDTMON_GROUP) {
+-			if (rdtgrp->mon.parent->closid == tsk->closid) {
+-				tsk->rmid = rdtgrp->mon.rmid;
+-			} else {
+-				rdt_last_cmd_puts("Can't move task to different control group\n");
+-				ret = -EINVAL;
+-			}
++		} else {
++			rdt_last_cmd_puts("Can't move task to different control group\n");
++			return -EINVAL;
+ 		}
+ 	}
+-	return ret;
++
++	/*
++	 * Ensure the task's closid and rmid are written before determining if
++	 * the task is current that will decide if it will be interrupted.
++	 */
++	barrier();
++
++	/*
++	 * By now, the task's closid and rmid are set. If the task is current
++	 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
++	 * group go into effect. If the task is not current, the MSR will be
++	 * updated when the task is scheduled in.
++	 */
++	update_task_closid_rmid(tsk);
++
++	return 0;
+ }
+ 
+ /**
-- 
2.47.3