git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 15 Jan 2021 09:59:07 +0000 (10:59 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 15 Jan 2021 09:59:07 +0000 (10:59 +0100)
added patches:
exfat-month-timestamp-metadata-accidentally-incremented.patch
hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch
iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch
vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch
x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch
x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch

queue-5.4/exfat-month-timestamp-metadata-accidentally-incremented.patch [new file with mode: 0644]
queue-5.4/hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch [new file with mode: 0644]
queue-5.4/iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch [new file with mode: 0644]
queue-5.4/series
queue-5.4/vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch [new file with mode: 0644]
queue-5.4/x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch [new file with mode: 0644]
queue-5.4/x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch [new file with mode: 0644]

diff --git a/queue-5.4/exfat-month-timestamp-metadata-accidentally-incremented.patch b/queue-5.4/exfat-month-timestamp-metadata-accidentally-incremented.patch
new file mode 100644 (file)
index 0000000..2a6ac5f
--- /dev/null
@@ -0,0 +1,40 @@
+From valdis.kletnieks@vt.edu  Fri Jan 15 10:42:32 2021
+From: "Valdis Klētnieks" <valdis.kletnieks@vt.edu>
+Date: Fri, 15 Jan 2021 02:38:19 -0500
+Subject: exfat: Month timestamp metadata accidentally incremented
+To: Greg KH <greg@kroah.com>, arpad.mueller@uni-bonn.de
+Cc: stable@vger.kernel.org
+Message-ID: <6161.1610696299@turing-police>
+
+From: "Valdis Klētnieks" <valdis.kletnieks@vt.edu>
+
+The staging/exfat driver has departed, but a lot of distros are still tracking
+5.4-stable, so we should fix this.
+
+There was an 0/1 offset error in month handling for file metadata, causing
+the month to get incremented on each reference to the file.
+
+Thanks to Sebastian Gurtler for troubleshooting this, and Arpad Mueller
+for bringing it to my attention.
+
+Relevant discussions:
+https://bugzilla.kernel.org/show_bug.cgi?id=210997
+https://bugs.launchpad.net/ubuntu/+source/ubuntu-meta/+bug/1872504
+
+Signed-off-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/staging/exfat/exfat_super.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/staging/exfat/exfat_super.c
++++ b/drivers/staging/exfat/exfat_super.c
+@@ -59,7 +59,7 @@ static void exfat_write_super(struct sup
+ /* Convert a FAT time/date pair to a UNIX date (seconds since 1 1 70). */
+ static void exfat_time_fat2unix(struct timespec64 *ts, struct date_time_t *tp)
+ {
+-      ts->tv_sec = mktime64(tp->Year + 1980, tp->Month + 1, tp->Day,
++      ts->tv_sec = mktime64(tp->Year + 1980, tp->Month, tp->Day,
+                             tp->Hour, tp->Minute, tp->Second);
+       ts->tv_nsec = tp->MilliSecond * NSEC_PER_MSEC;
diff --git a/queue-5.4/hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch b/queue-5.4/hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch
new file mode 100644 (file)
index 0000000..185c6c7
--- /dev/null
@@ -0,0 +1,108 @@
+From 37309f47e2f5674f3e86cb765312ace42cfcedf5 Mon Sep 17 00:00:00 2001
+From: Ping Cheng <pinglinux@gmail.com>
+Date: Wed, 9 Dec 2020 20:52:30 -0800
+Subject: HID: wacom: Fix memory leakage caused by kfifo_alloc
+
+From: Ping Cheng <pinglinux@gmail.com>
+
+commit 37309f47e2f5674f3e86cb765312ace42cfcedf5 upstream.
+
+As reported by syzbot below, kfifo_alloc'd memory would not be freed
+if a non-zero return value is triggered in wacom_probe. This patch
+creates and uses devm_kfifo_alloc to allocate and free itself.
+
+BUG: memory leak
+unreferenced object 0xffff88810dc44a00 (size 512):
+  comm "kworker/1:2", pid 3674, jiffies 4294943617 (age 14.100s)
+  hex dump (first 32 bytes):
+   00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+   00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+  backtrace:
+   [<0000000023e1afac>] kmalloc_array include/linux/slab.h:592 [inline]
+   [<0000000023e1afac>] __kfifo_alloc+0xad/0x100 lib/kfifo.c:43
+   [<00000000c477f737>] wacom_probe+0x1a1/0x3b0 drivers/hid/wacom_sys.c:2727
+   [<00000000b3109aca>] hid_device_probe+0x16b/0x210 drivers/hid/hid-core.c:2281
+   [<00000000aff7c640>] really_probe+0x159/0x480 drivers/base/dd.c:554
+   [<00000000778d0bc3>] driver_probe_device+0x84/0x100 drivers/base/dd.c:738
+   [<000000005108dbb5>] __device_attach_driver+0xee/0x110 drivers/base/dd.c:844
+   [<00000000efb7c59e>] bus_for_each_drv+0xb7/0x100 drivers/base/bus.c:431
+   [<0000000024ab1590>] __device_attach+0x122/0x250 drivers/base/dd.c:912
+   [<000000004c7ac048>] bus_probe_device+0xc6/0xe0 drivers/base/bus.c:491
+   [<00000000b93050a3>] device_add+0x5ac/0xc30 drivers/base/core.c:2936
+   [<00000000e5b46ea5>] hid_add_device+0x151/0x390 drivers/hid/hid-core.c:2437
+   [<00000000c6add147>] usbhid_probe+0x412/0x560 drivers/hid/usbhid/hid-core.c:1407
+   [<00000000c33acdb4>] usb_probe_interface+0x177/0x370 drivers/usb/core/driver.c:396
+   [<00000000aff7c640>] really_probe+0x159/0x480 drivers/base/dd.c:554
+   [<00000000778d0bc3>] driver_probe_device+0x84/0x100 drivers/base/dd.c:738
+   [<000000005108dbb5>] __device_attach_driver+0xee/0x110 drivers/base/dd.c:844
+
+https://syzkaller.appspot.com/bug?extid=5b49c9695968d7250a26
+
+Reported-by: syzbot+5b49c9695968d7250a26@syzkaller.appspotmail.com
+Signed-off-by: Ping Cheng <ping.cheng@wacom.com>
+Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hid/wacom_sys.c |   35 ++++++++++++++++++++++++++++++++---
+ 1 file changed, 32 insertions(+), 3 deletions(-)
+
+--- a/drivers/hid/wacom_sys.c
++++ b/drivers/hid/wacom_sys.c
+@@ -1270,6 +1270,37 @@ static int wacom_devm_sysfs_create_group
+                                              group);
+ }
++static void wacom_devm_kfifo_release(struct device *dev, void *res)
++{
++      struct kfifo_rec_ptr_2 *devres = res;
++
++      kfifo_free(devres);
++}
++
++static int wacom_devm_kfifo_alloc(struct wacom *wacom)
++{
++      struct wacom_wac *wacom_wac = &wacom->wacom_wac;
++      struct kfifo_rec_ptr_2 *pen_fifo = &wacom_wac->pen_fifo;
++      int error;
++
++      pen_fifo = devres_alloc(wacom_devm_kfifo_release,
++                            sizeof(struct kfifo_rec_ptr_2),
++                            GFP_KERNEL);
++
++      if (!pen_fifo)
++              return -ENOMEM;
++
++      error = kfifo_alloc(pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL);
++      if (error) {
++              devres_free(pen_fifo);
++              return error;
++      }
++
++      devres_add(&wacom->hdev->dev, pen_fifo);
++
++      return 0;
++}
++
+ enum led_brightness wacom_leds_brightness_get(struct wacom_led *led)
+ {
+       struct wacom *wacom = led->wacom;
+@@ -2724,7 +2755,7 @@ static int wacom_probe(struct hid_device
+       if (features->check_for_hid_type && features->hid_type != hdev->type)
+               return -ENODEV;
+-      error = kfifo_alloc(&wacom_wac->pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL);
++      error = wacom_devm_kfifo_alloc(wacom);
+       if (error)
+               return error;
+@@ -2786,8 +2817,6 @@ static void wacom_remove(struct hid_devi
+       if (wacom->wacom_wac.features.type != REMOTE)
+               wacom_release_resources(wacom);
+-
+-      kfifo_free(&wacom_wac->pen_fifo);
+ }
+ #ifdef CONFIG_PM
diff --git a/queue-5.4/iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch b/queue-5.4/iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch
new file mode 100644 (file)
index 0000000..0921cbe
--- /dev/null
@@ -0,0 +1,68 @@
+From foo@baz Fri Jan 15 10:51:55 AM CET 2021
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Sat, 14 Nov 2020 19:39:05 +0100
+Subject: iio: imu: st_lsm6dsx: fix edge-trigger interrupts
+
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+
+commit 3f9bce7a22a3f8ac9d885c9d75bc45569f24ac8b upstream
+
+If we are using edge IRQs, new samples can arrive while processing
+current interrupt since there are no hw guarantees the irq line
+stays "low" long enough to properly detect the new interrupt.
+In this case the new sample will be missed.
+Polling FIFO status register in st_lsm6dsx_handler_thread routine
+allows us to read new samples even if the interrupt arrives while
+processing previous data and the timeslot where the line is "low"
+is too short to be properly detected.
+
+Fixes: 89ca88a7cdf2 ("iio: imu: st_lsm6dsx: support active-low interrupts")
+Fixes: 290a6ce11d93 ("iio: imu: add support to lsm6dsx driver")
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Link: https://lore.kernel.org/r/5e93cda7dc1e665f5685c53ad8e9ea71dbae782d.1605378871.git.lorenzo@kernel.org
+Cc: <Stable@vger.kernel.org>
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+[sudip: manual backport to old irq handler path]
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c |   26 ++++++++++++++++++++-----
+ 1 file changed, 21 insertions(+), 5 deletions(-)
+
+--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
++++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
+@@ -664,13 +664,29 @@ static irqreturn_t st_lsm6dsx_handler_ir
+ static irqreturn_t st_lsm6dsx_handler_thread(int irq, void *private)
+ {
+       struct st_lsm6dsx_hw *hw = private;
+-      int count;
++      int fifo_len = 0, len;
+-      mutex_lock(&hw->fifo_lock);
+-      count = hw->settings->fifo_ops.read_fifo(hw);
+-      mutex_unlock(&hw->fifo_lock);
++      /*
++       * If we are using edge IRQs, new samples can arrive while
++       * processing current interrupt since there are no hw
++       * guarantees the irq line stays "low" long enough to properly
++       * detect the new interrupt. In this case the new sample will
++       * be missed.
++       * Polling FIFO status register allow us to read new
++       * samples even if the interrupt arrives while processing
++       * previous data and the timeslot where the line is "low" is
++       * too short to be properly detected.
++       */
++      do {
++              mutex_lock(&hw->fifo_lock);
++              len = hw->settings->fifo_ops.read_fifo(hw);
++              mutex_unlock(&hw->fifo_lock);
+-      return count ? IRQ_HANDLED : IRQ_NONE;
++              if (len > 0)
++                      fifo_len += len;
++      } while (len > 0);
++
++      return fifo_len ? IRQ_HANDLED : IRQ_NONE;
+ }
+ static int st_lsm6dsx_buffer_preenable(struct iio_dev *iio_dev)
index f4a70a2cc989d11cb76070da91468bf12454207f..e65ab46c973a668d7b3d099fdef0397bac6eebac 100644 (file)
@@ -24,3 +24,9 @@ chtls-fix-panic-when-route-to-peer-not-configured.patch
 chtls-replace-skb_dequeue-with-skb_peek.patch
 chtls-added-a-check-to-avoid-null-pointer-dereference.patch
 chtls-fix-chtls-resources-release-sequence.patch
+x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch
+x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch
+exfat-month-timestamp-metadata-accidentally-incremented.patch
+vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch
+iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch
+hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch
diff --git a/queue-5.4/vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch b/queue-5.4/vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch
new file mode 100644 (file)
index 0000000..c5bf91c
--- /dev/null
@@ -0,0 +1,86 @@
+From foo@baz Fri Jan 15 10:46:00 AM CET 2021
+From: Nick Desaulniers <ndesaulniers@google.com>
+Date: Fri, 21 Aug 2020 12:42:47 -0700
+Subject: vmlinux.lds.h: Add PGO and AutoFDO input sections
+
+From: Nick Desaulniers <ndesaulniers@google.com>
+
+commit eff8728fe69880d3f7983bec3fb6cea4c306261f upstream.
+
+Basically, consider .text.{hot|unlikely|unknown}.* part of .text, too.
+
+When compiling with profiling information (collected via PGO
+instrumentations or AutoFDO sampling), Clang will separate code into
+.text.hot, .text.unlikely, or .text.unknown sections based on profiling
+information. After D79600 (clang-11), these sections will have a
+trailing `.` suffix, ie.  .text.hot., .text.unlikely., .text.unknown..
+
+When using -ffunction-sections together with profiling information,
+either explicitly (FGKASLR) or implicitly (LTO), code may be placed in
+sections following the convention:
+.text.hot.<foo>, .text.unlikely.<bar>, .text.unknown.<baz>
+where <foo>, <bar>, and <baz> are functions.  (This produces one section
+per function; we generally try to merge these all back via linker script
+so that we don't have 50k sections).
+
+For the above cases, we need to teach our linker scripts that such
+sections might exist and that we'd explicitly like them grouped
+together, otherwise we can wind up with code outside of the
+_stext/_etext boundaries that might not be mapped properly for some
+architectures, resulting in boot failures.
+
+If the linker script is not told about possible input sections, then
+where the section is placed as output is a heuristic-laden mess that's
+non-portable between linkers (ie. BFD and LLD), and has resulted in many
+hard to debug bugs.  Kees Cook is working on cleaning this up by adding
+--orphan-handling=warn linker flag used in ARCH=powerpc to additional
+architectures. In the case of linker scripts, borrowing from the Zen of
+Python: explicit is better than implicit.
+
+Also, ld.bfd's internal linker script considers .text.hot AND
+.text.hot.* to be part of .text, as well as .text.unlikely and
+.text.unlikely.*. I didn't see support for .text.unknown.*, and didn't
+see Clang producing such code in our kernel builds, but I see code in
+LLVM that can produce such section names if profiling information is
+missing. That may point to a larger issue with generating or collecting
+profiles, but I would much rather be safe and explicit than have to
+debug yet another issue related to orphan section placement.
+
+Reported-by: Jian Cai <jiancai@google.com>
+Suggested-by: Fāng-ruì Sòng <maskray@google.com>
+Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Tested-by: Luis Lozano <llozano@google.com>
+Tested-by: Manoj Gupta <manojgupta@google.com>
+Acked-by: Kees Cook <keescook@chromium.org>
+Cc: linux-arch@vger.kernel.org
+Cc: stable@vger.kernel.org
+Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=add44f8d5c5c05e08b11e033127a744d61c26aee
+Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=1de778ed23ce7492c523d5850c6c6dbb34152655
+Link: https://reviews.llvm.org/D79600
+Link: https://bugs.chromium.org/p/chromium/issues/detail?id=1084760
+Link: https://lore.kernel.org/r/20200821194310.3089815-7-keescook@chromium.org
+
+Debugged-by: Luis Lozano <llozano@google.com>
+[nc: Resolve small conflict due to lack of NOINSTR_TEXT]
+Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/asm-generic/vmlinux.lds.h |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -520,7 +520,10 @@
+  */
+ #define TEXT_TEXT                                                     \
+               ALIGN_FUNCTION();                                       \
+-              *(.text.hot TEXT_MAIN .text.fixup .text.unlikely)       \
++              *(.text.hot .text.hot.*)                                \
++              *(TEXT_MAIN .text.fixup)                                \
++              *(.text.unlikely .text.unlikely.*)                      \
++              *(.text.unknown .text.unknown.*)                        \
+               *(.text..refcount)                                      \
+               *(.ref.text)                                            \
+       MEM_KEEP(init.text*)                                            \
diff --git a/queue-5.4/x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch b/queue-5.4/x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch
new file mode 100644 (file)
index 0000000..cbfbd9f
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Fri Jan 15 10:39:41 AM CET 2021
+From: Reinette Chatre <reinette.chatre@intel.com>
+Date: Mon, 11 Jan 2021 15:21:41 -0800
+Subject: x86/resctrl: Don't move a task to the same resource group
+To: stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: Fenghua Yu <fenghua.yu@intel.com>, Shakeel Butt <shakeelb@google.com>, Reinette Chatre <reinette.chatre@intel.com>, Borislav Petkov <bp@suse.de>, Tony Luck <tony.luck@intel.com>
+Message-ID: <189c5c33fe6def640b0ac8807cf819d7541bfa46.1610394119.git.reinette.chatre@intel.com>
+
+From: Fenghua Yu <fenghua.yu@intel.com>
+
+commit a0195f314a25582b38993bf30db11c300f4f4611 upstream
+
+Shakeel Butt reported in [1] that a user can request a task to be moved
+to a resource group even if the task is already in the group. It just
+wastes time to do the move operation which could be costly to send IPI
+to a different CPU.
+
+Add a sanity check to ensure that the move operation only happens when
+the task is not already in the resource group.
+
+[1] https://lore.kernel.org/lkml/CALvZod7E9zzHwenzf7objzGKsdBmVwTgEJ0nPgs0LUFU3SN5Pw@mail.gmail.com/
+
+Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files")
+Reported-by: Shakeel Butt <shakeelb@google.com>
+Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
+Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/962ede65d8e95be793cb61102cca37f7bb018e66.1608243147.git.reinette.chatre@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/resctrl/rdtgroup.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+@@ -546,6 +546,13 @@ static void update_task_closid_rmid(stru
+ static int __rdtgroup_move_task(struct task_struct *tsk,
+                               struct rdtgroup *rdtgrp)
+ {
++      /* If the task is already in rdtgrp, no need to move the task. */
++      if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
++           tsk->rmid == rdtgrp->mon.rmid) ||
++          (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
++           tsk->closid == rdtgrp->mon.parent->closid))
++              return 0;
++
+       /*
+        * Set the task's closid/rmid before the PQR_ASSOC MSR can be
+        * updated by them.
diff --git a/queue-5.4/x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch b/queue-5.4/x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch
new file mode 100644 (file)
index 0000000..cd6fc82
--- /dev/null
@@ -0,0 +1,206 @@
+From foo@baz Fri Jan 15 10:39:41 AM CET 2021
+From: Reinette Chatre <reinette.chatre@intel.com>
+Date: Mon, 11 Jan 2021 15:21:13 -0800
+Subject: x86/resctrl: Use an IPI instead of task_work_add() to update PQR_ASSOC MSR
+To: stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: Fenghua Yu <fenghua.yu@intel.com>, Shakeel Butt <shakeelb@google.com>, Valentin Schneider <valentin.schneider@arm.com>, Reinette Chatre <reinette.chatre@intel.com>, Borislav Petkov <bp@suse.de>, Tony Luck <tony.luck@intel.com>, James Morse <james.morse@arm.com>
+Message-ID: <3a470a8ff17dfcf1a3d5afc189b74050a2634c2a.1610394119.git.reinette.chatre@intel.com>
+
+From: Fenghua Yu <fenghua.yu@intel.com>
+
+commit ae28d1aae48a1258bd09a6f707ebb4231d79a761 upstream
+
+Currently, when moving a task to a resource group the PQR_ASSOC MSR is
+updated with the new closid and rmid in an added task callback. If the
+task is running, the work is run as soon as possible. If the task is not
+running, the work is executed later in the kernel exit path when the
+kernel returns to the task again.
+
+Updating the PQR_ASSOC MSR as soon as possible on the CPU a moved task
+is running is the right thing to do. Queueing work for a task that is
+not running is unnecessary (the PQR_ASSOC MSR is already updated when
+the task is scheduled in) and causing system resource waste with the way
+in which it is implemented: Work to update the PQR_ASSOC register is
+queued every time the user writes a task id to the "tasks" file, even if
+the task already belongs to the resource group.
+
+This could result in multiple pending work items associated with a
+single task even if they are all identical and even though only a single
+update with most recent values is needed. Specifically, even if a task
+is moved between different resource groups while it is sleeping then it
+is only the last move that is relevant but yet a work item is queued
+during each move.
+
+This unnecessary queueing of work items could result in significant
+system resource waste, especially on tasks sleeping for a long time.
+For example, as demonstrated by Shakeel Butt in [1] writing the same
+task id to the "tasks" file can quickly consume significant memory. The
+same problem (wasted system resources) occurs when moving a task between
+different resource groups.
+
+As pointed out by Valentin Schneider in [2] there is an additional issue
+with the way in which the queueing of work is done in that the task_struct
+update is currently done after the work is queued, resulting in a race with
+the register update possibly done before the data needed by the update is
+available.
+
+To solve these issues, update the PQR_ASSOC MSR in a synchronous way
+right after the new closid and rmid are ready during the task movement,
+only if the task is running. If a moved task is not running nothing
+is done since the PQR_ASSOC MSR will be updated next time the task is
+scheduled. This is the same way used to update the register when tasks
+are moved as part of resource group removal.
+
+[1] https://lore.kernel.org/lkml/CALvZod7E9zzHwenzf7objzGKsdBmVwTgEJ0nPgs0LUFU3SN5Pw@mail.gmail.com/
+[2] https://lore.kernel.org/lkml/20201123022433.17905-1-valentin.schneider@arm.com
+
+ [ bp: Massage commit message and drop the two update_task_closid_rmid()
+   variants. ]
+
+Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files")
+Reported-by: Shakeel Butt <shakeelb@google.com>
+Reported-by: Valentin Schneider <valentin.schneider@arm.com>
+Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
+Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: James Morse <james.morse@arm.com>
+Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/17aa2fb38fc12ce7bb710106b3e7c7b45acb9e94.1608243147.git.reinette.chatre@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/resctrl/rdtgroup.c |  108 +++++++++++++--------------------
+ 1 file changed, 43 insertions(+), 65 deletions(-)
+
+--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+@@ -525,85 +525,63 @@ static void rdtgroup_remove(struct rdtgr
+       kfree(rdtgrp);
+ }
+-struct task_move_callback {
+-      struct callback_head    work;
+-      struct rdtgroup         *rdtgrp;
+-};
+-
+-static void move_myself(struct callback_head *head)
++static void _update_task_closid_rmid(void *task)
+ {
+-      struct task_move_callback *callback;
+-      struct rdtgroup *rdtgrp;
+-
+-      callback = container_of(head, struct task_move_callback, work);
+-      rdtgrp = callback->rdtgrp;
+-
+       /*
+-       * If resource group was deleted before this task work callback
+-       * was invoked, then assign the task to root group and free the
+-       * resource group.
++       * If the task is still current on this CPU, update PQR_ASSOC MSR.
++       * Otherwise, the MSR is updated when the task is scheduled in.
+        */
+-      if (atomic_dec_and_test(&rdtgrp->waitcount) &&
+-          (rdtgrp->flags & RDT_DELETED)) {
+-              current->closid = 0;
+-              current->rmid = 0;
+-              rdtgroup_remove(rdtgrp);
+-      }
+-
+-      preempt_disable();
+-      /* update PQR_ASSOC MSR to make resource group go into effect */
+-      resctrl_sched_in();
+-      preempt_enable();
++      if (task == current)
++              resctrl_sched_in();
++}
+-      kfree(callback);
++static void update_task_closid_rmid(struct task_struct *t)
++{
++      if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
++              smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
++      else
++              _update_task_closid_rmid(t);
+ }
+ static int __rdtgroup_move_task(struct task_struct *tsk,
+                               struct rdtgroup *rdtgrp)
+ {
+-      struct task_move_callback *callback;
+-      int ret;
+-
+-      callback = kzalloc(sizeof(*callback), GFP_KERNEL);
+-      if (!callback)
+-              return -ENOMEM;
+-      callback->work.func = move_myself;
+-      callback->rdtgrp = rdtgrp;
+-
+       /*
+-       * Take a refcount, so rdtgrp cannot be freed before the
+-       * callback has been invoked.
++       * Set the task's closid/rmid before the PQR_ASSOC MSR can be
++       * updated by them.
++       *
++       * For ctrl_mon groups, move both closid and rmid.
++       * For monitor groups, can move the tasks only from
++       * their parent CTRL group.
+        */
+-      atomic_inc(&rdtgrp->waitcount);
+-      ret = task_work_add(tsk, &callback->work, true);
+-      if (ret) {
+-              /*
+-               * Task is exiting. Drop the refcount and free the callback.
+-               * No need to check the refcount as the group cannot be
+-               * deleted before the write function unlocks rdtgroup_mutex.
+-               */
+-              atomic_dec(&rdtgrp->waitcount);
+-              kfree(callback);
+-              rdt_last_cmd_puts("Task exited\n");
+-      } else {
+-              /*
+-               * For ctrl_mon groups move both closid and rmid.
+-               * For monitor groups, can move the tasks only from
+-               * their parent CTRL group.
+-               */
+-              if (rdtgrp->type == RDTCTRL_GROUP) {
+-                      tsk->closid = rdtgrp->closid;
++
++      if (rdtgrp->type == RDTCTRL_GROUP) {
++              tsk->closid = rdtgrp->closid;
++              tsk->rmid = rdtgrp->mon.rmid;
++      } else if (rdtgrp->type == RDTMON_GROUP) {
++              if (rdtgrp->mon.parent->closid == tsk->closid) {
+                       tsk->rmid = rdtgrp->mon.rmid;
+-              } else if (rdtgrp->type == RDTMON_GROUP) {
+-                      if (rdtgrp->mon.parent->closid == tsk->closid) {
+-                              tsk->rmid = rdtgrp->mon.rmid;
+-                      } else {
+-                              rdt_last_cmd_puts("Can't move task to different control group\n");
+-                              ret = -EINVAL;
+-                      }
++              } else {
++                      rdt_last_cmd_puts("Can't move task to different control group\n");
++                      return -EINVAL;
+               }
+       }
+-      return ret;
++
++      /*
++       * Ensure the task's closid and rmid are written before determining if
++       * the task is current that will decide if it will be interrupted.
++       */
++      barrier();
++
++      /*
++       * By now, the task's closid and rmid are set. If the task is current
++       * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
++       * group go into effect. If the task is not current, the MSR will be
++       * updated when the task is scheduled in.
++       */
++      update_task_closid_rmid(tsk);
++
++      return 0;
+ }
+ /**