From ddf43da06aa024c0f91a2c54908e8221df8b1406 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 15 Jan 2021 10:59:07 +0100 Subject: [PATCH] 5.4-stable patches added patches: exfat-month-timestamp-metadata-accidentally-incremented.patch hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch --- ...mp-metadata-accidentally-incremented.patch | 40 ++++ ...memory-leakage-caused-by-kfifo_alloc.patch | 108 +++++++++ ..._lsm6dsx-fix-edge-trigger-interrupts.patch | 68 ++++++ queue-5.4/series | 6 + ...h-add-pgo-and-autofdo-input-sections.patch | 86 ++++++++ ...ve-a-task-to-the-same-resource-group.patch | 51 +++++ ...ask_work_add-to-update-pqr_assoc-msr.patch | 206 ++++++++++++++++++ 7 files changed, 565 insertions(+) create mode 100644 queue-5.4/exfat-month-timestamp-metadata-accidentally-incremented.patch create mode 100644 queue-5.4/hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch create mode 100644 queue-5.4/iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch create mode 100644 queue-5.4/vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch create mode 100644 queue-5.4/x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch create mode 100644 queue-5.4/x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch diff --git a/queue-5.4/exfat-month-timestamp-metadata-accidentally-incremented.patch b/queue-5.4/exfat-month-timestamp-metadata-accidentally-incremented.patch new file mode 100644 index 00000000000..2a6ac5f5f8f --- /dev/null +++ b/queue-5.4/exfat-month-timestamp-metadata-accidentally-incremented.patch @@ -0,0 +1,40 @@ +From valdis.kletnieks@vt.edu Fri Jan 15 10:42:32 2021 +From: "Valdis Klētnieks" +Date: Fri, 15 Jan 2021 02:38:19 -0500 +Subject: exfat: Month timestamp 
metadata accidentally incremented +To: Greg KH , arpad.mueller@uni-bonn.de +Cc: stable@vger.kernel.org +Message-ID: <6161.1610696299@turing-police> + +From: "Valdis Klētnieks" + +The staging/exfat driver has departed, but a lot of distros are still tracking +5.4-stable, so we should fix this. + +There was an 0/1 offset error in month handling for file metadata, causing +the month to get incremented on each reference to the file. + +Thanks to Sebastian Gurtler for troubleshooting this, and Arpad Mueller +for bringing it to my attention. + +Relevant discussions: +https://bugzilla.kernel.org/show_bug.cgi?id=210997 +https://bugs.launchpad.net/ubuntu/+source/ubuntu-meta/+bug/1872504 + +Signed-off-by: Valdis Kletnieks +Signed-off-by: Greg Kroah-Hartman +--- + drivers/staging/exfat/exfat_super.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/staging/exfat/exfat_super.c ++++ b/drivers/staging/exfat/exfat_super.c +@@ -59,7 +59,7 @@ static void exfat_write_super(struct sup + /* Convert a FAT time/date pair to a UNIX date (seconds since 1 1 70). */ + static void exfat_time_fat2unix(struct timespec64 *ts, struct date_time_t *tp) + { +- ts->tv_sec = mktime64(tp->Year + 1980, tp->Month + 1, tp->Day, ++ ts->tv_sec = mktime64(tp->Year + 1980, tp->Month, tp->Day, + tp->Hour, tp->Minute, tp->Second); + + ts->tv_nsec = tp->MilliSecond * NSEC_PER_MSEC; diff --git a/queue-5.4/hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch b/queue-5.4/hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch new file mode 100644 index 00000000000..185c6c7cda4 --- /dev/null +++ b/queue-5.4/hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch @@ -0,0 +1,108 @@ +From 37309f47e2f5674f3e86cb765312ace42cfcedf5 Mon Sep 17 00:00:00 2001 +From: Ping Cheng +Date: Wed, 9 Dec 2020 20:52:30 -0800 +Subject: HID: wacom: Fix memory leakage caused by kfifo_alloc + +From: Ping Cheng + +commit 37309f47e2f5674f3e86cb765312ace42cfcedf5 upstream. 
+ +As reported by syzbot below, kfifo_alloc'd memory would not be freed +if a non-zero return value is triggered in wacom_probe. This patch +creates and uses devm_kfifo_alloc to allocate and free itself. + +BUG: memory leak +unreferenced object 0xffff88810dc44a00 (size 512): + comm "kworker/1:2", pid 3674, jiffies 4294943617 (age 14.100s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + backtrace: + [<0000000023e1afac>] kmalloc_array include/linux/slab.h:592 [inline] + [<0000000023e1afac>] __kfifo_alloc+0xad/0x100 lib/kfifo.c:43 + [<00000000c477f737>] wacom_probe+0x1a1/0x3b0 drivers/hid/wacom_sys.c:2727 + [<00000000b3109aca>] hid_device_probe+0x16b/0x210 drivers/hid/hid-core.c:2281 + [<00000000aff7c640>] really_probe+0x159/0x480 drivers/base/dd.c:554 + [<00000000778d0bc3>] driver_probe_device+0x84/0x100 drivers/base/dd.c:738 + [<000000005108dbb5>] __device_attach_driver+0xee/0x110 drivers/base/dd.c:844 + [<00000000efb7c59e>] bus_for_each_drv+0xb7/0x100 drivers/base/bus.c:431 + [<0000000024ab1590>] __device_attach+0x122/0x250 drivers/base/dd.c:912 + [<000000004c7ac048>] bus_probe_device+0xc6/0xe0 drivers/base/bus.c:491 + [<00000000b93050a3>] device_add+0x5ac/0xc30 drivers/base/core.c:2936 + [<00000000e5b46ea5>] hid_add_device+0x151/0x390 drivers/hid/hid-core.c:2437 + [<00000000c6add147>] usbhid_probe+0x412/0x560 drivers/hid/usbhid/hid-core.c:1407 + [<00000000c33acdb4>] usb_probe_interface+0x177/0x370 drivers/usb/core/driver.c:396 + [<00000000aff7c640>] really_probe+0x159/0x480 drivers/base/dd.c:554 + [<00000000778d0bc3>] driver_probe_device+0x84/0x100 drivers/base/dd.c:738 + [<000000005108dbb5>] __device_attach_driver+0xee/0x110 drivers/base/dd.c:844 + +https://syzkaller.appspot.com/bug?extid=5b49c9695968d7250a26 + +Reported-by: syzbot+5b49c9695968d7250a26@syzkaller.appspotmail.com +Signed-off-by: Ping Cheng +Reviewed-by: Benjamin Tissoires 
+Signed-off-by: Jiri Kosina +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/hid/wacom_sys.c | 35 ++++++++++++++++++++++++++++++++--- + 1 file changed, 32 insertions(+), 3 deletions(-) + +--- a/drivers/hid/wacom_sys.c ++++ b/drivers/hid/wacom_sys.c +@@ -1270,6 +1270,37 @@ static int wacom_devm_sysfs_create_group + group); + } + ++static void wacom_devm_kfifo_release(struct device *dev, void *res) ++{ ++ struct kfifo_rec_ptr_2 *devres = res; ++ ++ kfifo_free(devres); ++} ++ ++static int wacom_devm_kfifo_alloc(struct wacom *wacom) ++{ ++ struct wacom_wac *wacom_wac = &wacom->wacom_wac; ++ struct kfifo_rec_ptr_2 *pen_fifo = &wacom_wac->pen_fifo; ++ int error; ++ ++ pen_fifo = devres_alloc(wacom_devm_kfifo_release, ++ sizeof(struct kfifo_rec_ptr_2), ++ GFP_KERNEL); ++ ++ if (!pen_fifo) ++ return -ENOMEM; ++ ++ error = kfifo_alloc(pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL); ++ if (error) { ++ devres_free(pen_fifo); ++ return error; ++ } ++ ++ devres_add(&wacom->hdev->dev, pen_fifo); ++ ++ return 0; ++} ++ + enum led_brightness wacom_leds_brightness_get(struct wacom_led *led) + { + struct wacom *wacom = led->wacom; +@@ -2724,7 +2755,7 @@ static int wacom_probe(struct hid_device + if (features->check_for_hid_type && features->hid_type != hdev->type) + return -ENODEV; + +- error = kfifo_alloc(&wacom_wac->pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL); ++ error = wacom_devm_kfifo_alloc(wacom); + if (error) + return error; + +@@ -2786,8 +2817,6 @@ static void wacom_remove(struct hid_devi + + if (wacom->wacom_wac.features.type != REMOTE) + wacom_release_resources(wacom); +- +- kfifo_free(&wacom_wac->pen_fifo); + } + + #ifdef CONFIG_PM diff --git a/queue-5.4/iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch b/queue-5.4/iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch new file mode 100644 index 00000000000..0921cbe0df2 --- /dev/null +++ b/queue-5.4/iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch @@ -0,0 +1,68 @@ +From foo@baz Fri Jan 15 10:51:55 AM CET 2021 +From: Lorenzo 
Bianconi +Date: Sat, 14 Nov 2020 19:39:05 +0100 +Subject: iio: imu: st_lsm6dsx: fix edge-trigger interrupts + +From: Lorenzo Bianconi + +commit 3f9bce7a22a3f8ac9d885c9d75bc45569f24ac8b upstream + +If we are using edge IRQs, new samples can arrive while processing +current interrupt since there are no hw guarantees the irq line +stays "low" long enough to properly detect the new interrupt. +In this case the new sample will be missed. +Polling FIFO status register in st_lsm6dsx_handler_thread routine +allow us to read new samples even if the interrupt arrives while +processing previous data and the timeslot where the line is "low" +is too short to be properly detected. + +Fixes: 89ca88a7cdf2 ("iio: imu: st_lsm6dsx: support active-low interrupts") +Fixes: 290a6ce11d93 ("iio: imu: add support to lsm6dsx driver") +Signed-off-by: Lorenzo Bianconi +Link: https://lore.kernel.org/r/5e93cda7dc1e665f5685c53ad8e9ea71dbae782d.1605378871.git.lorenzo@kernel.org +Cc: +Signed-off-by: Jonathan Cameron +[sudip: manual backport to old irq handler path] +Signed-off-by: Sudip Mukherjee +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c | 26 ++++++++++++++++++++----- + 1 file changed, 21 insertions(+), 5 deletions(-) + +--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c ++++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c +@@ -664,13 +664,29 @@ static irqreturn_t st_lsm6dsx_handler_ir + static irqreturn_t st_lsm6dsx_handler_thread(int irq, void *private) + { + struct st_lsm6dsx_hw *hw = private; +- int count; ++ int fifo_len = 0, len; + +- mutex_lock(&hw->fifo_lock); +- count = hw->settings->fifo_ops.read_fifo(hw); +- mutex_unlock(&hw->fifo_lock); ++ /* ++ * If we are using edge IRQs, new samples can arrive while ++ * processing current interrupt since there are no hw ++ * guarantees the irq line stays "low" long enough to properly ++ * detect the new interrupt. In this case the new sample will ++ * be missed. 
++ * Polling FIFO status register allow us to read new ++ * samples even if the interrupt arrives while processing ++ * previous data and the timeslot where the line is "low" is ++ * too short to be properly detected. ++ */ ++ do { ++ mutex_lock(&hw->fifo_lock); ++ len = hw->settings->fifo_ops.read_fifo(hw); ++ mutex_unlock(&hw->fifo_lock); + +- return count ? IRQ_HANDLED : IRQ_NONE; ++ if (len > 0) ++ fifo_len += len; ++ } while (len > 0); ++ ++ return fifo_len ? IRQ_HANDLED : IRQ_NONE; + } + + static int st_lsm6dsx_buffer_preenable(struct iio_dev *iio_dev) diff --git a/queue-5.4/series b/queue-5.4/series index f4a70a2cc98..e65ab46c973 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -24,3 +24,9 @@ chtls-fix-panic-when-route-to-peer-not-configured.patch chtls-replace-skb_dequeue-with-skb_peek.patch chtls-added-a-check-to-avoid-null-pointer-dereference.patch chtls-fix-chtls-resources-release-sequence.patch +x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch +x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch +exfat-month-timestamp-metadata-accidentally-incremented.patch +vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch +iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch +hid-wacom-fix-memory-leakage-caused-by-kfifo_alloc.patch diff --git a/queue-5.4/vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch b/queue-5.4/vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch new file mode 100644 index 00000000000..c5bf91c5958 --- /dev/null +++ b/queue-5.4/vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch @@ -0,0 +1,86 @@ +From foo@baz Fri Jan 15 10:46:00 AM CET 2021 +From: Nick Desaulniers +Date: Fri, 21 Aug 2020 12:42:47 -0700 +Subject: vmlinux.lds.h: Add PGO and AutoFDO input sections + +From: Nick Desaulniers + +commit eff8728fe69880d3f7983bec3fb6cea4c306261f upstream. + +Basically, consider .text.{hot|unlikely|unknown}.* part of .text, too. 
+ +When compiling with profiling information (collected via PGO +instrumentations or AutoFDO sampling), Clang will separate code into +.text.hot, .text.unlikely, or .text.unknown sections based on profiling +information. After D79600 (clang-11), these sections will have a +trailing `.` suffix, ie. .text.hot., .text.unlikely., .text.unknown.. + +When using -ffunction-sections together with profiling information, +either explicitly (FGKASLR) or implicitly (LTO), code may be placed in +sections following the convention: +.text.hot.<foo>, .text.unlikely.<bar>, .text.unknown.<baz> +where <foo>, <bar>, and <baz> are functions. (This produces one section +per function; we generally try to merge these all back via linker script +so that we don't have 50k sections). + +For the above cases, we need to teach our linker scripts that such +sections might exist and that we'd explicitly like them grouped +together, otherwise we can wind up with code outside of the +_stext/_etext boundaries that might not be mapped properly for some +architectures, resulting in boot failures. + +If the linker script is not told about possible input sections, then +where the section is placed as output is a heuristic-laden mess that's +non-portable between linkers (ie. BFD and LLD), and has resulted in many +hard to debug bugs. Kees Cook is working on cleaning this up by adding +--orphan-handling=warn linker flag used in ARCH=powerpc to additional +architectures. In the case of linker scripts, borrowing from the Zen of +Python: explicit is better than implicit. + +Also, ld.bfd's internal linker script considers .text.hot AND +.text.hot.* to be part of .text, as well as .text.unlikely and +.text.unlikely.*. I didn't see support for .text.unknown.*, and didn't +see Clang producing such code in our kernel builds, but I see code in +LLVM that can produce such section names if profiling information is +missing.
That may point to a larger issue with generating or collecting +profiles, but I would much rather be safe and explicit than have to +debug yet another issue related to orphan section placement. + +Reported-by: Jian Cai +Suggested-by: Fāng-ruì Sòng +Signed-off-by: Nick Desaulniers +Signed-off-by: Kees Cook +Signed-off-by: Ingo Molnar +Tested-by: Luis Lozano +Tested-by: Manoj Gupta +Acked-by: Kees Cook +Cc: linux-arch@vger.kernel.org +Cc: stable@vger.kernel.org +Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=add44f8d5c5c05e08b11e033127a744d61c26aee +Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=1de778ed23ce7492c523d5850c6c6dbb34152655 +Link: https://reviews.llvm.org/D79600 +Link: https://bugs.chromium.org/p/chromium/issues/detail?id=1084760 +Link: https://lore.kernel.org/r/20200821194310.3089815-7-keescook@chromium.org + +Debugged-by: Luis Lozano +[nc: Resolve small conflict due to lack of NOINSTR_TEXT] +Signed-off-by: Nathan Chancellor +Signed-off-by: Greg Kroah-Hartman +--- + include/asm-generic/vmlinux.lds.h | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -520,7 +520,10 @@ + */ + #define TEXT_TEXT \ + ALIGN_FUNCTION(); \ +- *(.text.hot TEXT_MAIN .text.fixup .text.unlikely) \ ++ *(.text.hot .text.hot.*) \ ++ *(TEXT_MAIN .text.fixup) \ ++ *(.text.unlikely .text.unlikely.*) \ ++ *(.text.unknown .text.unknown.*) \ + *(.text..refcount) \ + *(.ref.text) \ + MEM_KEEP(init.text*) \ diff --git a/queue-5.4/x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch b/queue-5.4/x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch new file mode 100644 index 00000000000..cbfbd9fbff4 --- /dev/null +++ b/queue-5.4/x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch @@ -0,0 +1,51 @@ +From foo@baz Fri Jan 15 10:39:41 AM CET 2021 +From: Reinette Chatre +Date: Mon, 11 Jan 2021 15:21:41 -0800 +Subject: x86/resctrl: Don't 
move a task to the same resource group +To: stable@vger.kernel.org, gregkh@linuxfoundation.org +Cc: Fenghua Yu , Shakeel Butt , Reinette Chatre , Borislav Petkov , Tony Luck +Message-ID: <189c5c33fe6def640b0ac8807cf819d7541bfa46.1610394119.git.reinette.chatre@intel.com> + +From: Fenghua Yu + +commit a0195f314a25582b38993bf30db11c300f4f4611 upstream + +Shakeel Butt reported in [1] that a user can request a task to be moved +to a resource group even if the task is already in the group. It just +wastes time to do the move operation which could be costly to send IPI +to a different CPU. + +Add a sanity check to ensure that the move operation only happens when +the task is not already in the resource group. + +[1] https://lore.kernel.org/lkml/CALvZod7E9zzHwenzf7objzGKsdBmVwTgEJ0nPgs0LUFU3SN5Pw@mail.gmail.com/ + +Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files") +Reported-by: Shakeel Butt +Signed-off-by: Fenghua Yu +Signed-off-by: Reinette Chatre +Signed-off-by: Borislav Petkov +Reviewed-by: Tony Luck +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/962ede65d8e95be793cb61102cca37f7bb018e66.1608243147.git.reinette.chatre@intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/resctrl/rdtgroup.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c ++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c +@@ -546,6 +546,13 @@ static void update_task_closid_rmid(stru + static int __rdtgroup_move_task(struct task_struct *tsk, + struct rdtgroup *rdtgrp) + { ++ /* If the task is already in rdtgrp, no need to move the task. */ ++ if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid && ++ tsk->rmid == rdtgrp->mon.rmid) || ++ (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid && ++ tsk->closid == rdtgrp->mon.parent->closid)) ++ return 0; ++ + /* + * Set the task's closid/rmid before the PQR_ASSOC MSR can be + * updated by them. 
diff --git a/queue-5.4/x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch b/queue-5.4/x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch new file mode 100644 index 00000000000..cd6fc8258ad --- /dev/null +++ b/queue-5.4/x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch @@ -0,0 +1,206 @@ +From foo@baz Fri Jan 15 10:39:41 AM CET 2021 +From: Reinette Chatre +Date: Mon, 11 Jan 2021 15:21:13 -0800 +Subject: x86/resctrl: Use an IPI instead of task_work_add() to update PQR_ASSOC MSR +To: stable@vger.kernel.org, gregkh@linuxfoundation.org +Cc: Fenghua Yu , Shakeel Butt , Valentin Schneider , Reinette Chatre , Borislav Petkov , Tony Luck , James Morse +Message-ID: <3a470a8ff17dfcf1a3d5afc189b74050a2634c2a.1610394119.git.reinette.chatre@intel.com> + +From: Fenghua Yu + +commit ae28d1aae48a1258bd09a6f707ebb4231d79a761 upstream + +Currently, when moving a task to a resource group the PQR_ASSOC MSR is +updated with the new closid and rmid in an added task callback. If the +task is running, the work is run as soon as possible. If the task is not +running, the work is executed later in the kernel exit path when the +kernel returns to the task again. + +Updating the PQR_ASSOC MSR as soon as possible on the CPU a moved task +is running is the right thing to do. Queueing work for a task that is +not running is unnecessary (the PQR_ASSOC MSR is already updated when +the task is scheduled in) and causing system resource waste with the way +in which it is implemented: Work to update the PQR_ASSOC register is +queued every time the user writes a task id to the "tasks" file, even if +the task already belongs to the resource group. + +This could result in multiple pending work items associated with a +single task even if they are all identical and even though only a single +update with most recent values is needed. 
Specifically, even if a task +is moved between different resource groups while it is sleeping then it +is only the last move that is relevant but yet a work item is queued +during each move. + +This unnecessary queueing of work items could result in significant +system resource waste, especially on tasks sleeping for a long time. +For example, as demonstrated by Shakeel Butt in [1] writing the same +task id to the "tasks" file can quickly consume significant memory. The +same problem (wasted system resources) occurs when moving a task between +different resource groups. + +As pointed out by Valentin Schneider in [2] there is an additional issue +with the way in which the queueing of work is done in that the task_struct +update is currently done after the work is queued, resulting in a race with +the register update possibly done before the data needed by the update is +available. + +To solve these issues, update the PQR_ASSOC MSR in a synchronous way +right after the new closid and rmid are ready during the task movement, +only if the task is running. If a moved task is not running nothing +is done since the PQR_ASSOC MSR will be updated next time the task is +scheduled. This is the same way used to update the register when tasks +are moved as part of resource group removal. + +[1] https://lore.kernel.org/lkml/CALvZod7E9zzHwenzf7objzGKsdBmVwTgEJ0nPgs0LUFU3SN5Pw@mail.gmail.com/ +[2] https://lore.kernel.org/lkml/20201123022433.17905-1-valentin.schneider@arm.com + + [ bp: Massage commit message and drop the two update_task_closid_rmid() + variants. 
] + +Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files") +Reported-by: Shakeel Butt +Reported-by: Valentin Schneider +Signed-off-by: Fenghua Yu +Signed-off-by: Reinette Chatre +Signed-off-by: Borislav Petkov +Reviewed-by: Tony Luck +Reviewed-by: James Morse +Reviewed-by: Valentin Schneider +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/17aa2fb38fc12ce7bb710106b3e7c7b45acb9e94.1608243147.git.reinette.chatre@intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/resctrl/rdtgroup.c | 108 +++++++++++++-------------------- + 1 file changed, 43 insertions(+), 65 deletions(-) + +--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c ++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c +@@ -525,85 +525,63 @@ static void rdtgroup_remove(struct rdtgr + kfree(rdtgrp); + } + +-struct task_move_callback { +- struct callback_head work; +- struct rdtgroup *rdtgrp; +-}; +- +-static void move_myself(struct callback_head *head) ++static void _update_task_closid_rmid(void *task) + { +- struct task_move_callback *callback; +- struct rdtgroup *rdtgrp; +- +- callback = container_of(head, struct task_move_callback, work); +- rdtgrp = callback->rdtgrp; +- + /* +- * If resource group was deleted before this task work callback +- * was invoked, then assign the task to root group and free the +- * resource group. ++ * If the task is still current on this CPU, update PQR_ASSOC MSR. ++ * Otherwise, the MSR is updated when the task is scheduled in. 
+ */ +- if (atomic_dec_and_test(&rdtgrp->waitcount) && +- (rdtgrp->flags & RDT_DELETED)) { +- current->closid = 0; +- current->rmid = 0; +- rdtgroup_remove(rdtgrp); +- } +- +- preempt_disable(); +- /* update PQR_ASSOC MSR to make resource group go into effect */ +- resctrl_sched_in(); +- preempt_enable(); ++ if (task == current) ++ resctrl_sched_in(); ++} + +- kfree(callback); ++static void update_task_closid_rmid(struct task_struct *t) ++{ ++ if (IS_ENABLED(CONFIG_SMP) && task_curr(t)) ++ smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1); ++ else ++ _update_task_closid_rmid(t); + } + + static int __rdtgroup_move_task(struct task_struct *tsk, + struct rdtgroup *rdtgrp) + { +- struct task_move_callback *callback; +- int ret; +- +- callback = kzalloc(sizeof(*callback), GFP_KERNEL); +- if (!callback) +- return -ENOMEM; +- callback->work.func = move_myself; +- callback->rdtgrp = rdtgrp; +- + /* +- * Take a refcount, so rdtgrp cannot be freed before the +- * callback has been invoked. ++ * Set the task's closid/rmid before the PQR_ASSOC MSR can be ++ * updated by them. ++ * ++ * For ctrl_mon groups, move both closid and rmid. ++ * For monitor groups, can move the tasks only from ++ * their parent CTRL group. + */ +- atomic_inc(&rdtgrp->waitcount); +- ret = task_work_add(tsk, &callback->work, true); +- if (ret) { +- /* +- * Task is exiting. Drop the refcount and free the callback. +- * No need to check the refcount as the group cannot be +- * deleted before the write function unlocks rdtgroup_mutex. +- */ +- atomic_dec(&rdtgrp->waitcount); +- kfree(callback); +- rdt_last_cmd_puts("Task exited\n"); +- } else { +- /* +- * For ctrl_mon groups move both closid and rmid. +- * For monitor groups, can move the tasks only from +- * their parent CTRL group. 
+- */ +- if (rdtgrp->type == RDTCTRL_GROUP) { +- tsk->closid = rdtgrp->closid; ++ ++ if (rdtgrp->type == RDTCTRL_GROUP) { ++ tsk->closid = rdtgrp->closid; ++ tsk->rmid = rdtgrp->mon.rmid; ++ } else if (rdtgrp->type == RDTMON_GROUP) { ++ if (rdtgrp->mon.parent->closid == tsk->closid) { + tsk->rmid = rdtgrp->mon.rmid; +- } else if (rdtgrp->type == RDTMON_GROUP) { +- if (rdtgrp->mon.parent->closid == tsk->closid) { +- tsk->rmid = rdtgrp->mon.rmid; +- } else { +- rdt_last_cmd_puts("Can't move task to different control group\n"); +- ret = -EINVAL; +- } ++ } else { ++ rdt_last_cmd_puts("Can't move task to different control group\n"); ++ return -EINVAL; + } + } +- return ret; ++ ++ /* ++ * Ensure the task's closid and rmid are written before determining if ++ * the task is current that will decide if it will be interrupted. ++ */ ++ barrier(); ++ ++ /* ++ * By now, the task's closid and rmid are set. If the task is current ++ * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource ++ * group go into effect. If the task is not current, the MSR will be ++ * updated when the task is scheduled in. ++ */ ++ update_task_closid_rmid(tsk); ++ ++ return 0; + } + + /** -- 2.47.3