]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 15 Jan 2021 09:58:38 +0000 (10:58 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 15 Jan 2021 09:58:38 +0000 (10:58 +0100)
added patches:
drm-i915-fix-mismatch-between-misplaced-vma-check-and-vma-insert.patch
iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch
iio-imu-st_lsm6dsx-flip-irq-return-logic.patch
spi-pxa2xx-fix-use-after-free-on-unbind.patch
ubifs-wbuf-don-t-leak-kernel-memory-to-flash.patch
vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch
x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch
x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch

queue-4.14/drm-i915-fix-mismatch-between-misplaced-vma-check-and-vma-insert.patch [new file with mode: 0644]
queue-4.14/iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch [new file with mode: 0644]
queue-4.14/iio-imu-st_lsm6dsx-flip-irq-return-logic.patch [new file with mode: 0644]
queue-4.14/series
queue-4.14/spi-pxa2xx-fix-use-after-free-on-unbind.patch [new file with mode: 0644]
queue-4.14/ubifs-wbuf-don-t-leak-kernel-memory-to-flash.patch [new file with mode: 0644]
queue-4.14/vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch [new file with mode: 0644]
queue-4.14/x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch [new file with mode: 0644]
queue-4.14/x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch [new file with mode: 0644]

diff --git a/queue-4.14/drm-i915-fix-mismatch-between-misplaced-vma-check-and-vma-insert.patch b/queue-4.14/drm-i915-fix-mismatch-between-misplaced-vma-check-and-vma-insert.patch
new file mode 100644 (file)
index 0000000..735c401
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Fri Jan 15 10:47:51 AM CET 2021
+From: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Wed, 16 Dec 2020 09:29:51 +0000
+Subject: drm/i915: Fix mismatch between misplaced vma check and vma insert
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0e53656ad8abc99e0a80c3de611e593ebbf55829 upstream
+
+When inserting a VMA, we restrict the placement to the low 4G unless the
+caller opts into using the full range. This was done to allow userspace
+the opportunity to transition slowly from a 32b address space, and to
+avoid breaking inherent 32b assumptions of some commands.
+
+However, for insert we limited ourselves to 4G-4K, but on verification
+we allowed the full 4G. This causes some attempts to bind a new buffer
+to sporadically fail with -ENOSPC, but at other times be bound
+successfully.
+
+commit 48ea1e32c39d ("drm/i915/gen9: Set PIN_ZONE_4G end to 4GB - 1
+page") suggests that there is a genuine problem with stateless addressing
+that cannot utilize the last page in 4G and so we purposefully excluded
+it. This means that the quick pin pass may cause us to utilize a buggy
+placement.
+
+Reported-by: CQ Tang <cq.tang@intel.com>
+Testcase: igt/gem_exec_params/larger-than-life-batch
+Fixes: 48ea1e32c39d ("drm/i915/gen9: Set PIN_ZONE_4G end to 4GB - 1 page")
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: CQ Tang <cq.tang@intel.com>
+Reviewed-by: CQ Tang <cq.tang@intel.com>
+Reviewed-by: Matthew Auld <matthew.auld@intel.com>
+Cc: <stable@vger.kernel.org> # v4.5+
+Link: https://patchwork.freedesktop.org/patch/msgid/20201216092951.7124-1-chris@chris-wilson.co.uk
+(cherry picked from commit 5f22cc0b134ab702d7f64b714e26018f7288ffee)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+[sudip: use file from old path]
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_gem_execbuffer.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+@@ -341,7 +341,7 @@ eb_vma_misplaced(const struct drm_i915_g
+               return true;
+       if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
+-          (vma->node.start + vma->node.size - 1) >> 32)
++          (vma->node.start + vma->node.size + 4095) >> 32)
+               return true;
+       if (flags & __EXEC_OBJECT_NEEDS_MAP &&
diff --git a/queue-4.14/iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch b/queue-4.14/iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch
new file mode 100644 (file)
index 0000000..9037838
--- /dev/null
@@ -0,0 +1,68 @@
+From foo@baz Fri Jan 15 10:52:27 AM CET 2021
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Sat, 14 Nov 2020 19:39:05 +0100
+Subject: iio: imu: st_lsm6dsx: fix edge-trigger interrupts
+
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+
+commit 3f9bce7a22a3f8ac9d885c9d75bc45569f24ac8b upstream
+
+If we are using edge IRQs, new samples can arrive while processing
+current interrupt since there are no hw guarantees the irq line
+stays "low" long enough to properly detect the new interrupt.
+In this case the new sample will be missed.
+Polling FIFO status register in st_lsm6dsx_handler_thread routine
+allow us to read new samples even if the interrupt arrives while
+processing previous data and the timeslot where the line is "low"
+is too short to be properly detected.
+
+Fixes: 89ca88a7cdf2 ("iio: imu: st_lsm6dsx: support active-low interrupts")
+Fixes: 290a6ce11d93 ("iio: imu: add support to lsm6dsx driver")
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Link: https://lore.kernel.org/r/5e93cda7dc1e665f5685c53ad8e9ea71dbae782d.1605378871.git.lorenzo@kernel.org
+Cc: <Stable@vger.kernel.org>
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+[sudip: manual backport to old irq handler path]
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c |   26 ++++++++++++++++++++-----
+ 1 file changed, 21 insertions(+), 5 deletions(-)
+
+--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
++++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
+@@ -395,13 +395,29 @@ static irqreturn_t st_lsm6dsx_handler_ir
+ static irqreturn_t st_lsm6dsx_handler_thread(int irq, void *private)
+ {
+       struct st_lsm6dsx_hw *hw = private;
+-      int count;
++      int fifo_len = 0, len;
+-      mutex_lock(&hw->fifo_lock);
+-      count = st_lsm6dsx_read_fifo(hw);
+-      mutex_unlock(&hw->fifo_lock);
++      /*
++       * If we are using edge IRQs, new samples can arrive while
++       * processing current interrupt since there are no hw
++       * guarantees the irq line stays "low" long enough to properly
++       * detect the new interrupt. In this case the new sample will
++       * be missed.
++       * Polling FIFO status register allow us to read new
++       * samples even if the interrupt arrives while processing
++       * previous data and the timeslot where the line is "low" is
++       * too short to be properly detected.
++       */
++      do {
++              mutex_lock(&hw->fifo_lock);
++              len = st_lsm6dsx_read_fifo(hw);
++              mutex_unlock(&hw->fifo_lock);
+-      return count ? IRQ_HANDLED : IRQ_NONE;
++              if (len > 0)
++                      fifo_len += len;
++      } while (len > 0);
++
++      return fifo_len ? IRQ_HANDLED : IRQ_NONE;
+ }
+ static int st_lsm6dsx_buffer_preenable(struct iio_dev *iio_dev)
diff --git a/queue-4.14/iio-imu-st_lsm6dsx-flip-irq-return-logic.patch b/queue-4.14/iio-imu-st_lsm6dsx-flip-irq-return-logic.patch
new file mode 100644 (file)
index 0000000..c15136c
--- /dev/null
@@ -0,0 +1,31 @@
+From foo@baz Fri Jan 15 10:52:17 AM CET 2021
+From: Sean Nyekjaer <sean@geanix.com>
+Date: Mon, 15 Jul 2019 09:07:15 +0200
+Subject: iio: imu: st_lsm6dsx: flip irq return logic
+
+From: Sean Nyekjaer <sean@geanix.com>
+
+commit ec76d918f23034f9f662539ca9c64e2ae3ba9fba upstream
+
+No need for using reverse logic in the irq return,
+fix this by flip things around.
+
+Signed-off-by: Sean Nyekjaer <sean@geanix.com>
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
++++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
+@@ -401,7 +401,7 @@ static irqreturn_t st_lsm6dsx_handler_th
+       count = st_lsm6dsx_read_fifo(hw);
+       mutex_unlock(&hw->fifo_lock);
+-      return !count ? IRQ_NONE : IRQ_HANDLED;
++      return count ? IRQ_HANDLED : IRQ_NONE;
+ }
+ static int st_lsm6dsx_buffer_preenable(struct iio_dev *iio_dev)
index d73947e96006c2441493f1d199c022d8dc0b1d18..eaa69e9516c664ab6d14ee6a3b3884d62c9cb6d4 100644 (file)
@@ -3,3 +3,11 @@ net-cdc_ncm-correct-overhead-in-delayed_ndp_size.patch
 net-vlan-avoid-leaks-on-register_vlan_dev-failures.patch
 net-ip-always-refragment-ip-defragmented-packets.patch
 net-fix-pmtu-check-in-nopmtudisc-mode.patch
+x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch
+x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch
+vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch
+drm-i915-fix-mismatch-between-misplaced-vma-check-and-vma-insert.patch
+ubifs-wbuf-don-t-leak-kernel-memory-to-flash.patch
+spi-pxa2xx-fix-use-after-free-on-unbind.patch
+iio-imu-st_lsm6dsx-flip-irq-return-logic.patch
+iio-imu-st_lsm6dsx-fix-edge-trigger-interrupts.patch
diff --git a/queue-4.14/spi-pxa2xx-fix-use-after-free-on-unbind.patch b/queue-4.14/spi-pxa2xx-fix-use-after-free-on-unbind.patch
new file mode 100644 (file)
index 0000000..c87d6a0
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Fri Jan 15 10:50:54 AM CET 2021
+From: Lukas Wunner <lukas@wunner.de>
+Date: Mon, 7 Dec 2020 09:17:05 +0100
+Subject: spi: pxa2xx: Fix use-after-free on unbind
+
+From: Lukas Wunner <lukas@wunner.de>
+
+commit 5626308bb94d9f930aa5f7c77327df4c6daa7759 upstream
+
+pxa2xx_spi_remove() accesses the driver's private data after calling
+spi_unregister_controller() even though that function releases the last
+reference on the spi_controller and thereby frees the private data.
+
+Fix by switching over to the new devm_spi_alloc_master/slave() helper
+which keeps the private data accessible until the driver has unbound.
+
+Fixes: 32e5b57232c0 ("spi: pxa2xx: Fix controller unregister order")
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Cc: <stable@vger.kernel.org> # v2.6.17+: 5e844cc37a5c: spi: Introduce device-managed SPI controller allocation
+Cc: <stable@vger.kernel.org> # v2.6.17+: 32e5b57232c0: spi: pxa2xx: Fix controller unregister order
+Cc: <stable@vger.kernel.org> # v2.6.17+
+Link: https://lore.kernel.org/r/5764b04d4a6e43069ebb7808f64c2f774ac6f193.1607286887.git.lukas@wunner.de
+Signed-off-by: Mark Brown <broonie@kernel.org>
+[sudip: adjust context]
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/spi/spi-pxa2xx.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/spi/spi-pxa2xx.c
++++ b/drivers/spi/spi-pxa2xx.c
+@@ -1660,7 +1660,7 @@ static int pxa2xx_spi_probe(struct platf
+               return -ENODEV;
+       }
+-      master = spi_alloc_master(dev, sizeof(struct driver_data));
++      master = devm_spi_alloc_master(dev, sizeof(*drv_data));
+       if (!master) {
+               dev_err(&pdev->dev, "cannot alloc spi_master\n");
+               pxa_ssp_free(ssp);
+@@ -1841,7 +1841,6 @@ out_error_clock_enabled:
+       free_irq(ssp->irq, drv_data);
+ out_error_master_alloc:
+-      spi_master_put(master);
+       pxa_ssp_free(ssp);
+       return status;
+ }
diff --git a/queue-4.14/ubifs-wbuf-don-t-leak-kernel-memory-to-flash.patch b/queue-4.14/ubifs-wbuf-don-t-leak-kernel-memory-to-flash.patch
new file mode 100644 (file)
index 0000000..38959f6
--- /dev/null
@@ -0,0 +1,70 @@
+From foo@baz Fri Jan 15 10:49:39 AM CET 2021
+From: Richard Weinberger <richard@nod.at>
+Date: Mon, 16 Nov 2020 22:05:30 +0100
+Subject: ubifs: wbuf: Don't leak kernel memory to flash
+
+From: Richard Weinberger <richard@nod.at>
+
+commit 20f1431160c6b590cdc269a846fc5a448abf5b98 upstream
+
+Write buffers use a kmalloc()'ed buffer, they can leak
+up to seven bytes of kernel memory to flash if writes are not
+aligned.
+So use ubifs_pad() to fill these gaps with padding bytes.
+This was never a problem while scanning because the scanner logic
+manually aligns node lengths and skips over these gaps.
+
+Cc: <stable@vger.kernel.org>
+Fixes: 1e51764a3c2ac05a2 ("UBIFS: add new flash file system")
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+[sudip: adjust context]
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ubifs/io.c |   13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/fs/ubifs/io.c
++++ b/fs/ubifs/io.c
+@@ -331,7 +331,7 @@ void ubifs_pad(const struct ubifs_info *
+ {
+       uint32_t crc;
+-      ubifs_assert(pad >= 0 && !(pad & 7));
++      ubifs_assert(pad >= 0);
+       if (pad >= UBIFS_PAD_NODE_SZ) {
+               struct ubifs_ch *ch = buf;
+@@ -727,6 +727,10 @@ int ubifs_wbuf_write_nolock(struct ubifs
+                * write-buffer.
+                */
+               memcpy(wbuf->buf + wbuf->used, buf, len);
++              if (aligned_len > len) {
++                      ubifs_assert(aligned_len - len < 8);
++                      ubifs_pad(c, wbuf->buf + wbuf->used + len, aligned_len - len);
++              }
+               if (aligned_len == wbuf->avail) {
+                       dbg_io("flush jhead %s wbuf to LEB %d:%d",
+@@ -819,13 +823,18 @@ int ubifs_wbuf_write_nolock(struct ubifs
+       }
+       spin_lock(&wbuf->lock);
+-      if (aligned_len)
++      if (aligned_len) {
+               /*
+                * And now we have what's left and what does not take whole
+                * max. write unit, so write it to the write-buffer and we are
+                * done.
+                */
+               memcpy(wbuf->buf, buf + written, len);
++              if (aligned_len > len) {
++                      ubifs_assert(aligned_len - len < 8);
++                      ubifs_pad(c, wbuf->buf + len, aligned_len - len);
++              }
++      }
+       if (c->leb_size - wbuf->offs >= c->max_write_size)
+               wbuf->size = c->max_write_size;
diff --git a/queue-4.14/vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch b/queue-4.14/vmlinux.lds.h-add-pgo-and-autofdo-input-sections.patch
new file mode 100644 (file)
index 0000000..0ee7a12
--- /dev/null
@@ -0,0 +1,86 @@
+From foo@baz Fri Jan 15 10:46:06 AM CET 2021
+From: Nick Desaulniers <ndesaulniers@google.com>
+Date: Fri, 21 Aug 2020 12:42:47 -0700
+Subject: vmlinux.lds.h: Add PGO and AutoFDO input sections
+
+From: Nick Desaulniers <ndesaulniers@google.com>
+
+commit eff8728fe69880d3f7983bec3fb6cea4c306261f upstream.
+
+Basically, consider .text.{hot|unlikely|unknown}.* part of .text, too.
+
+When compiling with profiling information (collected via PGO
+instrumentations or AutoFDO sampling), Clang will separate code into
+.text.hot, .text.unlikely, or .text.unknown sections based on profiling
+information. After D79600 (clang-11), these sections will have a
+trailing `.` suffix, ie.  .text.hot., .text.unlikely., .text.unknown..
+
+When using -ffunction-sections together with profiling information,
+either explicitly (FGKASLR) or implicitly (LTO), code may be placed in
+sections following the convention:
+.text.hot.<foo>, .text.unlikely.<bar>, .text.unknown.<baz>
+where <foo>, <bar>, and <baz> are functions.  (This produces one section
+per function; we generally try to merge these all back via linker script
+so that we don't have 50k sections).
+
+For the above cases, we need to teach our linker scripts that such
+sections might exist and that we'd explicitly like them grouped
+together, otherwise we can wind up with code outside of the
+_stext/_etext boundaries that might not be mapped properly for some
+architectures, resulting in boot failures.
+
+If the linker script is not told about possible input sections, then
+where the section is placed as output is a heuristic-laden mess that's
+non-portable between linkers (ie. BFD and LLD), and has resulted in many
+hard to debug bugs.  Kees Cook is working on cleaning this up by adding
+--orphan-handling=warn linker flag used in ARCH=powerpc to additional
+architectures. In the case of linker scripts, borrowing from the Zen of
+Python: explicit is better than implicit.
+
+Also, ld.bfd's internal linker script considers .text.hot AND
+.text.hot.* to be part of .text, as well as .text.unlikely and
+.text.unlikely.*. I didn't see support for .text.unknown.*, and didn't
+see Clang producing such code in our kernel builds, but I see code in
+LLVM that can produce such section names if profiling information is
+missing. That may point to a larger issue with generating or collecting
+profiles, but I would much rather be safe and explicit than have to
+debug yet another issue related to orphan section placement.
+
+Reported-by: Jian Cai <jiancai@google.com>
+Suggested-by: Fāng-ruì Sòng <maskray@google.com>
+Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Tested-by: Luis Lozano <llozano@google.com>
+Tested-by: Manoj Gupta <manojgupta@google.com>
+Acked-by: Kees Cook <keescook@chromium.org>
+Cc: linux-arch@vger.kernel.org
+Cc: stable@vger.kernel.org
+Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=add44f8d5c5c05e08b11e033127a744d61c26aee
+Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=1de778ed23ce7492c523d5850c6c6dbb34152655
+Link: https://reviews.llvm.org/D79600
+Link: https://bugs.chromium.org/p/chromium/issues/detail?id=1084760
+Link: https://lore.kernel.org/r/20200821194310.3089815-7-keescook@chromium.org
+
+Debugged-by: Luis Lozano <llozano@google.com>
+[nc: Resolve small conflict due to lack of NOINSTR_TEXT]
+Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/asm-generic/vmlinux.lds.h |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -459,7 +459,10 @@
+  */
+ #define TEXT_TEXT                                                     \
+               ALIGN_FUNCTION();                                       \
+-              *(.text.hot TEXT_MAIN .text.fixup .text.unlikely)       \
++              *(.text.hot .text.hot.*)                                \
++              *(TEXT_MAIN .text.fixup)                                \
++              *(.text.unlikely .text.unlikely.*)                      \
++              *(.text.unknown .text.unknown.*)                        \
+               *(.text..refcount)                                      \
+               *(.ref.text)                                            \
+       MEM_KEEP(init.text)                                             \
diff --git a/queue-4.14/x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch b/queue-4.14/x86-resctrl-don-t-move-a-task-to-the-same-resource-group.patch
new file mode 100644 (file)
index 0000000..f9a8e91
--- /dev/null
@@ -0,0 +1,60 @@
+From foo@baz Fri Jan 15 10:40:53 AM CET 2021
+From: Reinette Chatre <reinette.chatre@intel.com>
+Date: Mon, 11 Jan 2021 15:12:58 -0800
+Subject: x86/resctrl: Don't move a task to the same resource group
+To: stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: Fenghua Yu <fenghua.yu@intel.com>, Shakeel Butt <shakeelb@google.com>, Reinette Chatre <reinette.chatre@intel.com>, Borislav Petkov <bp@suse.de>, Tony Luck <tony.luck@intel.com>
+Message-ID: <5316a67c4a42a2ddf39df3b9a7c246ab7dff5348.1610394049.git.reinette.chatre@intel.com>
+
+From: Fenghua Yu <fenghua.yu@intel.com>
+
+commit a0195f314a25582b38993bf30db11c300f4f4611 upstream
+
+Shakeel Butt reported in [1] that a user can request a task to be moved
+to a resource group even if the task is already in the group. It just
+wastes time to do the move operation which could be costly to send IPI
+to a different CPU.
+
+Add a sanity check to ensure that the move operation only happens when
+the task is not already in the resource group.
+
+[1] https://lore.kernel.org/lkml/CALvZod7E9zzHwenzf7objzGKsdBmVwTgEJ0nPgs0LUFU3SN5Pw@mail.gmail.com/
+
+Backporting notes:
+
+Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt
+files to a separate directory"), the file
+arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to
+arch/x86/kernel/cpu/resctrl/rdtgroup.c.
+Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+for older stable trees.
+
+Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files")
+Reported-by: Shakeel Butt <shakeelb@google.com>
+Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
+Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/962ede65d8e95be793cb61102cca37f7bb018e66.1608243147.git.reinette.chatre@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/intel_rdt_rdtgroup.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
++++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+@@ -432,6 +432,13 @@ static void update_task_closid_rmid(stru
+ static int __rdtgroup_move_task(struct task_struct *tsk,
+                               struct rdtgroup *rdtgrp)
+ {
++      /* If the task is already in rdtgrp, no need to move the task. */
++      if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
++           tsk->rmid == rdtgrp->mon.rmid) ||
++          (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
++           tsk->closid == rdtgrp->mon.parent->closid))
++              return 0;
++
+       /*
+        * Set the task's closid/rmid before the PQR_ASSOC MSR can be
+        * updated by them.
diff --git a/queue-4.14/x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch b/queue-4.14/x86-resctrl-use-an-ipi-instead-of-task_work_add-to-update-pqr_assoc-msr.patch
new file mode 100644 (file)
index 0000000..07b4642
--- /dev/null
@@ -0,0 +1,216 @@
+From foo@baz Fri Jan 15 10:40:53 AM CET 2021
+From: Reinette Chatre <reinette.chatre@intel.com>
+Date: Mon, 11 Jan 2021 15:12:28 -0800
+Subject: x86/resctrl: Use an IPI instead of task_work_add() to update PQR_ASSOC MSR
+To: stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: Fenghua Yu <fenghua.yu@intel.com>, Shakeel Butt <shakeelb@google.com>, Valentin Schneider <valentin.schneider@arm.com>, Reinette Chatre <reinette.chatre@intel.com>, Borislav Petkov <bp@suse.de>, Tony Luck <tony.luck@intel.com>, James Morse <james.morse@arm.com>
+Message-ID: <27e975e8975fc10f806d6ede8c28e56872b853cc.1610394049.git.reinette.chatre@intel.com>
+
+From: Fenghua Yu <fenghua.yu@intel.com>
+
+commit ae28d1aae48a1258bd09a6f707ebb4231d79a761 upstream
+
+Currently, when moving a task to a resource group the PQR_ASSOC MSR is
+updated with the new closid and rmid in an added task callback. If the
+task is running, the work is run as soon as possible. If the task is not
+running, the work is executed later in the kernel exit path when the
+kernel returns to the task again.
+
+Updating the PQR_ASSOC MSR as soon as possible on the CPU a moved task
+is running is the right thing to do. Queueing work for a task that is
+not running is unnecessary (the PQR_ASSOC MSR is already updated when
+the task is scheduled in) and causing system resource waste with the way
+in which it is implemented: Work to update the PQR_ASSOC register is
+queued every time the user writes a task id to the "tasks" file, even if
+the task already belongs to the resource group.
+
+This could result in multiple pending work items associated with a
+single task even if they are all identical and even though only a single
+update with most recent values is needed. Specifically, even if a task
+is moved between different resource groups while it is sleeping then it
+is only the last move that is relevant but yet a work item is queued
+during each move.
+
+This unnecessary queueing of work items could result in significant
+system resource waste, especially on tasks sleeping for a long time.
+For example, as demonstrated by Shakeel Butt in [1] writing the same
+task id to the "tasks" file can quickly consume significant memory. The
+same problem (wasted system resources) occurs when moving a task between
+different resource groups.
+
+As pointed out by Valentin Schneider in [2] there is an additional issue
+with the way in which the queueing of work is done in that the task_struct
+update is currently done after the work is queued, resulting in a race with
+the register update possibly done before the data needed by the update is
+available.
+
+To solve these issues, update the PQR_ASSOC MSR in a synchronous way
+right after the new closid and rmid are ready during the task movement,
+only if the task is running. If a moved task is not running nothing
+is done since the PQR_ASSOC MSR will be updated next time the task is
+scheduled. This is the same way used to update the register when tasks
+are moved as part of resource group removal.
+
+[1] https://lore.kernel.org/lkml/CALvZod7E9zzHwenzf7objzGKsdBmVwTgEJ0nPgs0LUFU3SN5Pw@mail.gmail.com/
+[2] https://lore.kernel.org/lkml/20201123022433.17905-1-valentin.schneider@arm.com
+
+ [ bp: Massage commit message and drop the two update_task_closid_rmid()
+   variants. ]
+
+Backporting notes:
+
+Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt
+files to a separate directory"), the file
+arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to
+arch/x86/kernel/cpu/resctrl/rdtgroup.c.
+Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+for older stable trees.
+
+Since upstream commit 352940ececaca ("x86/resctrl: Rename the RDT
+functions and definitions"), resctrl functions received more generic
+names. Specifically related to this backport, intel_rdt_sched_in()
+was renamed to resctrl_sched_in().
+
+Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files")
+Reported-by: Shakeel Butt <shakeelb@google.com>
+Reported-by: Valentin Schneider <valentin.schneider@arm.com>
+Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
+Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: James Morse <james.morse@arm.com>
+Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/17aa2fb38fc12ce7bb710106b3e7c7b45acb9e94.1608243147.git.reinette.chatre@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/intel_rdt_rdtgroup.c |  105 ++++++++++++-------------------
+ 1 file changed, 42 insertions(+), 63 deletions(-)
+
+--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
++++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+@@ -411,82 +411,61 @@ static void rdtgroup_remove(struct rdtgr
+       kfree(rdtgrp);
+ }
+-struct task_move_callback {
+-      struct callback_head    work;
+-      struct rdtgroup         *rdtgrp;
+-};
+-
+-static void move_myself(struct callback_head *head)
++static void _update_task_closid_rmid(void *task)
+ {
+-      struct task_move_callback *callback;
+-      struct rdtgroup *rdtgrp;
+-
+-      callback = container_of(head, struct task_move_callback, work);
+-      rdtgrp = callback->rdtgrp;
+-
+       /*
+-       * If resource group was deleted before this task work callback
+-       * was invoked, then assign the task to root group and free the
+-       * resource group.
++       * If the task is still current on this CPU, update PQR_ASSOC MSR.
++       * Otherwise, the MSR is updated when the task is scheduled in.
+        */
+-      if (atomic_dec_and_test(&rdtgrp->waitcount) &&
+-          (rdtgrp->flags & RDT_DELETED)) {
+-              current->closid = 0;
+-              current->rmid = 0;
+-              rdtgroup_remove(rdtgrp);
+-      }
+-
+-      preempt_disable();
+-      /* update PQR_ASSOC MSR to make resource group go into effect */
+-      intel_rdt_sched_in();
+-      preempt_enable();
++      if (task == current)
++              intel_rdt_sched_in();
++}
+-      kfree(callback);
++static void update_task_closid_rmid(struct task_struct *t)
++{
++      if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
++              smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
++      else
++              _update_task_closid_rmid(t);
+ }
+ static int __rdtgroup_move_task(struct task_struct *tsk,
+                               struct rdtgroup *rdtgrp)
+ {
+-      struct task_move_callback *callback;
+-      int ret;
+-
+-      callback = kzalloc(sizeof(*callback), GFP_KERNEL);
+-      if (!callback)
+-              return -ENOMEM;
+-      callback->work.func = move_myself;
+-      callback->rdtgrp = rdtgrp;
+-
+       /*
+-       * Take a refcount, so rdtgrp cannot be freed before the
+-       * callback has been invoked.
++       * Set the task's closid/rmid before the PQR_ASSOC MSR can be
++       * updated by them.
++       *
++       * For ctrl_mon groups, move both closid and rmid.
++       * For monitor groups, can move the tasks only from
++       * their parent CTRL group.
+        */
+-      atomic_inc(&rdtgrp->waitcount);
+-      ret = task_work_add(tsk, &callback->work, true);
+-      if (ret) {
+-              /*
+-               * Task is exiting. Drop the refcount and free the callback.
+-               * No need to check the refcount as the group cannot be
+-               * deleted before the write function unlocks rdtgroup_mutex.
+-               */
+-              atomic_dec(&rdtgrp->waitcount);
+-              kfree(callback);
+-      } else {
+-              /*
+-               * For ctrl_mon groups move both closid and rmid.
+-               * For monitor groups, can move the tasks only from
+-               * their parent CTRL group.
+-               */
+-              if (rdtgrp->type == RDTCTRL_GROUP) {
+-                      tsk->closid = rdtgrp->closid;
++
++      if (rdtgrp->type == RDTCTRL_GROUP) {
++              tsk->closid = rdtgrp->closid;
++              tsk->rmid = rdtgrp->mon.rmid;
++      } else if (rdtgrp->type == RDTMON_GROUP) {
++              if (rdtgrp->mon.parent->closid == tsk->closid)
+                       tsk->rmid = rdtgrp->mon.rmid;
+-              } else if (rdtgrp->type == RDTMON_GROUP) {
+-                      if (rdtgrp->mon.parent->closid == tsk->closid)
+-                              tsk->rmid = rdtgrp->mon.rmid;
+-                      else
+-                              ret = -EINVAL;
+-              }
++              else
++                      return -EINVAL;
+       }
+-      return ret;
++
++      /*
++       * Ensure the task's closid and rmid are written before determining if
++       * the task is current that will decide if it will be interrupted.
++       */
++      barrier();
++
++      /*
++       * By now, the task's closid and rmid are set. If the task is current
++       * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
++       * group go into effect. If the task is not current, the MSR will be
++       * updated when the task is scheduled in.
++       */
++      update_task_closid_rmid(tsk);
++
++      return 0;
+ }
+ static int rdtgroup_task_write_permission(struct task_struct *task,