--- /dev/null
+From 34c0f6f2695a2db81e09a3ab7bdb2853f45d4d3d Mon Sep 17 00:00:00 2001
+From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
+Date: Sat, 5 Dec 2020 01:48:08 +0100
+Subject: KVM: mmu: Fix SPTE encoding of MMIO generation upper half
+
+From: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
+
+commit 34c0f6f2695a2db81e09a3ab7bdb2853f45d4d3d upstream.
+
+Commit cae7ed3c2cb0 ("KVM: x86: Refactor the MMIO SPTE generation handling")
+cleaned up the computation of MMIO generation SPTE masks, however it
+introduced a bug in how the upper part was encoded:
+SPTE bits 52-61 were supposed to contain bits 10-19 of the current
+generation number, however a missing shift encoded bits 1-10 there instead
+(mostly duplicating the lower part of the encoded generation number that
+then consisted of bits 1-9).
+
+In the meantime, the upper part was shrunk by one bit and moved by
+subsequent commits to become an upper half of the encoded generation number
+(bits 9-17 of bits 0-17 encoded in a SPTE).
+
+In addition to the above, commit 56871d444bc4 ("KVM: x86: fix overlap between SPTE_MMIO_MASK and generation")
+has changed the SPTE bit range assigned to encode the generation number and
+the total number of bits encoded but did not update them in the comment
+attached to their defines, nor in the KVM MMU doc.
+Let's do it here, too, since it is too trivial a thing to warrant a separate
+commit.
+
+Fixes: cae7ed3c2cb0 ("KVM: x86: Refactor the MMIO SPTE generation handling")
+Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
+Message-Id: <156700708db2a5296c5ed7a8b9ac71f1e9765c85.1607129096.git.maciej.szmigiero@oracle.com>
+Cc: stable@vger.kernel.org
+[Reorganize macros so that everything is computed from the bit ranges. - Paolo]
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ Documentation/virt/kvm/mmu.txt | 2 +-
+ arch/x86/kvm/mmu.c | 29 ++++++++++++++++++++---------
+ 2 files changed, 21 insertions(+), 10 deletions(-)
+
+--- a/Documentation/virt/kvm/mmu.txt
++++ b/Documentation/virt/kvm/mmu.txt
+@@ -420,7 +420,7 @@ If the generation number of the spte doe
+ number, it will ignore the cached MMIO information and handle the page
+ fault through the slow path.
+
+-Since only 19 bits are used to store generation-number on mmio spte, all
++Since only 18 bits are used to store generation-number on mmio spte, all
+ pages are zapped when there is an overflow.
+
+ Unfortunately, a single memory access might access kvm_memslots(kvm) multiple
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -407,11 +407,11 @@ static inline bool is_access_track_spte(
+ }
+
+ /*
+- * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of
++ * Due to limited space in PTEs, the MMIO generation is an 18 bit subset of
+ * the memslots generation and is derived as follows:
+ *
+ * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11
+- * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61
++ * Bits 9-17 of the MMIO generation are propagated to spte bits 54-62
+ *
+ * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
+ * the MMIO generation number, as doing so would require stealing a bit from
+@@ -420,18 +420,29 @@ static inline bool is_access_track_spte(
+ * requires a full MMU zap). The flag is instead explicitly queried when
+ * checking for MMIO spte cache hits.
+ */
+-#define MMIO_SPTE_GEN_MASK GENMASK_ULL(17, 0)
+
+ #define MMIO_SPTE_GEN_LOW_START 3
+ #define MMIO_SPTE_GEN_LOW_END 11
+-#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
+- MMIO_SPTE_GEN_LOW_START)
+
+ #define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT
+ #define MMIO_SPTE_GEN_HIGH_END 62
++
++#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
++ MMIO_SPTE_GEN_LOW_START)
+ #define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \
+ MMIO_SPTE_GEN_HIGH_START)
+
++#define MMIO_SPTE_GEN_LOW_BITS (MMIO_SPTE_GEN_LOW_END - MMIO_SPTE_GEN_LOW_START + 1)
++#define MMIO_SPTE_GEN_HIGH_BITS (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1)
++
++/* remember to adjust the comment above as well if you change these */
++static_assert(MMIO_SPTE_GEN_LOW_BITS == 9 && MMIO_SPTE_GEN_HIGH_BITS == 9);
++
++#define MMIO_SPTE_GEN_LOW_SHIFT (MMIO_SPTE_GEN_LOW_START - 0)
++#define MMIO_SPTE_GEN_HIGH_SHIFT (MMIO_SPTE_GEN_HIGH_START - MMIO_SPTE_GEN_LOW_BITS)
++
++#define MMIO_SPTE_GEN_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0)
++
+ static u64 generation_mmio_spte_mask(u64 gen)
+ {
+ u64 mask;
+@@ -439,8 +450,8 @@ static u64 generation_mmio_spte_mask(u64
+ WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
+ BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK);
+
+- mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
+- mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
++ mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK;
++ mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK;
+ return mask;
+ }
+
+@@ -448,8 +459,8 @@ static u64 get_mmio_spte_generation(u64
+ {
+ u64 gen;
+
+- gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
+- gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START;
++ gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_SHIFT;
++ gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_SHIFT;
+ return gen;
+ }
+
--- /dev/null
+From 758c9373d84168dc7d039cf85a0e920046b17b41 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 3 Dec 2020 21:07:05 -0800
+Subject: membarrier: Explicitly sync remote cores when SYNC_CORE is requested
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 758c9373d84168dc7d039cf85a0e920046b17b41 upstream.
+
+membarrier() does not explicitly sync_core() remote CPUs; instead, it
+relies on the assumption that an IPI will result in a core sync. On x86,
+this may be true in practice, but it's not architecturally reliable. In
+particular, the SDM and APM do not appear to guarantee that interrupt
+delivery is serializing. While IRET does serialize, IPI return can
+schedule, thereby switching to another task in the same mm that was
+sleeping in a syscall. The new task could then SYSRET back to usermode
+without ever executing IRET.
+
+Make this more robust by explicitly calling sync_core_before_usermode()
+on remote cores. (This also helps people who search the kernel tree for
+instances of sync_core() and sync_core_before_usermode() -- one might be
+surprised that the core membarrier code doesn't currently show up in
+such a search.)
+
+Fixes: 70216e18e519 ("membarrier: Provide core serializing command, *_SYNC_CORE")
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/776b448d5f7bd6b12690707f5ed67bcda7f1d427.1607058304.git.luto@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ kernel/sched/membarrier.c | 21 ++++++++++++++++++++-
+ 1 file changed, 20 insertions(+), 1 deletion(-)
+
+--- a/kernel/sched/membarrier.c
++++ b/kernel/sched/membarrier.c
+@@ -30,6 +30,23 @@ static void ipi_mb(void *info)
+ smp_mb(); /* IPIs should be serializing but paranoid. */
+ }
+
++static void ipi_sync_core(void *info)
++{
++ /*
++ * The smp_mb() in membarrier after all the IPIs is supposed to
++ * ensure that memory on remote CPUs that occur before the IPI
++ * become visible to membarrier()'s caller -- see scenario B in
++ * the big comment at the top of this file.
++ *
++ * A sync_core() would provide this guarantee, but
++ * sync_core_before_usermode() might end up being deferred until
++ * after membarrier()'s smp_mb().
++ */
++ smp_mb(); /* IPIs should be serializing but paranoid. */
++
++ sync_core_before_usermode();
++}
++
+ static void ipi_sync_rq_state(void *info)
+ {
+ struct mm_struct *mm = (struct mm_struct *) info;
+@@ -134,6 +151,7 @@ static int membarrier_private_expedited(
+ int cpu;
+ cpumask_var_t tmpmask;
+ struct mm_struct *mm = current->mm;
++ smp_call_func_t ipi_func = ipi_mb;
+
+ if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
+ if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
+@@ -141,6 +159,7 @@ static int membarrier_private_expedited(
+ if (!(atomic_read(&mm->membarrier_state) &
+ MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
+ return -EPERM;
++ ipi_func = ipi_sync_core;
+ } else {
+ if (!(atomic_read(&mm->membarrier_state) &
+ MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
+@@ -181,7 +200,7 @@ static int membarrier_private_expedited(
+ rcu_read_unlock();
+
+ preempt_disable();
+- smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
++ smp_call_function_many(tmpmask, ipi_func, NULL, 1);
+ preempt_enable();
+
+ free_cpumask_var(tmpmask);
--- /dev/null
+From kamal@canonical.com Sat Dec 19 13:38:59 2020
+From: Kamal Mostafa <kamal@canonical.com>
+Date: Wed, 16 Dec 2020 10:13:53 -0800
+Subject: Revert "selftests/ftrace: check for do_sys_openat2 in user-memory test"
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>
+Cc: Kamal Mostafa <kamal@canonical.com>, stable@vger.kernel.org
+Message-ID: <20201216181353.30321-1-kamal@canonical.com>
+
+From: Kamal Mostafa <kamal@canonical.com>
+
+This reverts commit 9110e2f2633dc9383a3a4711a0067094f6948783.
+
+This commit is not suitable for 5.4-stable because the openat2 system
+call does not exist in v5.4.
+
+Signed-off-by: Kamal Mostafa <kamal@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc | 4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
++++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
+@@ -11,16 +11,12 @@ grep -A10 "fetcharg:" README | grep -q '
+ :;: "user-memory access syntax and ustring working on user memory";:
+ echo 'p:myevent do_sys_open path=+0($arg2):ustring path2=+u0($arg2):string' \
+ > kprobe_events
+-echo 'p:myevent2 do_sys_openat2 path=+0($arg2):ustring path2=+u0($arg2):string' \
+- >> kprobe_events
+
+ grep myevent kprobe_events | \
+ grep -q 'path=+0($arg2):ustring path2=+u0($arg2):string'
+ echo 1 > events/kprobes/myevent/enable
+-echo 1 > events/kprobes/myevent2/enable
+ echo > /dev/null
+ echo 0 > events/kprobes/myevent/enable
+-echo 0 > events/kprobes/myevent2/enable
+
+ grep myevent trace | grep -q 'path="/dev/null" path2="/dev/null"'
+
usb-sisusbvga-make-console-support-depend-on-broken.patch
alsa-pcm-oss-fix-potential-out-of-bounds-shift.patch
serial-8250_omap-avoid-fifo-corruption-caused-by-mdr1-access.patch
+kvm-mmu-fix-spte-encoding-of-mmio-generation-upper-half.patch
+revert-selftests-ftrace-check-for-do_sys_openat2-in-user-memory-test.patch
+membarrier-explicitly-sync-remote-cores-when-sync_core-is-requested.patch
+x86-resctrl-remove-unused-struct-mbm_state-chunks_bw.patch
+x86-resctrl-fix-incorrect-local-bandwidth-when-mba_sc-is-enabled.patch
--- /dev/null
+From foo@baz Sat Dec 19 01:50:24 PM CET 2020
+From: Xiaochen Shen <xiaochen.shen@intel.com>
+Date: Fri, 4 Dec 2020 14:27:59 +0800
+Subject: x86/resctrl: Fix incorrect local bandwidth when mba_sc is enabled
+
+From: Xiaochen Shen <xiaochen.shen@intel.com>
+
+commit 06c5fe9b12dde1b62821f302f177c972bb1c81f9 upstream
+
+The MBA software controller (mba_sc) is a feedback loop which
+periodically reads MBM counters and tries to restrict the bandwidth
+below a user-specified value. It tags along the MBM counter overflow
+handler to do the updates with 1s interval in mbm_update() and
+update_mba_bw().
+
+The purpose of mbm_update() is to periodically read the MBM counters to
+make sure that the hardware counter doesn't wrap around more than once
+between user samplings. mbm_update() calls __mon_event_count() for local
+bandwidth updating when mba_sc is not enabled, but calls mbm_bw_count()
+instead when mba_sc is enabled. __mon_event_count() will not be called
+for local bandwidth updating in MBM counter overflow handler, but it is
+still called when reading MBM local bandwidth counter file
+'mbm_local_bytes', the call path is as below:
+
+ rdtgroup_mondata_show()
+ mon_event_read()
+ mon_event_count()
+ __mon_event_count()
+
+In __mon_event_count(), m->chunks is updated by delta chunks which is
+calculated from previous MSR value (m->prev_msr) and current MSR value.
+When mba_sc is enabled, m->chunks is also updated in mbm_update() by
+mistake by the delta chunks which is calculated from m->prev_bw_msr
+instead of m->prev_msr. But m->chunks is not used in update_mba_bw() in
+the mba_sc feedback loop.
+
+When reading MBM local bandwidth counter file, m->chunks was changed
+unexpectedly by mbm_bw_count(). As a result, the incorrect local
+bandwidth counter which is calculated from incorrect m->chunks is shown to
+the user.
+
+Fix this by removing incorrect m->chunks updating in mbm_bw_count() in
+MBM counter overflow handler, and always calling __mon_event_count() in
+mbm_update() to make sure that the hardware local bandwidth counter
+doesn't wrap around.
+
+Test steps:
+ # Run workload with aggressive memory bandwidth (e.g., 10 GB/s)
+ git clone https://github.com/intel/intel-cmt-cat && cd intel-cmt-cat
+ && make
+ ./tools/membw/membw -c 0 -b 10000 --read
+
+ # Enable MBA software controller
+ mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl
+
+ # Create control group c1
+ mkdir /sys/fs/resctrl/c1
+
+ # Set MB throttle to 6 GB/s
+ echo "MB:0=6000;1=6000" > /sys/fs/resctrl/c1/schemata
+
+ # Write PID of the workload to tasks file
+ echo `pidof membw` > /sys/fs/resctrl/c1/tasks
+
+ # Read local bytes counters twice with 1s interval, the calculated
+ # local bandwidth is not as expected (approaching to 6 GB/s):
+ local_1=`cat /sys/fs/resctrl/c1/mon_data/mon_L3_00/mbm_local_bytes`
+ sleep 1
+ local_2=`cat /sys/fs/resctrl/c1/mon_data/mon_L3_00/mbm_local_bytes`
+ echo "local b/w (bytes/s):" `expr $local_2 - $local_1`
+
+Before fix:
+ local b/w (bytes/s): 11076796416
+
+After fix:
+ local b/w (bytes/s): 5465014272
+
+Fixes: ba0f26d8529c (x86/intel_rdt/mba_sc: Prepare for feedback loop)
+Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/1607063279-19437-1-git-send-email-xiaochen.shen@intel.com
+[sudip: adjust context]
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/resctrl/monitor.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kernel/cpu/resctrl/monitor.c
++++ b/arch/x86/kernel/cpu/resctrl/monitor.c
+@@ -280,7 +280,6 @@ static void mbm_bw_count(u32 rmid, struc
+ return;
+
+ chunks = mbm_overflow_count(m->prev_bw_msr, tval);
+- m->chunks += chunks;
+ cur_bw = (chunks * r->mon_scale) >> 20;
+
+ if (m->delta_comp)
+@@ -450,15 +449,14 @@ static void mbm_update(struct rdt_domain
+ }
+ if (is_mbm_local_enabled()) {
+ rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
++ __mon_event_count(rmid, &rr);
+
+ /*
+ * Call the MBA software controller only for the
+ * control groups and when user has enabled
+ * the software controller explicitly.
+ */
+- if (!is_mba_sc(NULL))
+- __mon_event_count(rmid, &rr);
+- else
++ if (is_mba_sc(NULL))
+ mbm_bw_count(rmid, &rr);
+ }
+ }
--- /dev/null
+From foo@baz Sat Dec 19 01:50:12 PM CET 2020
+From: James Morse <james.morse@arm.com>
+Date: Wed, 8 Jul 2020 16:39:20 +0000
+Subject: x86/resctrl: Remove unused struct mbm_state::chunks_bw
+
+From: James Morse <james.morse@arm.com>
+
+commit abe8f12b44250d02937665033a8b750c1bfeb26e upstream
+
+Nothing reads struct mbm_state's chunks_bw value; it's a copy of
+chunks. Remove it.
+
+Signed-off-by: James Morse <james.morse@arm.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
+Link: https://lkml.kernel.org/r/20200708163929.2783-2-james.morse@arm.com
+[sudip: adjust context]
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/resctrl/internal.h | 2 --
+ arch/x86/kernel/cpu/resctrl/monitor.c | 3 +--
+ 2 files changed, 1 insertion(+), 4 deletions(-)
+
+--- a/arch/x86/kernel/cpu/resctrl/internal.h
++++ b/arch/x86/kernel/cpu/resctrl/internal.h
+@@ -276,7 +276,6 @@ struct rftype {
+ * struct mbm_state - status for each MBM counter in each domain
+ * @chunks: Total data moved (multiply by rdt_group.mon_scale to get bytes)
+ * @prev_msr Value of IA32_QM_CTR for this RMID last time we read it
+- * @chunks_bw Total local data moved. Used for bandwidth calculation
+ * @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting
+ * @prev_bw The most recent bandwidth in MBps
+ * @delta_bw Difference between the current and previous bandwidth
+@@ -285,7 +284,6 @@ struct rftype {
+ struct mbm_state {
+ u64 chunks;
+ u64 prev_msr;
+- u64 chunks_bw;
+ u64 prev_bw_msr;
+ u32 prev_bw;
+ u32 delta_bw;
+--- a/arch/x86/kernel/cpu/resctrl/monitor.c
++++ b/arch/x86/kernel/cpu/resctrl/monitor.c
+@@ -280,8 +280,7 @@ static void mbm_bw_count(u32 rmid, struc
+ return;
+
+ chunks = mbm_overflow_count(m->prev_bw_msr, tval);
+- m->chunks_bw += chunks;
+- m->chunks = m->chunks_bw;
++ m->chunks += chunks;
+ cur_bw = (chunks * r->mon_scale) >> 20;
+
+ if (m->delta_comp)