--- /dev/null
+From a79305e156db3d24fcd8eb649cdb3c3b2350e5c2 Mon Sep 17 00:00:00 2001
+From: Zhang Changzhong <zhangchangzhong@huawei.com>
+Date: Thu, 28 Oct 2021 22:38:26 +0800
+Subject: can: j1939: j1939_can_recv(): ignore messages with invalid source address
+
+From: Zhang Changzhong <zhangchangzhong@huawei.com>
+
+commit a79305e156db3d24fcd8eb649cdb3c3b2350e5c2 upstream.
+
+According to SAE-J1939-82 2015 (A.3.6 Row 2), a receiver should never
+send TP.CM_CTS to the global address, so we can add a check in
+j1939_can_recv() to drop messages with an invalid source address.
+
+Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
+Link: https://lore.kernel.org/all/1635431907-15617-3-git-send-email-zhangchangzhong@huawei.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
+Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/can/j1939/main.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/net/can/j1939/main.c
++++ b/net/can/j1939/main.c
+@@ -75,6 +75,13 @@ static void j1939_can_recv(struct sk_buf
+ skcb->addr.pgn = (cf->can_id >> 8) & J1939_PGN_MAX;
+ /* set default message type */
+ skcb->addr.type = J1939_TP;
++
++ if (!j1939_address_is_valid(skcb->addr.sa)) {
++ netdev_err_once(priv->ndev, "%s: sa is broadcast address, ignoring!\n",
++ __func__);
++ goto done;
++ }
++
+ if (j1939_pgn_is_pdu1(skcb->addr.pgn)) {
+ /* Type 1: with destination address */
+ skcb->addr.da = skcb->addr.pgn;
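For context, the j1939_address_is_valid() helper the new check relies on
reduces to comparing the source address against the J1939 broadcast/null
address. A minimal sketch of that check, assuming the J1939_NO_ADDR
definition from include/uapi/linux/can/j1939.h and the helper's shape in
net/can/j1939/j1939-priv.h:

	/* 0xff is the J1939 "global" (broadcast) address; a frame may be
	 * sent to it, but must never claim it as its source address */
	#define J1939_NO_ADDR 0xff

	static inline bool j1939_address_is_valid(u8 sa)
	{
		return sa != J1939_NO_ADDR;
	}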
--- /dev/null
+From 164051a6ab5445bd97f719f50b16db8b32174269 Mon Sep 17 00:00:00 2001
+From: Zhang Changzhong <zhangchangzhong@huawei.com>
+Date: Thu, 28 Oct 2021 22:38:27 +0800
+Subject: can: j1939: j1939_tp_cmd_recv(): check the dst address of TP.CM_BAM
+
+From: Zhang Changzhong <zhangchangzhong@huawei.com>
+
+commit 164051a6ab5445bd97f719f50b16db8b32174269 upstream.
+
+The TP.CM_BAM message must be sent to the global address [1], so add a
+check to drop TP.CM_BAM sent to a non-global address.
+
+Without this patch, the receiver will treat the following packets as
+normal RTS/CTS transport:
+18EC0102#20090002FF002301
+18EB0102#0100000000000000
+18EB0102#020000FFFFFFFFFF
+
+[1] SAE-J1939-82 2015 A.3.3 Row 1.
+
+Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
+Link: https://lore.kernel.org/all/1635431907-15617-4-git-send-email-zhangchangzhong@huawei.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
+Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/can/j1939/transport.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/net/can/j1939/transport.c
++++ b/net/can/j1939/transport.c
+@@ -2023,6 +2023,11 @@ static void j1939_tp_cmd_recv(struct j19
+ extd = J1939_ETP;
+ fallthrough;
+ case J1939_TP_CMD_BAM:
++ if (cmd == J1939_TP_CMD_BAM && !j1939_cb_is_broadcast(skcb)) {
++ netdev_err_once(priv->ndev, "%s: BAM to unicast (%02x), ignoring!\n",
++ __func__, skcb->addr.sa);
++ return;
++ }
+ fallthrough;
+ case J1939_TP_CMD_RTS:
+ if (skcb->addr.type != extd)
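To make the example frames above concrete, here is a small stand-alone
decoder (plain C, mirroring the addressing logic visible in the
j1939_can_recv() hunk of the previous patch) applied to the first frame,
18EC0102#20090002FF002301:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t can_id = 0x18EC0102;		/* 29-bit CAN ID */
		unsigned int sa = can_id & 0xff;	/* source address */
		unsigned int pgn = (can_id >> 8) & 0x3ffff; /* J1939_PGN_MAX */
		unsigned int pf = (pgn >> 8) & 0xff;	/* PDU format */
		unsigned int da = 0xff;			/* default: broadcast */

		if (pf < 0xf0) {	/* PDU1: low byte is the dest address */
			da = pgn & 0xff;
			pgn &= 0x3ff00;	/* J1939_PGN_PDU1_MAX */
		}
		/* prints pgn 0xec00 (TP.CM), da 0x1, sa 0x2: with first data
		 * byte 0x20 (BAM), this is a BAM to a unicast destination,
		 * exactly what the patch now rejects */
		printf("pgn %#x da %#x sa %#x\n", pgn, da, sa);
		return 0;
	}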
--- /dev/null
+From c0f49d98006f2db3333b917caac65bce2af9865c Mon Sep 17 00:00:00 2001
+From: Zhang Changzhong <zhangchangzhong@huawei.com>
+Date: Thu, 28 Oct 2021 22:38:25 +0800
+Subject: can: j1939: j1939_tp_cmd_recv(): ignore abort message in the BAM transport
+
+From: Zhang Changzhong <zhangchangzhong@huawei.com>
+
+commit c0f49d98006f2db3333b917caac65bce2af9865c upstream.
+
+This patch prevents the BAM transport from being closed by a received
+abort message, as specified in SAE-J1939-82 2015 (A.3.3 Row 4).
+
+Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
+Link: https://lore.kernel.org/all/1635431907-15617-2-git-send-email-zhangchangzhong@huawei.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
+Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/can/j1939/transport.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/net/can/j1939/transport.c
++++ b/net/can/j1939/transport.c
+@@ -2085,6 +2085,12 @@ static void j1939_tp_cmd_recv(struct j19
+ break;
+
+ case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */
++ if (j1939_cb_is_broadcast(skcb)) {
++ netdev_err_once(priv->ndev, "%s: abort to broadcast (%02x), ignoring!\n",
++ __func__, skcb->addr.sa);
++ return;
++ }
++
+ if (j1939_tp_im_transmitter(skcb))
+ j1939_xtp_rx_abort(priv, skb, true);
+
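The j1939_cb_is_broadcast() test used above amounts to checking that the
message carries no destination NAME and is addressed to the global
address; a sketch assuming the j1939-priv.h definition:

	static inline bool j1939_cb_is_broadcast(const struct j1939_sk_buff_cb *skcb)
	{
		return (!skcb->addr.dst_name && (skcb->addr.da == 0xff));
	}

Since a BAM transfer is a one-way broadcast with no connection to abort,
an abort addressed to the global address must be ignored rather than
allowed to close the ongoing BAM session.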
--- /dev/null
+From 691204bd66b34ba982e19988e6eba9f6321dfe6c Mon Sep 17 00:00:00 2001
+From: Marc Kleine-Budde <mkl@pengutronix.de>
+Date: Fri, 15 Oct 2021 19:46:59 +0200
+Subject: can: mcp251xfd: mcp251xfd_irq(): add missing can_rx_offload_threaded_irq_finish() in case of bus off
+
+From: Marc Kleine-Budde <mkl@pengutronix.de>
+
+commit 691204bd66b34ba982e19988e6eba9f6321dfe6c upstream.
+
+The function can_rx_offload_threaded_irq_finish() is needed to trigger
+the NAPI thread to deliver read CAN frames to the networking stack.
+
+This patch adds the missing call to can_rx_offload_threaded_irq_finish()
+in case of a bus off, before leaving the interrupt handler to avoid
+packet starvation.
+
+Link: https://lore.kernel.org/all/20211106201526.44292-1-mkl@pengutronix.de
+Fixes: 30bfec4fec59 ("can: rx-offload: can_rx_offload_threaded_irq_finish(): add new function to be called from threaded interrupt")
+Cc: stable@vger.kernel.org
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+@@ -2290,8 +2290,10 @@ static irqreturn_t mcp251xfd_irq(int irq
+ * check will fail, too. So leave IRQ handler
+ * directly.
+ */
+- if (priv->can.state == CAN_STATE_BUS_OFF)
++ if (priv->can.state == CAN_STATE_BUS_OFF) {
++ can_rx_offload_threaded_irq_finish(&priv->offload);
+ return IRQ_HANDLED;
++ }
+ }
+
+ handled = IRQ_HANDLED;
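To spell out the pattern being fixed: with rx-offload, the IRQ handler
only queues received skbs, and NAPI delivers them to the stack once
can_rx_offload_threaded_irq_finish() is called, so every exit path taken
after frames may have been queued needs that call. A schematic sketch
(illustrative names, not the real driver code):

	static irqreturn_t example_irq(int irq, void *dev_id)
	{
		struct example_priv *priv = dev_id;	/* hypothetical */

		/* ... read frames and queue them via rx-offload ... */

		if (priv->can.state == CAN_STATE_BUS_OFF) {
			/* bus off: still kick NAPI for the frames queued
			 * above before bailing out, or they starve */
			can_rx_offload_threaded_irq_finish(&priv->offload);
			return IRQ_HANDLED;
		}

		can_rx_offload_threaded_irq_finish(&priv->offload);
		return IRQ_HANDLED;
	}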
--- /dev/null
+From 3f1c7aa28498e52a5e6aa2f1b89bf35c63352cfd Mon Sep 17 00:00:00 2001
+From: Stephane Grosjean <s.grosjean@peak-system.com>
+Date: Thu, 21 Oct 2021 10:15:04 +0200
+Subject: can: peak_usb: always ask for BERR reporting for PCAN-USB devices
+
+From: Stephane Grosjean <s.grosjean@peak-system.com>
+
+commit 3f1c7aa28498e52a5e6aa2f1b89bf35c63352cfd upstream.
+
+For the PCAN-USB, the transition to the ERROR_WARNING or ERROR_PASSIVE
+state is managed according to the error counters, so these must be
+requested unconditionally.
+
+Link: https://lore.kernel.org/all/20211021081505.18223-2-s.grosjean@peak-system.com
+Fixes: c11dcee75830 ("can: peak_usb: pcan_usb_decode_error(): upgrade handling of bus state changes")
+Cc: stable@vger.kernel.org
+Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/can/usb/peak_usb/pcan_usb.c | 17 ++++++++---------
+ 1 file changed, 8 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
++++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
+@@ -841,14 +841,14 @@ static int pcan_usb_start(struct peak_us
+ pdev->bec.rxerr = 0;
+ pdev->bec.txerr = 0;
+
+- /* be notified on error counter changes (if requested by user) */
+- if (dev->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) {
+- err = pcan_usb_set_err_frame(dev, PCAN_USB_BERR_MASK);
+- if (err)
+- netdev_warn(dev->netdev,
+- "Asking for BERR reporting error %u\n",
+- err);
+- }
++ /* always ask the device for BERR reporting, to be able to switch from
++ * WARNING to PASSIVE state
++ */
++ err = pcan_usb_set_err_frame(dev, PCAN_USB_BERR_MASK);
++ if (err)
++ netdev_warn(dev->netdev,
++ "Asking for BERR reporting error %u\n",
++ err);
+
+ /* if revision greater than 3, can put silent mode on/off */
+ if (dev->device_rev > 3) {
+@@ -986,7 +986,6 @@ const struct peak_usb_adapter pcan_usb =
+ .device_id = PCAN_USB_PRODUCT_ID,
+ .ctrl_count = 1,
+ .ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY |
+- CAN_CTRLMODE_BERR_REPORTING |
+ CAN_CTRLMODE_CC_LEN8_DLC,
+ .clock = {
+ .freq = PCAN_USB_CRYSTAL_HZ / 2,
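The reason the counters must always be requested follows from the
classic CAN fault-confinement rules: the WARNING and PASSIVE transitions
are defined purely in terms of the TX/RX error counters. A stand-alone
sketch of those thresholds (per ISO 11898-1; not PCAN-specific code):

	#include <stdio.h>

	static const char *can_state(unsigned int txerr, unsigned int rxerr)
	{
		if (txerr >= 256)
			return "BUS_OFF";
		if (txerr >= 128 || rxerr >= 128)
			return "ERROR_PASSIVE";
		if (txerr >= 96 || rxerr >= 96)
			return "ERROR_WARNING";
		return "ERROR_ACTIVE";
	}

	int main(void)
	{
		printf("%s\n", can_state(130, 0));	/* ERROR_PASSIVE */
		return 0;
	}

Without the counter updates, the driver simply cannot compute these
transitions, whether or not the user asked for berr-reporting frames.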
--- /dev/null
+From 69b31fd7a61784692db6433c05d46915b1b1a680 Mon Sep 17 00:00:00 2001
+From: Oleksij Rempel <o.rempel@pengutronix.de>
+Date: Thu, 7 Oct 2021 11:30:06 +0200
+Subject: iio: adc: tsc2046: fix scan interval warning
+
+From: Oleksij Rempel <o.rempel@pengutronix.de>
+
+commit 69b31fd7a61784692db6433c05d46915b1b1a680 upstream.
+
+Sync the if statement with the actual warning message.
+
+Fixes: 9504db5765e8 ("iio: adc: tsc2046: fix a warning message in tsc2046_adc_update_scan_mode()")
+Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Link: https://lore.kernel.org/r/20211007093007.1466-2-o.rempel@pengutronix.de
+Cc: <Stable@vger.kernel.org>
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/iio/adc/ti-tsc2046.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/iio/adc/ti-tsc2046.c
++++ b/drivers/iio/adc/ti-tsc2046.c
+@@ -398,7 +398,7 @@ static int tsc2046_adc_update_scan_mode(
+ priv->xfer.len = size;
+ priv->time_per_scan_us = size * 8 * priv->time_per_bit_ns / NSEC_PER_USEC;
+
+- if (priv->scan_interval_us > priv->time_per_scan_us)
++ if (priv->scan_interval_us < priv->time_per_scan_us)
+ dev_warn(&priv->spi->dev, "The scan interval (%d) is less then calculated scan time (%d)\n",
+ priv->scan_interval_us, priv->time_per_scan_us);
+
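The corrected comparison is easy to sanity-check against the surrounding
arithmetic: time_per_scan_us is derived from the transfer size and the
per-bit time, and the warning should fire only when the configured
interval is shorter than one scan takes. A stand-alone sketch with
illustrative numbers (not values taken from the driver):

	#include <stdio.h>

	#define NSEC_PER_USEC 1000UL

	int main(void)
	{
		unsigned long size = 64;		/* bytes, illustrative */
		unsigned long time_per_bit_ns = 500;	/* illustrative */
		unsigned long scan_interval_us = 100;	/* illustrative */
		unsigned long time_per_scan_us =
			size * 8 * time_per_bit_ns / NSEC_PER_USEC;

		/* warn only when the interval is shorter than a scan */
		if (scan_interval_us < time_per_scan_us)
			printf("interval %lu us < scan time %lu us\n",
			       scan_interval_us, time_per_scan_us);
		return 0;
	}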
--- /dev/null
+From bad119b9a00019054f0c9e2045f312ed63ace4f4 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 8 Nov 2021 15:10:03 +0000
+Subject: io_uring: honour zeroes as io-wq worker limits
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit bad119b9a00019054f0c9e2045f312ed63ace4f4 upstream.
+
+When we pass in zero as an io-wq worker number limit it shouldn't
+actually change the limits but return the old value. Follow that
+behaviour with the deferred limits setup as well.
+
+Cc: stable@kernel.org # 5.15
+Reported-by: Beld Zhang <beldzhang@gmail.com>
+Fixes: e139a1ec92f8d ("io_uring: apply max_workers limit to all future users")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/1b222a92f7a78a24b042763805e891a4cdd4b544.1636384034.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -10684,7 +10684,9 @@ static int io_register_iowq_max_workers(
+
+ BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));
+
+- memcpy(ctx->iowq_limits, new_count, sizeof(new_count));
++ for (i = 0; i < ARRAY_SIZE(new_count); i++)
++ if (new_count[i])
++ ctx->iowq_limits[i] = new_count[i];
+ ctx->iowq_limits_set = true;
+
+ ret = -EINVAL;
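From userspace this behaviour is reachable through
IORING_REGISTER_IOWQ_MAX_WORKERS: an all-zero array must leave the
limits untouched and report the current values back. A minimal sketch
using the liburing wrapper (assuming liburing >= 2.1 is available):

	#include <liburing.h>
	#include <stdio.h>

	int main(void)
	{
		struct io_uring ring;
		unsigned int vals[2] = { 0, 0 }; /* zero: query, don't change */

		if (io_uring_queue_init(8, &ring, 0) < 0)
			return 1;
		if (io_uring_register_iowq_max_workers(&ring, vals) == 0)
			printf("bounded %u, unbounded %u\n", vals[0], vals[1]);
		io_uring_queue_exit(&ring);
		return 0;
	}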
--- /dev/null
+From 8bb084119f1acc2ec55ea085a97231e3ddb30782 Mon Sep 17 00:00:00 2001
+From: Mark Rutland <mark.rutland@arm.com>
+Date: Wed, 3 Nov 2021 11:05:45 +0000
+Subject: KVM: arm64: Extract ESR_ELx.EC only
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+commit 8bb084119f1acc2ec55ea085a97231e3ddb30782 upstream.
+
+Since ARMv8.0 the upper 32 bits of ESR_ELx have been RES0, and recently
+some of the upper bits gained a meaning and can be non-zero. For
+example, when FEAT_LS64 is implemented, ESR_ELx[36:32] contain ISS2,
+which for an ST64BV or ST64BV0 can be non-zero. This can be seen in ARM
+DDI 0487G.b, page D13-3145, section D13.2.37.
+
+Generally, we must not rely on RES0 bits remaining zero in future, and
+when extracting ESR_ELx.EC we must mask out all other bits.
+
+All C code uses the ESR_ELx_EC() macro, which masks out the irrelevant
+bits, and therefore no alterations are required to C code to avoid
+consuming irrelevant bits.
+
+In a couple of places the KVM assembly extracts ESR_ELx.EC using LSR on
+an X register, and so could in theory consume previously RES0 bits. In
+both cases this is for comparison with EC values ESR_ELx_EC_HVC32 and
+ESR_ELx_EC_HVC64, for which the upper bits of ESR_ELx must currently be
+zero, but this could change in future.
+
+This patch adjusts the KVM vectors to use UBFX rather than LSR to
+extract ESR_ELx.EC, ensuring these are robust to future additions to
+ESR_ELx.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Cc: Alexandru Elisei <alexandru.elisei@arm.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: James Morse <james.morse@arm.com>
+Cc: Marc Zyngier <maz@kernel.org>
+Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Acked-by: Will Deacon <will@kernel.org>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20211103110545.4613-1-mark.rutland@arm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/include/asm/esr.h | 1 +
+ arch/arm64/kvm/hyp/hyp-entry.S | 2 +-
+ arch/arm64/kvm/hyp/nvhe/host.S | 2 +-
+ 3 files changed, 3 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/include/asm/esr.h
++++ b/arch/arm64/include/asm/esr.h
+@@ -68,6 +68,7 @@
+ #define ESR_ELx_EC_MAX (0x3F)
+
+ #define ESR_ELx_EC_SHIFT (26)
++#define ESR_ELx_EC_WIDTH (6)
+ #define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT)
+ #define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
+
+--- a/arch/arm64/kvm/hyp/hyp-entry.S
++++ b/arch/arm64/kvm/hyp/hyp-entry.S
+@@ -44,7 +44,7 @@
+ el1_sync: // Guest trapped into EL2
+
+ mrs x0, esr_el2
+- lsr x0, x0, #ESR_ELx_EC_SHIFT
++ ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
+ cmp x0, #ESR_ELx_EC_HVC64
+ ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
+ b.ne el1_trap
+--- a/arch/arm64/kvm/hyp/nvhe/host.S
++++ b/arch/arm64/kvm/hyp/nvhe/host.S
+@@ -115,7 +115,7 @@ SYM_FUNC_END(__hyp_do_panic)
+ .L__vect_start\@:
+ stp x0, x1, [sp, #-16]!
+ mrs x0, esr_el2
+- lsr x0, x0, #ESR_ELx_EC_SHIFT
++ ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
+ cmp x0, #ESR_ELx_EC_HVC64
+ b.ne __host_exit
+
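The difference between LSR and UBFX is just "shift" versus "shift and
mask", which a few lines of C can demonstrate: once any bit above the EC
field (such as an ISS2 bit) is set, a bare shift no longer compares
equal to an EC constant. A stand-alone sketch:

	#include <stdint.h>
	#include <stdio.h>

	#define ESR_ELx_EC_SHIFT 26
	#define ESR_ELx_EC_WIDTH 6

	int main(void)
	{
		/* an ISS2 bit ([36:32]) set alongside EC = HVC64 (0x16) */
		uint64_t esr = (1ULL << 32) | (0x16ULL << ESR_ELx_EC_SHIFT);
		uint64_t lsr = esr >> ESR_ELx_EC_SHIFT;	/* 0x56: wrong */
		uint64_t ubfx = (esr >> ESR_ELx_EC_SHIFT) &
				((1ULL << ESR_ELx_EC_WIDTH) - 1); /* 0x16 */

		printf("lsr %#llx ubfx %#llx\n",
		       (unsigned long long)lsr, (unsigned long long)ubfx);
		return 0;
	}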
--- /dev/null
+From 67f4b9969c305be515e47f809ecacfd86bd20a9c Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 9 Nov 2021 01:30:45 +0000
+Subject: KVM: nVMX: Handle dynamic MSR intercept toggling
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 67f4b9969c305be515e47f809ecacfd86bd20a9c upstream.
+
+Always check vmcs01's MSR bitmap when merging L0 and L1 bitmaps for L2,
+and always update the relevant bits in vmcs02. This fixes two distinct,
+but intertwined bugs related to dynamic MSR bitmap modifications.
+
+The first issue is that KVM fails to enable MSR interception in vmcs02
+for the FS/GS base MSRs if L1 first runs L2 with interception disabled,
+and later enables interception.
+
+The second issue is that KVM fails to honor userspace MSR filtering when
+preparing vmcs02.
+
+Fix both issues simultaneously, as fixing only one of the issues (doesn't
+matter which) would create a mess that no one should have to bisect.
+Fixing only the first bug would exacerbate the MSR filtering issue as
+userspace would see inconsistent behavior depending on the whims of L1.
+Fixing only the second bug (MSR filtering) effectively requires fixing
+the first, as the nVMX code only knows how to transition vmcs02's
+bitmap from 1->0.
+
+Move the various accessor/mutators that are currently buried in vmx.c
+into vmx.h so that they can be shared by the nested code.
+
+Fixes: 1a155254ff93 ("KVM: x86: Introduce MSR filtering")
+Fixes: d69129b4e46a ("KVM: nVMX: Disable intercept for FS/GS base MSRs in vmcs02 when possible")
+Cc: stable@vger.kernel.org
+Cc: Alexander Graf <graf@amazon.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20211109013047.2041518-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c | 103 ++++++++++++++++++++--------------------------
+ arch/x86/kvm/vmx/vmx.c | 55 ------------------------
+ arch/x86/kvm/vmx/vmx.h | 63 ++++++++++++++++++++++++++++
+ 3 files changed, 111 insertions(+), 110 deletions(-)
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -524,29 +524,6 @@ static int nested_vmx_check_tpr_shadow_c
+ }
+
+ /*
+- * Check if MSR is intercepted for L01 MSR bitmap.
+- */
+-static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
+-{
+- unsigned long *msr_bitmap;
+- int f = sizeof(unsigned long);
+-
+- if (!cpu_has_vmx_msr_bitmap())
+- return true;
+-
+- msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
+-
+- if (msr <= 0x1fff) {
+- return !!test_bit(msr, msr_bitmap + 0x800 / f);
+- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+- msr &= 0x1fff;
+- return !!test_bit(msr, msr_bitmap + 0xc00 / f);
+- }
+-
+- return true;
+-}
+-
+-/*
+ * If a msr is allowed by L0, we should check whether it is allowed by L1.
+ * The corresponding bit will be cleared unless both of L0 and L1 allow it.
+ */
+@@ -599,6 +576,34 @@ static inline void enable_x2apic_msr_int
+ }
+ }
+
++#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw) \
++static inline \
++void nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx, \
++ unsigned long *msr_bitmap_l1, \
++ unsigned long *msr_bitmap_l0, u32 msr) \
++{ \
++ if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) || \
++ vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr)) \
++ vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr); \
++ else \
++ vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr); \
++}
++BUILD_NVMX_MSR_INTERCEPT_HELPER(read)
++BUILD_NVMX_MSR_INTERCEPT_HELPER(write)
++
++static inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx,
++ unsigned long *msr_bitmap_l1,
++ unsigned long *msr_bitmap_l0,
++ u32 msr, int types)
++{
++ if (types & MSR_TYPE_R)
++ nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1,
++ msr_bitmap_l0, msr);
++ if (types & MSR_TYPE_W)
++ nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1,
++ msr_bitmap_l0, msr);
++}
++
+ /*
+ * Merge L0's and L1's MSR bitmap, return false to indicate that
+ * we do not use the hardware.
+@@ -606,10 +611,11 @@ static inline void enable_x2apic_msr_int
+ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+ {
++ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int msr;
+ unsigned long *msr_bitmap_l1;
+- unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
+- struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;
++ unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
++ struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;
+
+ /* Nothing to do if the MSR bitmap is not in use. */
+ if (!cpu_has_vmx_msr_bitmap() ||
+@@ -660,44 +666,27 @@ static inline bool nested_vmx_prepare_ms
+ }
+ }
+
+- /* KVM unconditionally exposes the FS/GS base MSRs to L1. */
++ /*
++ * Always check vmcs01's bitmap to honor userspace MSR filters and any
++ * other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through.
++ */
+ #ifdef CONFIG_X86_64
+- nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
+- MSR_FS_BASE, MSR_TYPE_RW);
++ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
++ MSR_FS_BASE, MSR_TYPE_RW);
+
+- nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
+- MSR_GS_BASE, MSR_TYPE_RW);
++ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
++ MSR_GS_BASE, MSR_TYPE_RW);
+
+- nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
+- MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
++ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
++ MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+ #endif
++ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
++ MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
+
+- /*
+- * Checking the L0->L1 bitmap is trying to verify two things:
+- *
+- * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
+- * ensures that we do not accidentally generate an L02 MSR bitmap
+- * from the L12 MSR bitmap that is too permissive.
+- * 2. That L1 or L2s have actually used the MSR. This avoids
+- * unnecessarily merging of the bitmap if the MSR is unused. This
+- * works properly because we only update the L01 MSR bitmap lazily.
+- * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
+- * updated to reflect this when L1 (or its L2s) actually write to
+- * the MSR.
+- */
+- if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
+- nested_vmx_disable_intercept_for_msr(
+- msr_bitmap_l1, msr_bitmap_l0,
+- MSR_IA32_SPEC_CTRL,
+- MSR_TYPE_R | MSR_TYPE_W);
+-
+- if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
+- nested_vmx_disable_intercept_for_msr(
+- msr_bitmap_l1, msr_bitmap_l0,
+- MSR_IA32_PRED_CMD,
+- MSR_TYPE_W);
++ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
++ MSR_IA32_PRED_CMD, MSR_TYPE_W);
+
+- kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false);
++ kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
+
+ return true;
+ }
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -771,22 +771,11 @@ void vmx_update_exception_bitmap(struct
+ */
+ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
+ {
+- unsigned long *msr_bitmap;
+- int f = sizeof(unsigned long);
+-
+ if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
+ return true;
+
+- msr_bitmap = vmx->loaded_vmcs->msr_bitmap;
+-
+- if (msr <= 0x1fff) {
+- return !!test_bit(msr, msr_bitmap + 0x800 / f);
+- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+- msr &= 0x1fff;
+- return !!test_bit(msr, msr_bitmap + 0xc00 / f);
+- }
+-
+- return true;
++ return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap,
++ MSR_IA32_SPEC_CTRL);
+ }
+
+ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+@@ -3695,46 +3684,6 @@ void free_vpid(int vpid)
+ spin_unlock(&vmx_vpid_lock);
+ }
+
+-static void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
+-{
+- int f = sizeof(unsigned long);
+-
+- if (msr <= 0x1fff)
+- __clear_bit(msr, msr_bitmap + 0x000 / f);
+- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+- __clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
+-}
+-
+-static void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
+-{
+- int f = sizeof(unsigned long);
+-
+- if (msr <= 0x1fff)
+- __clear_bit(msr, msr_bitmap + 0x800 / f);
+- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+- __clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
+-}
+-
+-static void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
+-{
+- int f = sizeof(unsigned long);
+-
+- if (msr <= 0x1fff)
+- __set_bit(msr, msr_bitmap + 0x000 / f);
+- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+- __set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
+-}
+-
+-static void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
+-{
+- int f = sizeof(unsigned long);
+-
+- if (msr <= 0x1fff)
+- __set_bit(msr, msr_bitmap + 0x800 / f);
+- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+- __set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
+-}
+-
+ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
+ {
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -400,6 +400,69 @@ static inline void vmx_set_intercept_for
+
+ void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
+
++static inline bool vmx_test_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
++{
++ int f = sizeof(unsigned long);
++
++ if (msr <= 0x1fff)
++ return test_bit(msr, msr_bitmap + 0x000 / f);
++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
++ return test_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
++ return true;
++}
++
++static inline bool vmx_test_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
++{
++ int f = sizeof(unsigned long);
++
++ if (msr <= 0x1fff)
++ return test_bit(msr, msr_bitmap + 0x800 / f);
++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
++ return test_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
++ return true;
++}
++
++static inline void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
++{
++ int f = sizeof(unsigned long);
++
++ if (msr <= 0x1fff)
++ __clear_bit(msr, msr_bitmap + 0x000 / f);
++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
++ __clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
++}
++
++static inline void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
++{
++ int f = sizeof(unsigned long);
++
++ if (msr <= 0x1fff)
++ __clear_bit(msr, msr_bitmap + 0x800 / f);
++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
++ __clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
++}
++
++static inline void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
++{
++ int f = sizeof(unsigned long);
++
++ if (msr <= 0x1fff)
++ __set_bit(msr, msr_bitmap + 0x000 / f);
++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
++ __set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
++}
++
++static inline void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
++{
++ int f = sizeof(unsigned long);
++
++ if (msr <= 0x1fff)
++ __set_bit(msr, msr_bitmap + 0x800 / f);
++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
++ __set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
++}
++
++
+ static inline u8 vmx_get_rvi(void)
+ {
+ return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
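The BUILD_NVMX_MSR_INTERCEPT_HELPER() macro above encodes a simple merge
rule: vmcs02 intercepts an MSR access whenever either vmcs01 (L0's live
policy, including userspace MSR filters) or L1's bitmap intercepts it,
and passes it through only when both allow it. A trivial stand-alone
illustration of that OR-merge:

	#include <stdbool.h>
	#include <stdio.h>

	static bool l2_intercepts(bool l0_intercepts, bool l1_intercepts)
	{
		return l0_intercepts || l1_intercepts;
	}

	int main(void)
	{
		for (int l0 = 0; l0 <= 1; l0++)
			for (int l1 = 0; l1 <= 1; l1++)
				printf("L0=%d L1=%d -> vmcs02=%d\n",
				       l0, l1, l2_intercepts(l0, l1));
		return 0;
	}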
--- /dev/null
+From 7dfbc624eb5726367900c8d86deff50836240361 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 9 Nov 2021 01:30:44 +0000
+Subject: KVM: nVMX: Query current VMCS when determining if MSR bitmaps are in use
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 7dfbc624eb5726367900c8d86deff50836240361 upstream.
+
+Check the current VMCS controls to determine if an MSR write will be
+intercepted due to MSR bitmaps being disabled. In the nested VMX case,
+KVM will disable MSR bitmaps in vmcs02 if they're disabled in vmcs12 or
+if KVM can't map L1's bitmaps for whatever reason.
+
+Note, the bad behavior is relatively benign in the current code base as
+KVM sets all bits in vmcs02's MSR bitmap by default, clears bits if and
+only if L0 KVM also disables interception of an MSR, and only uses the
+buggy helper for MSR_IA32_SPEC_CTRL. Because KVM explicitly tests WRMSR
+before disabling interception of MSR_IA32_SPEC_CTRL, the flawed check
+will only result in KVM reading MSR_IA32_SPEC_CTRL from hardware when it
+isn't strictly necessary.
+
+Tag the fix for stable in case a future fix wants to use
+msr_write_intercepted(), in which case a buggy implementation in older
+kernels could prove subtly problematic.
+
+Fixes: d28b387fb74d ("KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20211109013047.2041518-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmx.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -769,15 +769,15 @@ void vmx_update_exception_bitmap(struct
+ /*
+ * Check if MSR is intercepted for currently loaded MSR bitmap.
+ */
+-static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
++static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
+ {
+ unsigned long *msr_bitmap;
+ int f = sizeof(unsigned long);
+
+- if (!cpu_has_vmx_msr_bitmap())
++ if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
+ return true;
+
+- msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
++ msr_bitmap = vmx->loaded_vmcs->msr_bitmap;
+
+ if (msr <= 0x1fff) {
+ return !!test_bit(msr, msr_bitmap + 0x800 / f);
+@@ -6720,7 +6720,7 @@ static fastpath_t vmx_vcpu_run(struct kv
+ * If the L02 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ */
+- if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
++ if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
+ vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
+
+ x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
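For reference, the lookup msr_write_intercepted() performs follows the
VMX MSR-bitmap layout: write bits for low MSRs (0x0-0x1fff) start at
byte offset 0x800, write bits for high MSRs (0xc0000000-0xc0001fff) at
0xc00, and anything outside both ranges is unconditionally intercepted.
A stand-alone sketch of that indexing (byte-wise rather than the
kernel's unsigned-long test_bit(), same result):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	static bool write_intercepted(const uint8_t *bitmap, uint32_t msr)
	{
		if (msr <= 0x1fff)
			return bitmap[0x800 + msr / 8] & (1 << (msr % 8));
		if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
			msr &= 0x1fff;
			return bitmap[0xc00 + msr / 8] & (1 << (msr % 8));
		}
		return true;	/* outside both ranges: always intercepted */
	}

	int main(void)
	{
		static uint8_t bitmap[4096];	/* all clear: pass-through */

		printf("%d %d\n",
		       write_intercepted(bitmap, 0x48),	  /* SPEC_CTRL: 0 */
		       write_intercepted(bitmap, 0x2000)); /* no slot: 1 */
		return 0;
	}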
--- /dev/null
+From 8b44b174f6aca815fc84c2038e4523ef8e32fabb Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 5 Nov 2021 09:51:00 +0000
+Subject: KVM: x86: Add helper to consolidate core logic of SET_CPUID{2} flows
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 8b44b174f6aca815fc84c2038e4523ef8e32fabb upstream.
+
+Move the core logic of SET_CPUID and SET_CPUID2 to a common helper; the
+only difference between the two ioctls() is the format of the userspace
+struct. A future fix will add yet more code to the core logic.
+
+No functional change intended.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20211105095101.5384-2-pdurrant@amazon.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c | 47 ++++++++++++++++++++++++-----------------------
+ 1 file changed, 24 insertions(+), 23 deletions(-)
+
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -232,6 +232,25 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struc
+ return rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
+ }
+
++static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
++ int nent)
++{
++ int r;
++
++ r = kvm_check_cpuid(e2, nent);
++ if (r)
++ return r;
++
++ kvfree(vcpu->arch.cpuid_entries);
++ vcpu->arch.cpuid_entries = e2;
++ vcpu->arch.cpuid_nent = nent;
++
++ kvm_update_cpuid_runtime(vcpu);
++ kvm_vcpu_after_set_cpuid(vcpu);
++
++ return 0;
++}
++
+ /* when an old userspace process fills a new kernel module */
+ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
+ struct kvm_cpuid *cpuid,
+@@ -268,18 +287,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_
+ e2[i].padding[2] = 0;
+ }
+
+- r = kvm_check_cpuid(e2, cpuid->nent);
+- if (r) {
++ r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
++ if (r)
+ kvfree(e2);
+- goto out_free_cpuid;
+- }
+-
+- kvfree(vcpu->arch.cpuid_entries);
+- vcpu->arch.cpuid_entries = e2;
+- vcpu->arch.cpuid_nent = cpuid->nent;
+-
+- kvm_update_cpuid_runtime(vcpu);
+- kvm_vcpu_after_set_cpuid(vcpu);
+
+ out_free_cpuid:
+ kvfree(e);
+@@ -303,20 +313,11 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm
+ return PTR_ERR(e2);
+ }
+
+- r = kvm_check_cpuid(e2, cpuid->nent);
+- if (r) {
++ r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
++ if (r)
+ kvfree(e2);
+- return r;
+- }
+
+- kvfree(vcpu->arch.cpuid_entries);
+- vcpu->arch.cpuid_entries = e2;
+- vcpu->arch.cpuid_nent = cpuid->nent;
+-
+- kvm_update_cpuid_runtime(vcpu);
+- kvm_vcpu_after_set_cpuid(vcpu);
+-
+- return 0;
++ return r;
+ }
+
+ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
--- /dev/null
+From 7e2175ebd695f17860c5bd4ad7616cce12ed4591 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw2@infradead.org>
+Date: Tue, 2 Nov 2021 17:36:39 +0000
+Subject: KVM: x86: Fix recording of guest steal time / preempted status
+
+From: David Woodhouse <dwmw2@infradead.org>
+
+commit 7e2175ebd695f17860c5bd4ad7616cce12ed4591 upstream.
+
+In commit b043138246a4 ("x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is
+not missed") we switched to using a gfn_to_pfn_cache for accessing the
+guest steal time structure in order to allow for an atomic xchg of the
+preempted field. This has a couple of problems.
+
+Firstly, kvm_map_gfn() doesn't work at all for IOMEM pages when the
+atomic flag is set, which it is in kvm_steal_time_set_preempted(). So a
+guest vCPU using an IOMEM page for its steal time would never have its
+preempted field set.
+
+Secondly, the gfn_to_pfn_cache is not invalidated in all cases where it
+should have been. There are two stages to the GFN->PFN conversion;
+first the GFN is converted to a userspace HVA, and then that HVA is
+looked up in the process page tables to find the underlying host PFN.
+Correct invalidation of the latter would require being hooked up to the
+MMU notifiers, but that doesn't happen, so it just keeps mapping and
+unmapping the *wrong* PFN after the userspace page tables change.
+
+In the !IOMEM case at least the stale page *is* pinned all the time it's
+cached, so it won't be freed and reused by anyone else while still
+receiving the steal time updates. The map/unmap dance only takes care
+of the KVM administrivia such as marking the page dirty.
+
+Until the gfn_to_pfn cache handles the remapping automatically by
+integrating with the MMU notifiers, we might as well not get a
+kernel mapping of it, and use the perfectly serviceable userspace HVA
+that we already have. We just need to implement the atomic xchg on
+the userspace address with appropriate exception handling, which is
+fairly trivial.
+
+Cc: stable@vger.kernel.org
+Fixes: b043138246a4 ("x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is not missed")
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Message-Id: <3645b9b889dac6438394194bb5586a46b68d581f.camel@infradead.org>
+[I didn't entirely agree with David's assessment of the
+ usefulness of the gfn_to_pfn cache, and integrated the outcome
+ of the discussion in the above commit message. - Paolo]
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_host.h | 2
+ arch/x86/kvm/x86.c | 105 ++++++++++++++++++++++++++++------------
+ 2 files changed, 76 insertions(+), 31 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -751,7 +751,7 @@ struct kvm_vcpu_arch {
+ u8 preempted;
+ u64 msr_val;
+ u64 last_steal;
+- struct gfn_to_pfn_cache cache;
++ struct gfn_to_hva_cache cache;
+ } st;
+
+ u64 l1_tsc_offset;
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3195,8 +3195,11 @@ static void kvm_vcpu_flush_tlb_guest(str
+
+ static void record_steal_time(struct kvm_vcpu *vcpu)
+ {
+- struct kvm_host_map map;
+- struct kvm_steal_time *st;
++ struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
++ struct kvm_steal_time __user *st;
++ struct kvm_memslots *slots;
++ u64 steal;
++ u32 version;
+
+ if (kvm_xen_msr_enabled(vcpu->kvm)) {
+ kvm_xen_runstate_set_running(vcpu);
+@@ -3206,47 +3209,83 @@ static void record_steal_time(struct kvm
+ if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ return;
+
+- /* -EAGAIN is returned in atomic context so we can just return. */
+- if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
+- &map, &vcpu->arch.st.cache, false))
++ if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm))
+ return;
+
+- st = map.hva +
+- offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
++ slots = kvm_memslots(vcpu->kvm);
++
++ if (unlikely(slots->generation != ghc->generation ||
++ kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
++ gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
++
++ /* We rely on the fact that it fits in a single page. */
++ BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
++
++ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
++ kvm_is_error_hva(ghc->hva) || !ghc->memslot)
++ return;
++ }
++
++ st = (struct kvm_steal_time __user *)ghc->hva;
++ if (!user_access_begin(st, sizeof(*st)))
++ return;
+
+ /*
+ * Doing a TLB flush here, on the guest's behalf, can avoid
+ * expensive IPIs.
+ */
+ if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
+- u8 st_preempted = xchg(&st->preempted, 0);
++ u8 st_preempted = 0;
++ int err = -EFAULT;
++
++ asm volatile("1: xchgb %0, %2\n"
++ "xor %1, %1\n"
++ "2:\n"
++ _ASM_EXTABLE_UA(1b, 2b)
++ : "+r" (st_preempted),
++ "+&r" (err)
++ : "m" (st->preempted));
++ if (err)
++ goto out;
++
++ user_access_end();
++
++ vcpu->arch.st.preempted = 0;
+
+ trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
+ st_preempted & KVM_VCPU_FLUSH_TLB);
+ if (st_preempted & KVM_VCPU_FLUSH_TLB)
+ kvm_vcpu_flush_tlb_guest(vcpu);
++
++ if (!user_access_begin(st, sizeof(*st)))
++ goto dirty;
+ } else {
+- st->preempted = 0;
++ unsafe_put_user(0, &st->preempted, out);
++ vcpu->arch.st.preempted = 0;
+ }
+
+- vcpu->arch.st.preempted = 0;
+-
+- if (st->version & 1)
+- st->version += 1; /* first time write, random junk */
++ unsafe_get_user(version, &st->version, out);
++ if (version & 1)
++ version += 1; /* first time write, random junk */
+
+- st->version += 1;
++ version += 1;
++ unsafe_put_user(version, &st->version, out);
+
+ smp_wmb();
+
+- st->steal += current->sched_info.run_delay -
++ unsafe_get_user(steal, &st->steal, out);
++ steal += current->sched_info.run_delay -
+ vcpu->arch.st.last_steal;
+ vcpu->arch.st.last_steal = current->sched_info.run_delay;
++ unsafe_put_user(steal, &st->steal, out);
+
+- smp_wmb();
+-
+- st->version += 1;
++ version += 1;
++ unsafe_put_user(version, &st->version, out);
+
+- kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
++ out:
++ user_access_end();
++ dirty:
++ mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
+ }
+
+ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+@@ -4285,8 +4324,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu
+
+ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
+ {
+- struct kvm_host_map map;
+- struct kvm_steal_time *st;
++ struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
++ struct kvm_steal_time __user *st;
++ struct kvm_memslots *slots;
++ static const u8 preempted = KVM_VCPU_PREEMPTED;
+
+ if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ return;
+@@ -4294,16 +4335,23 @@ static void kvm_steal_time_set_preempted
+ if (vcpu->arch.st.preempted)
+ return;
+
+- if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
+- &vcpu->arch.st.cache, true))
++ /* This happens on process exit */
++ if (unlikely(current->mm != vcpu->kvm->mm))
+ return;
+
+- st = map.hva +
+- offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
++ slots = kvm_memslots(vcpu->kvm);
++
++ if (unlikely(slots->generation != ghc->generation ||
++ kvm_is_error_hva(ghc->hva) || !ghc->memslot))
++ return;
+
+- st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
++ st = (struct kvm_steal_time __user *)ghc->hva;
++ BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));
+
+- kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
++ if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
++ vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
++
++ mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
+ }
+
+ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+@@ -10817,11 +10865,8 @@ void kvm_arch_vcpu_postcreate(struct kvm
+
+ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+ {
+- struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
+ int idx;
+
+- kvm_release_pfn(cache->pfn, cache->dirty, cache);
+-
+ kvmclock_reset(vcpu);
+
+ static_call(kvm_x86_vcpu_free)(vcpu);
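One detail worth keeping in view when reading the unsafe_get_user() /
unsafe_put_user() sequence is the version protocol it preserves: the
version field is made odd before the steal value is updated and even
again afterwards, so a guest reader can detect a torn update. A
stand-alone model of that protocol (kernel memory barriers reduced to
comments):

	#include <stdint.h>
	#include <stdio.h>

	struct steal_time { uint32_t version; uint64_t steal; };

	static void writer_update(struct steal_time *st, uint64_t delta)
	{
		if (st->version & 1)
			st->version += 1; /* first-time write: random junk */
		st->version += 1;	/* now odd: update in progress */
		/* smp_wmb() */
		st->steal += delta;
		/* smp_wmb() */
		st->version += 1;	/* even again: consistent */
	}

	static uint64_t reader_read(const struct steal_time *st)
	{
		uint32_t v;
		uint64_t s;

		do {	/* retry while odd or changed across the read */
			v = st->version;
			/* smp_rmb() */
			s = st->steal;
			/* smp_rmb() */
		} while ((v & 1) || v != st->version);
		return s;
	}

	int main(void)
	{
		struct steal_time st = { 0, 0 };

		writer_update(&st, 42);
		printf("steal %llu\n", (unsigned long long)reader_read(&st));
		return 0;
	}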
--- /dev/null
+From 3c2172c1c47b4079c29f0e6637d764a99355ebcd Mon Sep 17 00:00:00 2001
+From: Xiaoming Ni <nixiaoming@huawei.com>
+Date: Wed, 29 Sep 2021 11:36:45 +0800
+Subject: powerpc/85xx: Fix oops when mpc85xx_smp_guts_ids node cannot be found
+
+From: Xiaoming Ni <nixiaoming@huawei.com>
+
+commit 3c2172c1c47b4079c29f0e6637d764a99355ebcd upstream.
+
+When the node described by mpc85xx_smp_guts_ids[] is not present in the
+dtb, mpc85xx_setup_pmc() does not assign a value to the "guts" variable.
+As a result, an oops is triggered when mpc85xx_freeze_time_base() is
+executed.
+
+Fixes: 56f1ba280719 ("powerpc/mpc85xx: refactor the PM operations")
+Cc: stable@vger.kernel.org # v4.6+
+Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20210929033646.39630-2-nixiaoming@huawei.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
++++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
+@@ -94,9 +94,8 @@ int __init mpc85xx_setup_pmc(void)
+ pr_err("Could not map guts node address\n");
+ return -ENOMEM;
+ }
++ qoriq_pm_ops = &mpc85xx_pm_ops;
+ }
+
+- qoriq_pm_ops = &mpc85xx_pm_ops;
+-
+ return 0;
+ }
--- /dev/null
+From 51d157946666382e779f94c39891e8e9a020da78 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Mon, 8 Nov 2021 10:58:10 -0500
+Subject: ring-buffer: Protect ring_buffer_reset() from reentrancy
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 51d157946666382e779f94c39891e8e9a020da78 upstream.
+
+The resetting of the entire ring buffer used to simply go through and
+reset each individual CPU buffer, each of which had its own protection
+and synchronization. But this was very slow, due to performing a
+synchronization for each CPU. The code was reshuffled to do one
+disabling of all CPU buffers, followed by a single RCU synchronization,
+and then the resetting of each of the CPU buffers. But unfortunately,
+the mutex that prevented multiple concurrent resets of the buffer was
+not moved to the upper function, so nothing protects the global reset
+from reentrancy.
+
+Take the ring buffer mutex around the global reset.
+
+Cc: stable@vger.kernel.org
+Fixes: b23d7a5f4a07a ("ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU")
+Reported-by: "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/ring_buffer.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -5233,6 +5233,9 @@ void ring_buffer_reset(struct trace_buff
+ struct ring_buffer_per_cpu *cpu_buffer;
+ int cpu;
+
++ /* prevent another thread from changing buffer sizes */
++ mutex_lock(&buffer->mutex);
++
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+
+@@ -5251,6 +5254,8 @@ void ring_buffer_reset(struct trace_buff
+ atomic_dec(&cpu_buffer->record_disabled);
+ atomic_dec(&cpu_buffer->resize_disabled);
+ }
++
++ mutex_unlock(&buffer->mutex);
+ }
+ EXPORT_SYMBOL_GPL(ring_buffer_reset);
+
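The shape of the fix is a common one: a batched disable / synchronize /
reset sequence is only safe if the whole batch is serialized against
other resetters. A stand-alone sketch of that shape (pthread stand-ins
for the kernel mutex, the per-CPU work reduced to counters):

	#include <pthread.h>

	#define NR_CPUS 4

	static pthread_mutex_t buffer_mutex = PTHREAD_MUTEX_INITIALIZER;
	static int record_disabled[NR_CPUS];
	static long entries[NR_CPUS];

	static void reset_all_cpu_buffers(void)
	{
		int cpu;

		/* serialize whole-buffer resets against each other */
		pthread_mutex_lock(&buffer_mutex);

		for (cpu = 0; cpu < NR_CPUS; cpu++)
			record_disabled[cpu]++;	/* disable everything first */

		/* a single grace period for all CPUs would go here
		 * (synchronize_rcu() in the kernel) */

		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			entries[cpu] = 0;	/* the actual reset */
			record_disabled[cpu]--;
		}

		pthread_mutex_unlock(&buffer_mutex);
	}

Without the outer mutex, a second caller can slip in between the disable
pass and the reset pass of the first.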
--- /dev/null
+From 027b57170bf8bb6999a28e4a5f3d78bf1db0f90c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
+Date: Sat, 2 Oct 2021 15:09:00 +0200
+Subject: serial: core: Fix initializing and restoring termios speed
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Pali Rohár <pali@kernel.org>
+
+commit 027b57170bf8bb6999a28e4a5f3d78bf1db0f90c upstream.
+
+Since commit edc6afc54968 ("tty: switch to ktermios and new framework")
+the termios speed is no longer stored only in the c_cflag member but
+also in the new additional c_ispeed and c_ospeed members. If the BOTHER
+flag is set in c_cflag then the termios speed is stored only in these
+new members.
+
+Therefore, to correctly restore the termios speed, the ispeed and
+ospeed members must be stored as well, not only the cflag member.
+
+If only the cflag member with the BOTHER flag is restored, the functions
+tty_termios_baud_rate() and tty_termios_input_baud_rate() return the
+baudrate stored in the c_ospeed / c_ispeed member, which is zero because
+it was not restored either. If the reported baudrate is invalid (e.g.
+zero), the serial core functions fall back to a baudrate of 9600. This
+means that the original baudrate is lost and the kernel changes it to
+9600.
+
+Simple reproducer of this issue is to boot kernel with following command
+line argument: "console=ttyXXX,86400" (where ttyXXX is the device name).
+For speed 86400 there is no Bnnn constant and therefore kernel has to
+represent this speed via BOTHER c_cflag. Which means that speed is stored
+only in c_ospeed and c_ispeed members, not in c_cflag anymore.
+
+If the bootloader correctly configures the serial device to speed 86400,
+the kernel prints the boot log to the early console at speed 86400
+without any issue. But once the kernel starts initializing the real
+console device ttyXXX, the speed is changed to the fallback value 9600
+because the information about the speed was lost.
+
+This patch fixes the above issue by also storing and restoring the
+ispeed and ospeed members, which are required for the BOTHER flag.
+
+Fixes: edc6afc54968 ("[PATCH] tty: switch to ktermios and new framework")
+Cc: stable@vger.kernel.org
+Signed-off-by: Pali Rohár <pali@kernel.org>
+Link: https://lore.kernel.org/r/20211002130900.9518-1-pali@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/tty/serial/serial_core.c | 16 ++++++++++++++--
+ include/linux/console.h | 2 ++
+ 2 files changed, 16 insertions(+), 2 deletions(-)
+
+--- a/drivers/tty/serial/serial_core.c
++++ b/drivers/tty/serial/serial_core.c
+@@ -222,7 +222,11 @@ static int uart_port_startup(struct tty_
+ if (retval == 0) {
+ if (uart_console(uport) && uport->cons->cflag) {
+ tty->termios.c_cflag = uport->cons->cflag;
++ tty->termios.c_ispeed = uport->cons->ispeed;
++ tty->termios.c_ospeed = uport->cons->ospeed;
+ uport->cons->cflag = 0;
++ uport->cons->ispeed = 0;
++ uport->cons->ospeed = 0;
+ }
+ /*
+ * Initialise the hardware port settings.
+@@ -290,8 +294,11 @@ static void uart_shutdown(struct tty_str
+ /*
+ * Turn off DTR and RTS early.
+ */
+- if (uport && uart_console(uport) && tty)
++ if (uport && uart_console(uport) && tty) {
+ uport->cons->cflag = tty->termios.c_cflag;
++ uport->cons->ispeed = tty->termios.c_ispeed;
++ uport->cons->ospeed = tty->termios.c_ospeed;
++ }
+
+ if (!tty || C_HUPCL(tty))
+ uart_port_dtr_rts(uport, 0);
+@@ -2094,8 +2101,11 @@ uart_set_options(struct uart_port *port,
+ * Allow the setting of the UART parameters with a NULL console
+ * too:
+ */
+- if (co)
++ if (co) {
+ co->cflag = termios.c_cflag;
++ co->ispeed = termios.c_ispeed;
++ co->ospeed = termios.c_ospeed;
++ }
+
+ return 0;
+ }
+@@ -2229,6 +2239,8 @@ int uart_resume_port(struct uart_driver
+ */
+ memset(&termios, 0, sizeof(struct ktermios));
+ termios.c_cflag = uport->cons->cflag;
++ termios.c_ispeed = uport->cons->ispeed;
++ termios.c_ospeed = uport->cons->ospeed;
+
+ /*
+ * If that's unset, use the tty termios setting.
+--- a/include/linux/console.h
++++ b/include/linux/console.h
+@@ -149,6 +149,8 @@ struct console {
+ short flags;
+ short index;
+ int cflag;
++ uint ispeed;
++ uint ospeed;
+ void *data;
+ struct console *next;
+ };
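The reason cflag alone cannot carry the speed is easiest to see from how
such a rate is set in the first place: for 86400 there is no Bnnn
constant, so the BOTHER encoding is used and the numeric rate lives only
in c_ispeed/c_ospeed. A minimal userspace sketch using the Linux
termios2 interface (error handling trimmed):

	#include <asm/termbits.h>
	#include <sys/ioctl.h>

	static int set_speed(int fd, int speed)
	{
		struct termios2 tio;

		if (ioctl(fd, TCGETS2, &tio))
			return -1;
		tio.c_cflag &= ~CBAUD;
		tio.c_cflag |= BOTHER;	/* no Bnnn constant for 86400 */
		tio.c_ispeed = speed;	/* the rate lives only here ... */
		tio.c_ospeed = speed;	/* ... and here */
		return ioctl(fd, TCSETS2, &tio);
	}

Saving only c_cflag across a console open/close therefore drops the
rate, which is exactly what the patch fixes.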
power-supply-max17042_battery-use-vfsoc-for-capacity-when-no-rsns.patch
iio-core-fix-double-free-in-iio_device_unregister_sysfs.patch
iio-core-check-return-value-when-calling-dev_set_name.patch
+kvm-arm64-extract-esr_elx.ec-only.patch
+kvm-x86-fix-recording-of-guest-steal-time-preempted-status.patch
+kvm-x86-add-helper-to-consolidate-core-logic-of-set_cpuid-2-flows.patch
+kvm-nvmx-query-current-vmcs-when-determining-if-msr-bitmaps-are-in-use.patch
+kvm-nvmx-handle-dynamic-msr-intercept-toggling.patch
+can-peak_usb-always-ask-for-berr-reporting-for-pcan-usb-devices.patch
+can-mcp251xfd-mcp251xfd_irq-add-missing-can_rx_offload_threaded_irq_finish-in-case-of-bus-off.patch
+can-j1939-j1939_tp_cmd_recv-ignore-abort-message-in-the-bam-transport.patch
+can-j1939-j1939_can_recv-ignore-messages-with-invalid-source-address.patch
+can-j1939-j1939_tp_cmd_recv-check-the-dst-address-of-tp.cm_bam.patch
+iio-adc-tsc2046-fix-scan-interval-warning.patch
+powerpc-85xx-fix-oops-when-mpc85xx_smp_guts_ids-node-cannot-be-found.patch
+io_uring-honour-zeroes-as-io-wq-worker-limits.patch
+ring-buffer-protect-ring_buffer_reset-from-reentrancy.patch
+serial-core-fix-initializing-and-restoring-termios-speed.patch