From 476942237ab69259fa53c4a2abff6ca457a5f946 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 14 Nov 2021 13:28:53 +0100 Subject: [PATCH] 5.15-stable patches added patches: can-j1939-j1939_can_recv-ignore-messages-with-invalid-source-address.patch can-j1939-j1939_tp_cmd_recv-check-the-dst-address-of-tp.cm_bam.patch can-j1939-j1939_tp_cmd_recv-ignore-abort-message-in-the-bam-transport.patch can-mcp251xfd-mcp251xfd_irq-add-missing-can_rx_offload_threaded_irq_finish-in-case-of-bus-off.patch can-peak_usb-always-ask-for-berr-reporting-for-pcan-usb-devices.patch iio-adc-tsc2046-fix-scan-interval-warning.patch io_uring-honour-zeroes-as-io-wq-worker-limits.patch kvm-arm64-extract-esr_elx.ec-only.patch kvm-nvmx-handle-dynamic-msr-intercept-toggling.patch kvm-nvmx-query-current-vmcs-when-determining-if-msr-bitmaps-are-in-use.patch kvm-x86-add-helper-to-consolidate-core-logic-of-set_cpuid-2-flows.patch kvm-x86-fix-recording-of-guest-steal-time-preempted-status.patch powerpc-85xx-fix-oops-when-mpc85xx_smp_guts_ids-node-cannot-be-found.patch ring-buffer-protect-ring_buffer_reset-from-reentrancy.patch serial-core-fix-initializing-and-restoring-termios-speed.patch --- ...messages-with-invalid-source-address.patch | 40 +++ ...v-check-the-dst-address-of-tp.cm_bam.patch | 45 +++ ...e-abort-message-in-the-bam-transport.patch | 38 ++ ...readed_irq_finish-in-case-of-bus-off.patch | 39 ++ ...-berr-reporting-for-pcan-usb-devices.patch | 56 +++ ...dc-tsc2046-fix-scan-interval-warning.patch | 32 ++ ...honour-zeroes-as-io-wq-worker-limits.patch | 37 ++ .../kvm-arm64-extract-esr_elx.ec-only.patch | 82 +++++ ...andle-dynamic-msr-intercept-toggling.patch | 332 ++++++++++++++++++ ...etermining-if-msr-bitmaps-are-in-use.patch | 66 ++++ ...date-core-logic-of-set_cpuid-2-flows.patch | 97 +++++ ...of-guest-steal-time-preempted-status.patch | 236 +++++++++++++ ...xx_smp_guts_ids-node-cannot-be-found.patch | 37 ++ ...ct-ring_buffer_reset-from-reentrancy.patch | 50 +++ ...ializing-and-restoring-termios-speed.patch | 111 ++++++ queue-5.15/series | 15 + 16 files changed, 1313 insertions(+) create mode 100644 queue-5.15/can-j1939-j1939_can_recv-ignore-messages-with-invalid-source-address.patch create mode 100644 queue-5.15/can-j1939-j1939_tp_cmd_recv-check-the-dst-address-of-tp.cm_bam.patch create mode 100644 queue-5.15/can-j1939-j1939_tp_cmd_recv-ignore-abort-message-in-the-bam-transport.patch create mode 100644 queue-5.15/can-mcp251xfd-mcp251xfd_irq-add-missing-can_rx_offload_threaded_irq_finish-in-case-of-bus-off.patch create mode 100644 queue-5.15/can-peak_usb-always-ask-for-berr-reporting-for-pcan-usb-devices.patch create mode 100644 queue-5.15/iio-adc-tsc2046-fix-scan-interval-warning.patch create mode 100644 queue-5.15/io_uring-honour-zeroes-as-io-wq-worker-limits.patch create mode 100644 queue-5.15/kvm-arm64-extract-esr_elx.ec-only.patch create mode 100644 queue-5.15/kvm-nvmx-handle-dynamic-msr-intercept-toggling.patch create mode 100644 queue-5.15/kvm-nvmx-query-current-vmcs-when-determining-if-msr-bitmaps-are-in-use.patch create mode 100644 queue-5.15/kvm-x86-add-helper-to-consolidate-core-logic-of-set_cpuid-2-flows.patch create mode 100644 queue-5.15/kvm-x86-fix-recording-of-guest-steal-time-preempted-status.patch create mode 100644 queue-5.15/powerpc-85xx-fix-oops-when-mpc85xx_smp_guts_ids-node-cannot-be-found.patch create mode 100644 queue-5.15/ring-buffer-protect-ring_buffer_reset-from-reentrancy.patch create mode 100644 queue-5.15/serial-core-fix-initializing-and-restoring-termios-speed.patch diff 
--git a/queue-5.15/can-j1939-j1939_can_recv-ignore-messages-with-invalid-source-address.patch b/queue-5.15/can-j1939-j1939_can_recv-ignore-messages-with-invalid-source-address.patch new file mode 100644 index 00000000000..ea2ce758520 --- /dev/null +++ b/queue-5.15/can-j1939-j1939_can_recv-ignore-messages-with-invalid-source-address.patch @@ -0,0 +1,40 @@ +From a79305e156db3d24fcd8eb649cdb3c3b2350e5c2 Mon Sep 17 00:00:00 2001 +From: Zhang Changzhong +Date: Thu, 28 Oct 2021 22:38:26 +0800 +Subject: can: j1939: j1939_can_recv(): ignore messages with invalid source address + +From: Zhang Changzhong + +commit a79305e156db3d24fcd8eb649cdb3c3b2350e5c2 upstream. + +According to SAE-J1939-82 2015 (A.3.6 Row 2), a receiver should never +send TP.CM_CTS to the global address, so we can add a check in +j1939_can_recv() to drop messages with invalid source address. + +Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") +Link: https://lore.kernel.org/all/1635431907-15617-3-git-send-email-zhangchangzhong@huawei.com +Cc: stable@vger.kernel.org +Signed-off-by: Zhang Changzhong +Acked-by: Oleksij Rempel +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + net/can/j1939/main.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/net/can/j1939/main.c ++++ b/net/can/j1939/main.c +@@ -75,6 +75,13 @@ static void j1939_can_recv(struct sk_buf + skcb->addr.pgn = (cf->can_id >> 8) & J1939_PGN_MAX; + /* set default message type */ + skcb->addr.type = J1939_TP; ++ ++ if (!j1939_address_is_valid(skcb->addr.sa)) { ++ netdev_err_once(priv->ndev, "%s: sa is broadcast address, ignoring!\n", ++ __func__); ++ goto done; ++ } ++ + if (j1939_pgn_is_pdu1(skcb->addr.pgn)) { + /* Type 1: with destination address */ + skcb->addr.da = skcb->addr.pgn; diff --git a/queue-5.15/can-j1939-j1939_tp_cmd_recv-check-the-dst-address-of-tp.cm_bam.patch b/queue-5.15/can-j1939-j1939_tp_cmd_recv-check-the-dst-address-of-tp.cm_bam.patch new file mode 100644 index 00000000000..8325676ed48 --- /dev/null +++ b/queue-5.15/can-j1939-j1939_tp_cmd_recv-check-the-dst-address-of-tp.cm_bam.patch @@ -0,0 +1,45 @@ +From 164051a6ab5445bd97f719f50b16db8b32174269 Mon Sep 17 00:00:00 2001 +From: Zhang Changzhong +Date: Thu, 28 Oct 2021 22:38:27 +0800 +Subject: can: j1939: j1939_tp_cmd_recv(): check the dst address of TP.CM_BAM + +From: Zhang Changzhong + +commit 164051a6ab5445bd97f719f50b16db8b32174269 upstream. + +The TP.CM_BAM message must be sent to the global address [1], so add a +check to drop TP.CM_BAM sent to a non-global address. + +Without this patch, the receiver will treat the following packets as +normal RTS/CTS transport: +18EC0102#20090002FF002301 +18EB0102#0100000000000000 +18EB0102#020000FFFFFFFFFF + +[1] SAE-J1939-82 2015 A.3.3 Row 1. 
+ +Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") +Link: https://lore.kernel.org/all/1635431907-15617-4-git-send-email-zhangchangzhong@huawei.com +Cc: stable@vger.kernel.org +Signed-off-by: Zhang Changzhong +Acked-by: Oleksij Rempel +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + net/can/j1939/transport.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/net/can/j1939/transport.c ++++ b/net/can/j1939/transport.c +@@ -2023,6 +2023,11 @@ static void j1939_tp_cmd_recv(struct j19 + extd = J1939_ETP; + fallthrough; + case J1939_TP_CMD_BAM: ++ if (cmd == J1939_TP_CMD_BAM && !j1939_cb_is_broadcast(skcb)) { ++ netdev_err_once(priv->ndev, "%s: BAM to unicast (%02x), ignoring!\n", ++ __func__, skcb->addr.sa); ++ return; ++ } + fallthrough; + case J1939_TP_CMD_RTS: + if (skcb->addr.type != extd) diff --git a/queue-5.15/can-j1939-j1939_tp_cmd_recv-ignore-abort-message-in-the-bam-transport.patch b/queue-5.15/can-j1939-j1939_tp_cmd_recv-ignore-abort-message-in-the-bam-transport.patch new file mode 100644 index 00000000000..7745fbf6a49 --- /dev/null +++ b/queue-5.15/can-j1939-j1939_tp_cmd_recv-ignore-abort-message-in-the-bam-transport.patch @@ -0,0 +1,38 @@ +From c0f49d98006f2db3333b917caac65bce2af9865c Mon Sep 17 00:00:00 2001 +From: Zhang Changzhong +Date: Thu, 28 Oct 2021 22:38:25 +0800 +Subject: can: j1939: j1939_tp_cmd_recv(): ignore abort message in the BAM transport + +From: Zhang Changzhong + +commit c0f49d98006f2db3333b917caac65bce2af9865c upstream. + +This patch prevents BAM transport from being closed by receiving abort +message, as specified in SAE-J1939-82 2015 (A.3.3 Row 4). + +Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") +Link: https://lore.kernel.org/all/1635431907-15617-2-git-send-email-zhangchangzhong@huawei.com +Cc: stable@vger.kernel.org +Signed-off-by: Zhang Changzhong +Acked-by: Oleksij Rempel +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + net/can/j1939/transport.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/can/j1939/transport.c ++++ b/net/can/j1939/transport.c +@@ -2085,6 +2085,12 @@ static void j1939_tp_cmd_recv(struct j19 + break; + + case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */ ++ if (j1939_cb_is_broadcast(skcb)) { ++ netdev_err_once(priv->ndev, "%s: abort to broadcast (%02x), ignoring!\n", ++ __func__, skcb->addr.sa); ++ return; ++ } ++ + if (j1939_tp_im_transmitter(skcb)) + j1939_xtp_rx_abort(priv, skb, true); + diff --git a/queue-5.15/can-mcp251xfd-mcp251xfd_irq-add-missing-can_rx_offload_threaded_irq_finish-in-case-of-bus-off.patch b/queue-5.15/can-mcp251xfd-mcp251xfd_irq-add-missing-can_rx_offload_threaded_irq_finish-in-case-of-bus-off.patch new file mode 100644 index 00000000000..608c368dd16 --- /dev/null +++ b/queue-5.15/can-mcp251xfd-mcp251xfd_irq-add-missing-can_rx_offload_threaded_irq_finish-in-case-of-bus-off.patch @@ -0,0 +1,39 @@ +From 691204bd66b34ba982e19988e6eba9f6321dfe6c Mon Sep 17 00:00:00 2001 +From: Marc Kleine-Budde +Date: Fri, 15 Oct 2021 19:46:59 +0200 +Subject: can: mcp251xfd: mcp251xfd_irq(): add missing can_rx_offload_threaded_irq_finish() in case of bus off + +From: Marc Kleine-Budde + +commit 691204bd66b34ba982e19988e6eba9f6321dfe6c upstream. + +The function can_rx_offload_threaded_irq_finish() is needed to trigger +the NAPI thread to deliver read CAN frames to the networking stack. 
+
+This patch adds the missing call to can_rx_offload_threaded_irq_finish()
+in case of a bus off, before leaving the interrupt handler to avoid
+packet starvation.
+
+Link: https://lore.kernel.org/all/20211106201526.44292-1-mkl@pengutronix.de
+Fixes: 30bfec4fec59 ("can: rx-offload: can_rx_offload_threaded_irq_finish(): add new function to be called from threaded interrupt")
+Cc: stable@vger.kernel.org
+Signed-off-by: Marc Kleine-Budde
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
++++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+@@ -2290,8 +2290,10 @@ static irqreturn_t mcp251xfd_irq(int irq
+ * check will fail, too. So leave IRQ handler
+ * directly.
+ */
+- if (priv->can.state == CAN_STATE_BUS_OFF)
++ if (priv->can.state == CAN_STATE_BUS_OFF) {
++ can_rx_offload_threaded_irq_finish(&priv->offload);
+ return IRQ_HANDLED;
++ }
+ }
+
+ handled = IRQ_HANDLED;
diff --git a/queue-5.15/can-peak_usb-always-ask-for-berr-reporting-for-pcan-usb-devices.patch b/queue-5.15/can-peak_usb-always-ask-for-berr-reporting-for-pcan-usb-devices.patch
new file mode 100644
index 00000000000..a0cb3e279b2
--- /dev/null
+++ b/queue-5.15/can-peak_usb-always-ask-for-berr-reporting-for-pcan-usb-devices.patch
@@ -0,0 +1,56 @@
+From 3f1c7aa28498e52a5e6aa2f1b89bf35c63352cfd Mon Sep 17 00:00:00 2001
+From: Stephane Grosjean
+Date: Thu, 21 Oct 2021 10:15:04 +0200
+Subject: can: peak_usb: always ask for BERR reporting for PCAN-USB devices
+
+From: Stephane Grosjean
+
+commit 3f1c7aa28498e52a5e6aa2f1b89bf35c63352cfd upstream.
+
+Since, for the PCAN-USB, the transition to the ERROR_WARNING or
+ERROR_PASSIVE state is managed according to the error counters, these
+counters must be requested unconditionally.
+ +Link: https://lore.kernel.org/all/20211021081505.18223-2-s.grosjean@peak-system.com +Fixes: c11dcee75830 ("can: peak_usb: pcan_usb_decode_error(): upgrade handling of bus state changes") +Cc: stable@vger.kernel.org +Signed-off-by: Stephane Grosjean +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/can/usb/peak_usb/pcan_usb.c | 17 ++++++++--------- + 1 file changed, 8 insertions(+), 9 deletions(-) + +--- a/drivers/net/can/usb/peak_usb/pcan_usb.c ++++ b/drivers/net/can/usb/peak_usb/pcan_usb.c +@@ -841,14 +841,14 @@ static int pcan_usb_start(struct peak_us + pdev->bec.rxerr = 0; + pdev->bec.txerr = 0; + +- /* be notified on error counter changes (if requested by user) */ +- if (dev->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) { +- err = pcan_usb_set_err_frame(dev, PCAN_USB_BERR_MASK); +- if (err) +- netdev_warn(dev->netdev, +- "Asking for BERR reporting error %u\n", +- err); +- } ++ /* always ask the device for BERR reporting, to be able to switch from ++ * WARNING to PASSIVE state ++ */ ++ err = pcan_usb_set_err_frame(dev, PCAN_USB_BERR_MASK); ++ if (err) ++ netdev_warn(dev->netdev, ++ "Asking for BERR reporting error %u\n", ++ err); + + /* if revision greater than 3, can put silent mode on/off */ + if (dev->device_rev > 3) { +@@ -986,7 +986,6 @@ const struct peak_usb_adapter pcan_usb = + .device_id = PCAN_USB_PRODUCT_ID, + .ctrl_count = 1, + .ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY | +- CAN_CTRLMODE_BERR_REPORTING | + CAN_CTRLMODE_CC_LEN8_DLC, + .clock = { + .freq = PCAN_USB_CRYSTAL_HZ / 2, diff --git a/queue-5.15/iio-adc-tsc2046-fix-scan-interval-warning.patch b/queue-5.15/iio-adc-tsc2046-fix-scan-interval-warning.patch new file mode 100644 index 00000000000..8adbc733aa4 --- /dev/null +++ b/queue-5.15/iio-adc-tsc2046-fix-scan-interval-warning.patch @@ -0,0 +1,32 @@ +From 69b31fd7a61784692db6433c05d46915b1b1a680 Mon Sep 17 00:00:00 2001 +From: Oleksij Rempel +Date: Thu, 7 Oct 2021 11:30:06 +0200 +Subject: iio: adc: tsc2046: fix scan interval warning + +From: Oleksij Rempel + +commit 69b31fd7a61784692db6433c05d46915b1b1a680 upstream. + +Sync if statement with the actual warning. 
+ +Fixes: 9504db5765e8 ("iio: adc: tsc2046: fix a warning message in tsc2046_adc_update_scan_mode()") +Signed-off-by: Oleksij Rempel +Link: https://lore.kernel.org/r/20211007093007.1466-2-o.rempel@pengutronix.de +Cc: +Signed-off-by: Jonathan Cameron +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iio/adc/ti-tsc2046.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/iio/adc/ti-tsc2046.c ++++ b/drivers/iio/adc/ti-tsc2046.c +@@ -398,7 +398,7 @@ static int tsc2046_adc_update_scan_mode( + priv->xfer.len = size; + priv->time_per_scan_us = size * 8 * priv->time_per_bit_ns / NSEC_PER_USEC; + +- if (priv->scan_interval_us > priv->time_per_scan_us) ++ if (priv->scan_interval_us < priv->time_per_scan_us) + dev_warn(&priv->spi->dev, "The scan interval (%d) is less then calculated scan time (%d)\n", + priv->scan_interval_us, priv->time_per_scan_us); + diff --git a/queue-5.15/io_uring-honour-zeroes-as-io-wq-worker-limits.patch b/queue-5.15/io_uring-honour-zeroes-as-io-wq-worker-limits.patch new file mode 100644 index 00000000000..3defc93b234 --- /dev/null +++ b/queue-5.15/io_uring-honour-zeroes-as-io-wq-worker-limits.patch @@ -0,0 +1,37 @@ +From bad119b9a00019054f0c9e2045f312ed63ace4f4 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Mon, 8 Nov 2021 15:10:03 +0000 +Subject: io_uring: honour zeroes as io-wq worker limits + +From: Pavel Begunkov + +commit bad119b9a00019054f0c9e2045f312ed63ace4f4 upstream. + +When we pass in zero as an io-wq worker number limit it shouldn't +actually change the limits but return the old value, follow that +behaviour with deferred limits setup as well. + +Cc: stable@kernel.org # 5.15 +Reported-by: Beld Zhang +Fixes: e139a1ec92f8d ("io_uring: apply max_workers limit to all future users") +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/1b222a92f7a78a24b042763805e891a4cdd4b544.1636384034.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -10684,7 +10684,9 @@ static int io_register_iowq_max_workers( + + BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits)); + +- memcpy(ctx->iowq_limits, new_count, sizeof(new_count)); ++ for (i = 0; i < ARRAY_SIZE(new_count); i++) ++ if (new_count[i]) ++ ctx->iowq_limits[i] = new_count[i]; + ctx->iowq_limits_set = true; + + ret = -EINVAL; diff --git a/queue-5.15/kvm-arm64-extract-esr_elx.ec-only.patch b/queue-5.15/kvm-arm64-extract-esr_elx.ec-only.patch new file mode 100644 index 00000000000..5d884178de3 --- /dev/null +++ b/queue-5.15/kvm-arm64-extract-esr_elx.ec-only.patch @@ -0,0 +1,82 @@ +From 8bb084119f1acc2ec55ea085a97231e3ddb30782 Mon Sep 17 00:00:00 2001 +From: Mark Rutland +Date: Wed, 3 Nov 2021 11:05:45 +0000 +Subject: KVM: arm64: Extract ESR_ELx.EC only + +From: Mark Rutland + +commit 8bb084119f1acc2ec55ea085a97231e3ddb30782 upstream. + +Since ARMv8.0 the upper 32 bits of ESR_ELx have been RES0, and recently +some of the upper bits gained a meaning and can be non-zero. For +example, when FEAT_LS64 is implemented, ESR_ELx[36:32] contain ISS2, +which for an ST64BV or ST64BV0 can be non-zero. This can be seen in ARM +DDI 0487G.b, page D13-3145, section D13.2.37. + +Generally, we must not rely on RES0 bit remaining zero in future, and +when extracting ESR_ELx.EC we must mask out all other bits. 
+
+All C code uses the ESR_ELx_EC() macro, which masks out the irrelevant
+bits, and therefore no alterations are required to C code to avoid
+consuming irrelevant bits.
+
+In a couple of places the KVM assembly extracts ESR_ELx.EC using LSR on
+an X register, and so could in theory consume previously RES0 bits. In
+both cases this is for comparison with EC values ESR_ELx_EC_HVC32 and
+ESR_ELx_EC_HVC64, for which the upper bits of ESR_ELx must currently be
+zero, but this could change in future.
+
+This patch adjusts the KVM vectors to use UBFX rather than LSR to
+extract ESR_ELx.EC, ensuring these are robust to future additions to
+ESR_ELx.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Mark Rutland
+Cc: Alexandru Elisei
+Cc: Catalin Marinas
+Cc: James Morse
+Cc: Marc Zyngier
+Cc: Suzuki K Poulose
+Cc: Will Deacon
+Acked-by: Will Deacon
+Signed-off-by: Marc Zyngier
+Link: https://lore.kernel.org/r/20211103110545.4613-1-mark.rutland@arm.com
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/arm64/include/asm/esr.h | 1 +
+ arch/arm64/kvm/hyp/hyp-entry.S | 2 +-
+ arch/arm64/kvm/hyp/nvhe/host.S | 2 +-
+ 3 files changed, 3 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/include/asm/esr.h
++++ b/arch/arm64/include/asm/esr.h
+@@ -68,6 +68,7 @@
+ #define ESR_ELx_EC_MAX (0x3F)
+
+ #define ESR_ELx_EC_SHIFT (26)
++#define ESR_ELx_EC_WIDTH (6)
+ #define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT)
+ #define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
+
+--- a/arch/arm64/kvm/hyp/hyp-entry.S
++++ b/arch/arm64/kvm/hyp/hyp-entry.S
+@@ -44,7 +44,7 @@
+ el1_sync: // Guest trapped into EL2
+
+ mrs x0, esr_el2
+- lsr x0, x0, #ESR_ELx_EC_SHIFT
++ ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
+ cmp x0, #ESR_ELx_EC_HVC64
+ ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
+ b.ne el1_trap
+--- a/arch/arm64/kvm/hyp/nvhe/host.S
++++ b/arch/arm64/kvm/hyp/nvhe/host.S
+@@ -115,7 +115,7 @@ SYM_FUNC_END(__hyp_do_panic)
+ .L__vect_start\@:
+ stp x0, x1, [sp, #-16]!
+ mrs x0, esr_el2
+- lsr x0, x0, #ESR_ELx_EC_SHIFT
++ ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
+ cmp x0, #ESR_ELx_EC_HVC64
+ b.ne __host_exit
+
diff --git a/queue-5.15/kvm-nvmx-handle-dynamic-msr-intercept-toggling.patch b/queue-5.15/kvm-nvmx-handle-dynamic-msr-intercept-toggling.patch
new file mode 100644
index 00000000000..34319aa1cdb
--- /dev/null
+++ b/queue-5.15/kvm-nvmx-handle-dynamic-msr-intercept-toggling.patch
@@ -0,0 +1,332 @@
+From 67f4b9969c305be515e47f809ecacfd86bd20a9c Mon Sep 17 00:00:00 2001
+From: Sean Christopherson
+Date: Tue, 9 Nov 2021 01:30:45 +0000
+Subject: KVM: nVMX: Handle dynamic MSR intercept toggling
+
+From: Sean Christopherson
+
+commit 67f4b9969c305be515e47f809ecacfd86bd20a9c upstream.
+
+Always check vmcs01's MSR bitmap when merging L0 and L1 bitmaps for L2,
+and always update the relevant bits in vmcs02. This fixes two distinct,
+but intertwined bugs related to dynamic MSR bitmap modifications.
+
+The first issue is that KVM fails to enable MSR interception in vmcs02
+for the FS/GS base MSRs if L1 first runs L2 with interception disabled,
+and later enables interception.
+
+The second issue is that KVM fails to honor userspace MSR filtering when
+preparing vmcs02.
+
+Fix both issues simultaneously, as fixing only one of the issues
+(doesn't matter which) would create a mess that no one should have to bisect.
+Fixing only the first bug would exacerbate the MSR filtering issue as
+userspace would see inconsistent behavior depending on the whims of L1.
+Fixing only the second bug (MSR filtering) effectively requires fixing +the first, as the nVMX code only knows how to transition vmcs02's +bitmap from 1->0. + +Move the various accessor/mutators that are currently buried in vmx.c +into vmx.h so that they can be shared by the nested code. + +Fixes: 1a155254ff93 ("KVM: x86: Introduce MSR filtering") +Fixes: d69129b4e46a ("KVM: nVMX: Disable intercept for FS/GS base MSRs in vmcs02 when possible") +Cc: stable@vger.kernel.org +Cc: Alexander Graf +Signed-off-by: Sean Christopherson +Message-Id: <20211109013047.2041518-3-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.c | 103 ++++++++++++++++++++-------------------------- + arch/x86/kvm/vmx/vmx.c | 55 ------------------------ + arch/x86/kvm/vmx/vmx.h | 63 ++++++++++++++++++++++++++++ + 3 files changed, 111 insertions(+), 110 deletions(-) + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -524,29 +524,6 @@ static int nested_vmx_check_tpr_shadow_c + } + + /* +- * Check if MSR is intercepted for L01 MSR bitmap. +- */ +-static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) +-{ +- unsigned long *msr_bitmap; +- int f = sizeof(unsigned long); +- +- if (!cpu_has_vmx_msr_bitmap()) +- return true; +- +- msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; +- +- if (msr <= 0x1fff) { +- return !!test_bit(msr, msr_bitmap + 0x800 / f); +- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { +- msr &= 0x1fff; +- return !!test_bit(msr, msr_bitmap + 0xc00 / f); +- } +- +- return true; +-} +- +-/* + * If a msr is allowed by L0, we should check whether it is allowed by L1. + * The corresponding bit will be cleared unless both of L0 and L1 allow it. + */ +@@ -599,6 +576,34 @@ static inline void enable_x2apic_msr_int + } + } + ++#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw) \ ++static inline \ ++void nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx, \ ++ unsigned long *msr_bitmap_l1, \ ++ unsigned long *msr_bitmap_l0, u32 msr) \ ++{ \ ++ if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) || \ ++ vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr)) \ ++ vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr); \ ++ else \ ++ vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr); \ ++} ++BUILD_NVMX_MSR_INTERCEPT_HELPER(read) ++BUILD_NVMX_MSR_INTERCEPT_HELPER(write) ++ ++static inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx, ++ unsigned long *msr_bitmap_l1, ++ unsigned long *msr_bitmap_l0, ++ u32 msr, int types) ++{ ++ if (types & MSR_TYPE_R) ++ nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1, ++ msr_bitmap_l0, msr); ++ if (types & MSR_TYPE_W) ++ nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1, ++ msr_bitmap_l0, msr); ++} ++ + /* + * Merge L0's and L1's MSR bitmap, return false to indicate that + * we do not use the hardware. +@@ -606,10 +611,11 @@ static inline void enable_x2apic_msr_int + static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) + { ++ struct vcpu_vmx *vmx = to_vmx(vcpu); + int msr; + unsigned long *msr_bitmap_l1; +- unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; +- struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map; ++ unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap; ++ struct kvm_host_map *map = &vmx->nested.msr_bitmap_map; + + /* Nothing to do if the MSR bitmap is not in use. 
*/ + if (!cpu_has_vmx_msr_bitmap() || +@@ -660,44 +666,27 @@ static inline bool nested_vmx_prepare_ms + } + } + +- /* KVM unconditionally exposes the FS/GS base MSRs to L1. */ ++ /* ++ * Always check vmcs01's bitmap to honor userspace MSR filters and any ++ * other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through. ++ */ + #ifdef CONFIG_X86_64 +- nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, +- MSR_FS_BASE, MSR_TYPE_RW); ++ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, ++ MSR_FS_BASE, MSR_TYPE_RW); + +- nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, +- MSR_GS_BASE, MSR_TYPE_RW); ++ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, ++ MSR_GS_BASE, MSR_TYPE_RW); + +- nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, +- MSR_KERNEL_GS_BASE, MSR_TYPE_RW); ++ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, ++ MSR_KERNEL_GS_BASE, MSR_TYPE_RW); + #endif ++ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, ++ MSR_IA32_SPEC_CTRL, MSR_TYPE_RW); + +- /* +- * Checking the L0->L1 bitmap is trying to verify two things: +- * +- * 1. L0 gave a permission to L1 to actually passthrough the MSR. This +- * ensures that we do not accidentally generate an L02 MSR bitmap +- * from the L12 MSR bitmap that is too permissive. +- * 2. That L1 or L2s have actually used the MSR. This avoids +- * unnecessarily merging of the bitmap if the MSR is unused. This +- * works properly because we only update the L01 MSR bitmap lazily. +- * So even if L0 should pass L1 these MSRs, the L01 bitmap is only +- * updated to reflect this when L1 (or its L2s) actually write to +- * the MSR. +- */ +- if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL)) +- nested_vmx_disable_intercept_for_msr( +- msr_bitmap_l1, msr_bitmap_l0, +- MSR_IA32_SPEC_CTRL, +- MSR_TYPE_R | MSR_TYPE_W); +- +- if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD)) +- nested_vmx_disable_intercept_for_msr( +- msr_bitmap_l1, msr_bitmap_l0, +- MSR_IA32_PRED_CMD, +- MSR_TYPE_W); ++ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, ++ MSR_IA32_PRED_CMD, MSR_TYPE_W); + +- kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false); ++ kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false); + + return true; + } +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -771,22 +771,11 @@ void vmx_update_exception_bitmap(struct + */ + static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) + { +- unsigned long *msr_bitmap; +- int f = sizeof(unsigned long); +- + if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS)) + return true; + +- msr_bitmap = vmx->loaded_vmcs->msr_bitmap; +- +- if (msr <= 0x1fff) { +- return !!test_bit(msr, msr_bitmap + 0x800 / f); +- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { +- msr &= 0x1fff; +- return !!test_bit(msr, msr_bitmap + 0xc00 / f); +- } +- +- return true; ++ return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, ++ MSR_IA32_SPEC_CTRL); + } + + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, +@@ -3695,46 +3684,6 @@ void free_vpid(int vpid) + spin_unlock(&vmx_vpid_lock); + } + +-static void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr) +-{ +- int f = sizeof(unsigned long); +- +- if (msr <= 0x1fff) +- __clear_bit(msr, msr_bitmap + 0x000 / f); +- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) +- __clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f); +-} +- +-static void 
vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr) +-{ +- int f = sizeof(unsigned long); +- +- if (msr <= 0x1fff) +- __clear_bit(msr, msr_bitmap + 0x800 / f); +- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) +- __clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f); +-} +- +-static void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr) +-{ +- int f = sizeof(unsigned long); +- +- if (msr <= 0x1fff) +- __set_bit(msr, msr_bitmap + 0x000 / f); +- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) +- __set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f); +-} +- +-static void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr) +-{ +- int f = sizeof(unsigned long); +- +- if (msr <= 0x1fff) +- __set_bit(msr, msr_bitmap + 0x800 / f); +- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) +- __set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f); +-} +- + void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type) + { + struct vcpu_vmx *vmx = to_vmx(vcpu); +--- a/arch/x86/kvm/vmx/vmx.h ++++ b/arch/x86/kvm/vmx/vmx.h +@@ -400,6 +400,69 @@ static inline void vmx_set_intercept_for + + void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu); + ++static inline bool vmx_test_msr_bitmap_read(ulong *msr_bitmap, u32 msr) ++{ ++ int f = sizeof(unsigned long); ++ ++ if (msr <= 0x1fff) ++ return test_bit(msr, msr_bitmap + 0x000 / f); ++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) ++ return test_bit(msr & 0x1fff, msr_bitmap + 0x400 / f); ++ return true; ++} ++ ++static inline bool vmx_test_msr_bitmap_write(ulong *msr_bitmap, u32 msr) ++{ ++ int f = sizeof(unsigned long); ++ ++ if (msr <= 0x1fff) ++ return test_bit(msr, msr_bitmap + 0x800 / f); ++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) ++ return test_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f); ++ return true; ++} ++ ++static inline void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr) ++{ ++ int f = sizeof(unsigned long); ++ ++ if (msr <= 0x1fff) ++ __clear_bit(msr, msr_bitmap + 0x000 / f); ++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) ++ __clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f); ++} ++ ++static inline void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr) ++{ ++ int f = sizeof(unsigned long); ++ ++ if (msr <= 0x1fff) ++ __clear_bit(msr, msr_bitmap + 0x800 / f); ++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) ++ __clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f); ++} ++ ++static inline void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr) ++{ ++ int f = sizeof(unsigned long); ++ ++ if (msr <= 0x1fff) ++ __set_bit(msr, msr_bitmap + 0x000 / f); ++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) ++ __set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f); ++} ++ ++static inline void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr) ++{ ++ int f = sizeof(unsigned long); ++ ++ if (msr <= 0x1fff) ++ __set_bit(msr, msr_bitmap + 0x800 / f); ++ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) ++ __set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f); ++} ++ ++ + static inline u8 vmx_get_rvi(void) + { + return vmcs_read16(GUEST_INTR_STATUS) & 0xff; diff --git a/queue-5.15/kvm-nvmx-query-current-vmcs-when-determining-if-msr-bitmaps-are-in-use.patch b/queue-5.15/kvm-nvmx-query-current-vmcs-when-determining-if-msr-bitmaps-are-in-use.patch new file mode 100644 index 00000000000..c3015b534b3 --- /dev/null +++ b/queue-5.15/kvm-nvmx-query-current-vmcs-when-determining-if-msr-bitmaps-are-in-use.patch @@ -0,0 +1,66 @@ +From 7dfbc624eb5726367900c8d86deff50836240361 Mon Sep 17 00:00:00 2001 +From: Sean 
Christopherson +Date: Tue, 9 Nov 2021 01:30:44 +0000 +Subject: KVM: nVMX: Query current VMCS when determining if MSR bitmaps are in use + +From: Sean Christopherson + +commit 7dfbc624eb5726367900c8d86deff50836240361 upstream. + +Check the current VMCS controls to determine if an MSR write will be +intercepted due to MSR bitmaps being disabled. In the nested VMX case, +KVM will disable MSR bitmaps in vmcs02 if they're disabled in vmcs12 or +if KVM can't map L1's bitmaps for whatever reason. + +Note, the bad behavior is relatively benign in the current code base as +KVM sets all bits in vmcs02's MSR bitmap by default, clears bits if and +only if L0 KVM also disables interception of an MSR, and only uses the +buggy helper for MSR_IA32_SPEC_CTRL. Because KVM explicitly tests WRMSR +before disabling interception of MSR_IA32_SPEC_CTRL, the flawed check +will only result in KVM reading MSR_IA32_SPEC_CTRL from hardware when it +isn't strictly necessary. + +Tag the fix for stable in case a future fix wants to use +msr_write_intercepted(), in which case a buggy implementation in older +kernels could prove subtly problematic. + +Fixes: d28b387fb74d ("KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20211109013047.2041518-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -769,15 +769,15 @@ void vmx_update_exception_bitmap(struct + /* + * Check if MSR is intercepted for currently loaded MSR bitmap. + */ +-static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) ++static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) + { + unsigned long *msr_bitmap; + int f = sizeof(unsigned long); + +- if (!cpu_has_vmx_msr_bitmap()) ++ if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS)) + return true; + +- msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; ++ msr_bitmap = vmx->loaded_vmcs->msr_bitmap; + + if (msr <= 0x1fff) { + return !!test_bit(msr, msr_bitmap + 0x800 / f); +@@ -6720,7 +6720,7 @@ static fastpath_t vmx_vcpu_run(struct kv + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ +- if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) ++ if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) + vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + + x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); diff --git a/queue-5.15/kvm-x86-add-helper-to-consolidate-core-logic-of-set_cpuid-2-flows.patch b/queue-5.15/kvm-x86-add-helper-to-consolidate-core-logic-of-set_cpuid-2-flows.patch new file mode 100644 index 00000000000..894996b6892 --- /dev/null +++ b/queue-5.15/kvm-x86-add-helper-to-consolidate-core-logic-of-set_cpuid-2-flows.patch @@ -0,0 +1,97 @@ +From 8b44b174f6aca815fc84c2038e4523ef8e32fabb Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 5 Nov 2021 09:51:00 +0000 +Subject: KVM: x86: Add helper to consolidate core logic of SET_CPUID{2} flows + +From: Sean Christopherson + +commit 8b44b174f6aca815fc84c2038e4523ef8e32fabb upstream. + +Move the core logic of SET_CPUID and SET_CPUID2 to a common helper, the +only difference between the two ioctls() is the format of the userspace +struct. A future fix will add yet more code to the core logic. + +No functional change intended. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20211105095101.5384-2-pdurrant@amazon.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 47 ++++++++++++++++++++++++----------------------- + 1 file changed, 24 insertions(+), 23 deletions(-) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -232,6 +232,25 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struc + return rsvd_bits(cpuid_maxphyaddr(vcpu), 63); + } + ++static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, ++ int nent) ++{ ++ int r; ++ ++ r = kvm_check_cpuid(e2, nent); ++ if (r) ++ return r; ++ ++ kvfree(vcpu->arch.cpuid_entries); ++ vcpu->arch.cpuid_entries = e2; ++ vcpu->arch.cpuid_nent = nent; ++ ++ kvm_update_cpuid_runtime(vcpu); ++ kvm_vcpu_after_set_cpuid(vcpu); ++ ++ return 0; ++} ++ + /* when an old userspace process fills a new kernel module */ + int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, + struct kvm_cpuid *cpuid, +@@ -268,18 +287,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_ + e2[i].padding[2] = 0; + } + +- r = kvm_check_cpuid(e2, cpuid->nent); +- if (r) { ++ r = kvm_set_cpuid(vcpu, e2, cpuid->nent); ++ if (r) + kvfree(e2); +- goto out_free_cpuid; +- } +- +- kvfree(vcpu->arch.cpuid_entries); +- vcpu->arch.cpuid_entries = e2; +- vcpu->arch.cpuid_nent = cpuid->nent; +- +- kvm_update_cpuid_runtime(vcpu); +- kvm_vcpu_after_set_cpuid(vcpu); + + out_free_cpuid: + kvfree(e); +@@ -303,20 +313,11 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm + return PTR_ERR(e2); + } + +- r = kvm_check_cpuid(e2, cpuid->nent); +- if (r) { ++ r = kvm_set_cpuid(vcpu, e2, cpuid->nent); ++ if (r) + kvfree(e2); +- return r; +- } + +- kvfree(vcpu->arch.cpuid_entries); +- vcpu->arch.cpuid_entries = e2; +- vcpu->arch.cpuid_nent = cpuid->nent; +- +- kvm_update_cpuid_runtime(vcpu); +- kvm_vcpu_after_set_cpuid(vcpu); +- +- return 0; ++ return r; + } + + int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, diff --git a/queue-5.15/kvm-x86-fix-recording-of-guest-steal-time-preempted-status.patch b/queue-5.15/kvm-x86-fix-recording-of-guest-steal-time-preempted-status.patch new file mode 100644 index 00000000000..751dbb94fc5 --- /dev/null +++ b/queue-5.15/kvm-x86-fix-recording-of-guest-steal-time-preempted-status.patch @@ -0,0 +1,236 @@ +From 7e2175ebd695f17860c5bd4ad7616cce12ed4591 Mon Sep 17 00:00:00 2001 +From: David Woodhouse +Date: Tue, 2 Nov 2021 17:36:39 +0000 +Subject: KVM: x86: Fix recording of guest steal time / preempted status + +From: David Woodhouse + +commit 7e2175ebd695f17860c5bd4ad7616cce12ed4591 upstream. + +In commit b043138246a4 ("x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is +not missed") we switched to using a gfn_to_pfn_cache for accessing the +guest steal time structure in order to allow for an atomic xchg of the +preempted field. This has a couple of problems. + +Firstly, kvm_map_gfn() doesn't work at all for IOMEM pages when the +atomic flag is set, which it is in kvm_steal_time_set_preempted(). So a +guest vCPU using an IOMEM page for its steal time would never have its +preempted field set. + +Secondly, the gfn_to_pfn_cache is not invalidated in all cases where it +should have been. There are two stages to the GFN->PFN conversion; +first the GFN is converted to a userspace HVA, and then that HVA is +looked up in the process page tables to find the underlying host PFN. 
+Correct invalidation of the latter would require being hooked up to the +MMU notifiers, but that doesn't happen---so it just keeps mapping and +unmapping the *wrong* PFN after the userspace page tables change. + +In the !IOMEM case at least the stale page *is* pinned all the time it's +cached, so it won't be freed and reused by anyone else while still +receiving the steal time updates. The map/unmap dance only takes care +of the KVM administrivia such as marking the page dirty. + +Until the gfn_to_pfn cache handles the remapping automatically by +integrating with the MMU notifiers, we might as well not get a +kernel mapping of it, and use the perfectly serviceable userspace HVA +that we already have. We just need to implement the atomic xchg on +the userspace address with appropriate exception handling, which is +fairly trivial. + +Cc: stable@vger.kernel.org +Fixes: b043138246a4 ("x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is not missed") +Signed-off-by: David Woodhouse +Message-Id: <3645b9b889dac6438394194bb5586a46b68d581f.camel@infradead.org> +[I didn't entirely agree with David's assessment of the + usefulness of the gfn_to_pfn cache, and integrated the outcome + of the discussion in the above commit message. - Paolo] +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kvm_host.h | 2 + arch/x86/kvm/x86.c | 105 ++++++++++++++++++++++++++++------------ + 2 files changed, 76 insertions(+), 31 deletions(-) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -751,7 +751,7 @@ struct kvm_vcpu_arch { + u8 preempted; + u64 msr_val; + u64 last_steal; +- struct gfn_to_pfn_cache cache; ++ struct gfn_to_hva_cache cache; + } st; + + u64 l1_tsc_offset; +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -3195,8 +3195,11 @@ static void kvm_vcpu_flush_tlb_guest(str + + static void record_steal_time(struct kvm_vcpu *vcpu) + { +- struct kvm_host_map map; +- struct kvm_steal_time *st; ++ struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache; ++ struct kvm_steal_time __user *st; ++ struct kvm_memslots *slots; ++ u64 steal; ++ u32 version; + + if (kvm_xen_msr_enabled(vcpu->kvm)) { + kvm_xen_runstate_set_running(vcpu); +@@ -3206,47 +3209,83 @@ static void record_steal_time(struct kvm + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) + return; + +- /* -EAGAIN is returned in atomic context so we can just return. */ +- if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, +- &map, &vcpu->arch.st.cache, false)) ++ if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm)) + return; + +- st = map.hva + +- offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); ++ slots = kvm_memslots(vcpu->kvm); ++ ++ if (unlikely(slots->generation != ghc->generation || ++ kvm_is_error_hva(ghc->hva) || !ghc->memslot)) { ++ gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS; ++ ++ /* We rely on the fact that it fits in a single page. */ ++ BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS); ++ ++ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) || ++ kvm_is_error_hva(ghc->hva) || !ghc->memslot) ++ return; ++ } ++ ++ st = (struct kvm_steal_time __user *)ghc->hva; ++ if (!user_access_begin(st, sizeof(*st))) ++ return; + + /* + * Doing a TLB flush here, on the guest's behalf, can avoid + * expensive IPIs. 
+ */ + if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) { +- u8 st_preempted = xchg(&st->preempted, 0); ++ u8 st_preempted = 0; ++ int err = -EFAULT; ++ ++ asm volatile("1: xchgb %0, %2\n" ++ "xor %1, %1\n" ++ "2:\n" ++ _ASM_EXTABLE_UA(1b, 2b) ++ : "+r" (st_preempted), ++ "+&r" (err) ++ : "m" (st->preempted)); ++ if (err) ++ goto out; ++ ++ user_access_end(); ++ ++ vcpu->arch.st.preempted = 0; + + trace_kvm_pv_tlb_flush(vcpu->vcpu_id, + st_preempted & KVM_VCPU_FLUSH_TLB); + if (st_preempted & KVM_VCPU_FLUSH_TLB) + kvm_vcpu_flush_tlb_guest(vcpu); ++ ++ if (!user_access_begin(st, sizeof(*st))) ++ goto dirty; + } else { +- st->preempted = 0; ++ unsafe_put_user(0, &st->preempted, out); ++ vcpu->arch.st.preempted = 0; + } + +- vcpu->arch.st.preempted = 0; +- +- if (st->version & 1) +- st->version += 1; /* first time write, random junk */ ++ unsafe_get_user(version, &st->version, out); ++ if (version & 1) ++ version += 1; /* first time write, random junk */ + +- st->version += 1; ++ version += 1; ++ unsafe_put_user(version, &st->version, out); + + smp_wmb(); + +- st->steal += current->sched_info.run_delay - ++ unsafe_get_user(steal, &st->steal, out); ++ steal += current->sched_info.run_delay - + vcpu->arch.st.last_steal; + vcpu->arch.st.last_steal = current->sched_info.run_delay; ++ unsafe_put_user(steal, &st->steal, out); + +- smp_wmb(); +- +- st->version += 1; ++ version += 1; ++ unsafe_put_user(version, &st->version, out); + +- kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false); ++ out: ++ user_access_end(); ++ dirty: ++ mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); + } + + int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +@@ -4285,8 +4324,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu + + static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) + { +- struct kvm_host_map map; +- struct kvm_steal_time *st; ++ struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache; ++ struct kvm_steal_time __user *st; ++ struct kvm_memslots *slots; ++ static const u8 preempted = KVM_VCPU_PREEMPTED; + + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) + return; +@@ -4294,16 +4335,23 @@ static void kvm_steal_time_set_preempted + if (vcpu->arch.st.preempted) + return; + +- if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map, +- &vcpu->arch.st.cache, true)) ++ /* This happens on process exit */ ++ if (unlikely(current->mm != vcpu->kvm->mm)) + return; + +- st = map.hva + +- offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); ++ slots = kvm_memslots(vcpu->kvm); ++ ++ if (unlikely(slots->generation != ghc->generation || ++ kvm_is_error_hva(ghc->hva) || !ghc->memslot)) ++ return; + +- st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED; ++ st = (struct kvm_steal_time __user *)ghc->hva; ++ BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted)); + +- kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true); ++ if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted))) ++ vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED; ++ ++ mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); + } + + void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +@@ -10817,11 +10865,8 @@ void kvm_arch_vcpu_postcreate(struct kvm + + void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) + { +- struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache; + int idx; + +- kvm_release_pfn(cache->pfn, cache->dirty, cache); +- + kvmclock_reset(vcpu); + + static_call(kvm_x86_vcpu_free)(vcpu); diff --git 
a/queue-5.15/powerpc-85xx-fix-oops-when-mpc85xx_smp_guts_ids-node-cannot-be-found.patch b/queue-5.15/powerpc-85xx-fix-oops-when-mpc85xx_smp_guts_ids-node-cannot-be-found.patch new file mode 100644 index 00000000000..e9ebb8a322e --- /dev/null +++ b/queue-5.15/powerpc-85xx-fix-oops-when-mpc85xx_smp_guts_ids-node-cannot-be-found.patch @@ -0,0 +1,37 @@ +From 3c2172c1c47b4079c29f0e6637d764a99355ebcd Mon Sep 17 00:00:00 2001 +From: Xiaoming Ni +Date: Wed, 29 Sep 2021 11:36:45 +0800 +Subject: powerpc/85xx: Fix oops when mpc85xx_smp_guts_ids node cannot be found + +From: Xiaoming Ni + +commit 3c2172c1c47b4079c29f0e6637d764a99355ebcd upstream. + +When the field described in mpc85xx_smp_guts_ids[] is not configured in +dtb, the mpc85xx_setup_pmc() does not assign a value to the "guts" +variable. As a result, the oops is triggered when +mpc85xx_freeze_time_base() is executed. + +Fixes: 56f1ba280719 ("powerpc/mpc85xx: refactor the PM operations") +Cc: stable@vger.kernel.org # v4.6+ +Signed-off-by: Xiaoming Ni +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20210929033646.39630-2-nixiaoming@huawei.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c ++++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c +@@ -94,9 +94,8 @@ int __init mpc85xx_setup_pmc(void) + pr_err("Could not map guts node address\n"); + return -ENOMEM; + } ++ qoriq_pm_ops = &mpc85xx_pm_ops; + } + +- qoriq_pm_ops = &mpc85xx_pm_ops; +- + return 0; + } diff --git a/queue-5.15/ring-buffer-protect-ring_buffer_reset-from-reentrancy.patch b/queue-5.15/ring-buffer-protect-ring_buffer_reset-from-reentrancy.patch new file mode 100644 index 00000000000..5afe232e4e5 --- /dev/null +++ b/queue-5.15/ring-buffer-protect-ring_buffer_reset-from-reentrancy.patch @@ -0,0 +1,50 @@ +From 51d157946666382e779f94c39891e8e9a020da78 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Mon, 8 Nov 2021 10:58:10 -0500 +Subject: ring-buffer: Protect ring_buffer_reset() from reentrancy + +From: Steven Rostedt (VMware) + +commit 51d157946666382e779f94c39891e8e9a020da78 upstream. + +The resetting of the entire ring buffer use to simply go through and reset +each individual CPU buffer that had its own protection and synchronization. +But this was very slow, due to performing a synchronization for each CPU. +The code was reshuffled to do one disabling of all CPU buffers, followed +by a single RCU synchronization, and then the resetting of each of the CPU +buffers. But unfortunately, the mutex that prevented multiple occurrences +of resetting the buffer was not moved to the upper function, and there is +nothing to protect from it. + +Take the ring buffer mutex around the global reset. 
+
+Cc: stable@vger.kernel.org
+Fixes: b23d7a5f4a07a ("ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU")
+Reported-by: "Tzvetomir Stoyanov (VMware)"
+Signed-off-by: Steven Rostedt (VMware)
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/trace/ring_buffer.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -5233,6 +5233,9 @@ void ring_buffer_reset(struct trace_buff
+ struct ring_buffer_per_cpu *cpu_buffer;
+ int cpu;
+
++ /* prevent another thread from changing buffer sizes */
++ mutex_lock(&buffer->mutex);
++
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+
+@@ -5251,6 +5254,8 @@ void ring_buffer_reset(struct trace_buff
+ atomic_dec(&cpu_buffer->record_disabled);
+ atomic_dec(&cpu_buffer->resize_disabled);
+ }
++
++ mutex_unlock(&buffer->mutex);
+ }
+ EXPORT_SYMBOL_GPL(ring_buffer_reset);
+
diff --git a/queue-5.15/serial-core-fix-initializing-and-restoring-termios-speed.patch b/queue-5.15/serial-core-fix-initializing-and-restoring-termios-speed.patch
new file mode 100644
index 00000000000..103789141ad
--- /dev/null
+++ b/queue-5.15/serial-core-fix-initializing-and-restoring-termios-speed.patch
@@ -0,0 +1,111 @@
+From 027b57170bf8bb6999a28e4a5f3d78bf1db0f90c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Pali=20Roh=C3=A1r?=
+Date: Sat, 2 Oct 2021 15:09:00 +0200
+Subject: serial: core: Fix initializing and restoring termios speed
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Pali Rohár
+
+commit 027b57170bf8bb6999a28e4a5f3d78bf1db0f90c upstream.
+
+Since commit edc6afc54968 ("tty: switch to ktermios and new framework")
+the termios speed is no longer stored only in the c_cflag member but also
+in the new c_ispeed and c_ospeed members. If the BOTHER flag is set in
+c_cflag then the termios speed is stored only in these new members.
+
+Therefore, to correctly restore the termios speed, it is required to store
+the ispeed and ospeed members as well, not only the cflag member.
+
+In case only the cflag member with the BOTHER flag is restored then the
+functions tty_termios_baud_rate() and tty_termios_input_baud_rate() return
+the baudrate stored in the c_ospeed / c_ispeed member, which is zero as it
+was not restored either. If the reported baudrate is invalid (e.g. zero)
+then the serial core functions report the fallback baudrate value 9600. So
+in this case the original baudrate is lost and the kernel changes it to 9600.
+
+A simple reproducer of this issue is to boot the kernel with the command
+line argument "console=ttyXXX,86400" (where ttyXXX is the device name).
+For speed 86400 there is no Bnnn constant, and therefore the kernel has to
+represent this speed via the BOTHER c_cflag, which means that the speed is
+stored only in the c_ospeed and c_ispeed members, not in c_cflag anymore.
+
+If the bootloader correctly configures the serial device to speed 86400, the
+kernel prints the boot log to the early console at speed 86400 without any
+issue. But after the kernel starts initializing the real console device
+ttyXXX, the speed is changed to the fallback 9600 as that information was lost.
+
+This patch fixes the above issue by also storing and restoring the ispeed
+and ospeed members, which are required for the BOTHER flag.
+ +Fixes: edc6afc54968 ("[PATCH] tty: switch to ktermios and new framework") +Cc: stable@vger.kernel.org +Signed-off-by: Pali Rohár +Link: https://lore.kernel.org/r/20211002130900.9518-1-pali@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/tty/serial/serial_core.c | 16 ++++++++++++++-- + include/linux/console.h | 2 ++ + 2 files changed, 16 insertions(+), 2 deletions(-) + +--- a/drivers/tty/serial/serial_core.c ++++ b/drivers/tty/serial/serial_core.c +@@ -222,7 +222,11 @@ static int uart_port_startup(struct tty_ + if (retval == 0) { + if (uart_console(uport) && uport->cons->cflag) { + tty->termios.c_cflag = uport->cons->cflag; ++ tty->termios.c_ispeed = uport->cons->ispeed; ++ tty->termios.c_ospeed = uport->cons->ospeed; + uport->cons->cflag = 0; ++ uport->cons->ispeed = 0; ++ uport->cons->ospeed = 0; + } + /* + * Initialise the hardware port settings. +@@ -290,8 +294,11 @@ static void uart_shutdown(struct tty_str + /* + * Turn off DTR and RTS early. + */ +- if (uport && uart_console(uport) && tty) ++ if (uport && uart_console(uport) && tty) { + uport->cons->cflag = tty->termios.c_cflag; ++ uport->cons->ispeed = tty->termios.c_ispeed; ++ uport->cons->ospeed = tty->termios.c_ospeed; ++ } + + if (!tty || C_HUPCL(tty)) + uart_port_dtr_rts(uport, 0); +@@ -2094,8 +2101,11 @@ uart_set_options(struct uart_port *port, + * Allow the setting of the UART parameters with a NULL console + * too: + */ +- if (co) ++ if (co) { + co->cflag = termios.c_cflag; ++ co->ispeed = termios.c_ispeed; ++ co->ospeed = termios.c_ospeed; ++ } + + return 0; + } +@@ -2229,6 +2239,8 @@ int uart_resume_port(struct uart_driver + */ + memset(&termios, 0, sizeof(struct ktermios)); + termios.c_cflag = uport->cons->cflag; ++ termios.c_ispeed = uport->cons->ispeed; ++ termios.c_ospeed = uport->cons->ospeed; + + /* + * If that's unset, use the tty termios setting. +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -149,6 +149,8 @@ struct console { + short flags; + short index; + int cflag; ++ uint ispeed; ++ uint ospeed; + void *data; + struct console *next; + }; diff --git a/queue-5.15/series b/queue-5.15/series index c2e0960ac10..d2a11a26dab 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -118,3 +118,18 @@ power-supply-max17042_battery-prevent-int-underflow-in-set_soc_threshold.patch power-supply-max17042_battery-use-vfsoc-for-capacity-when-no-rsns.patch iio-core-fix-double-free-in-iio_device_unregister_sysfs.patch iio-core-check-return-value-when-calling-dev_set_name.patch +kvm-arm64-extract-esr_elx.ec-only.patch +kvm-x86-fix-recording-of-guest-steal-time-preempted-status.patch +kvm-x86-add-helper-to-consolidate-core-logic-of-set_cpuid-2-flows.patch +kvm-nvmx-query-current-vmcs-when-determining-if-msr-bitmaps-are-in-use.patch +kvm-nvmx-handle-dynamic-msr-intercept-toggling.patch +can-peak_usb-always-ask-for-berr-reporting-for-pcan-usb-devices.patch +can-mcp251xfd-mcp251xfd_irq-add-missing-can_rx_offload_threaded_irq_finish-in-case-of-bus-off.patch +can-j1939-j1939_tp_cmd_recv-ignore-abort-message-in-the-bam-transport.patch +can-j1939-j1939_can_recv-ignore-messages-with-invalid-source-address.patch +can-j1939-j1939_tp_cmd_recv-check-the-dst-address-of-tp.cm_bam.patch +iio-adc-tsc2046-fix-scan-interval-warning.patch +powerpc-85xx-fix-oops-when-mpc85xx_smp_guts_ids-node-cannot-be-found.patch +io_uring-honour-zeroes-as-io-wq-worker-limits.patch +ring-buffer-protect-ring_buffer_reset-from-reentrancy.patch +serial-core-fix-initializing-and-restoring-termios-speed.patch -- 2.47.2