--- /dev/null
+From ef9989afda73332df566852d6e9ca695c05f10ce Mon Sep 17 00:00:00 2001
+From: Mark Rutland <mark.rutland@arm.com>
+Date: Tue, 1 Feb 2022 13:29:22 +0000
+Subject: kvm: add guest_state_{enter,exit}_irqoff()
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+commit ef9989afda73332df566852d6e9ca695c05f10ce upstream.
+
+When transitioning to/from guest mode, it is necessary to inform
+lockdep, tracing, and RCU in a specific order, similar to the
+requirements for transitions to/from user mode. Additionally, it is
+necessary to perform vtime accounting for a window around running the
+guest, with RCU enabled, such that timer interrupts taken from the guest
+can be accounted as guest time.
+
+Most architectures don't handle all the necessary pieces, and have a
+number of common bugs, including unsafe usage of RCU during the window
+between guest_enter() and guest_exit().
+
+On x86, this was dealt with across commits:
+
+ 87fa7f3e98a1310e ("x86/kvm: Move context tracking where it belongs")
+ 0642391e2139a2c1 ("x86/kvm/vmx: Add hardirq tracing to guest enter/exit")
+ 9fc975e9efd03e57 ("x86/kvm/svm: Add hardirq tracing on guest enter/exit")
+ 3ebccdf373c21d86 ("x86/kvm/vmx: Move guest enter/exit into .noinstr.text")
+ 135961e0a7d555fc ("x86/kvm/svm: Move guest enter/exit into .noinstr.text")
+ 160457140187c5fb ("KVM: x86: Defer vtime accounting 'til after IRQ handling")
+ bc908e091b326467 ("KVM: x86: Consolidate guest enter/exit logic to common helpers")
+
+... but those fixes are specific to x86, and as the resulting logic
+(while correct) is split across generic helper functions and
+x86-specific helper functions, it is difficult to see that the
+entry/exit accounting is balanced.
+
+This patch adds generic helpers which architectures can use to handle
+guest entry/exit consistently and correctly. The guest_{enter,exit}()
+helpers are split into guest_timing_{enter,exit}() to perform vtime
+accounting, and guest_context_{enter,exit}() to perform the necessary
+context tracking and RCU management. The existing guest_{enter,exit}()
+helpers are left as wrappers of these.
+
+Atop this, new guest_state_enter_irqoff() and guest_state_exit_irqoff()
+helpers are added to handle the ordering of lockdep, tracing, and RCU
+management. These are intended to mirror exit_to_user_mode() and
+enter_from_user_mode().
+
+Subsequent patches will migrate architectures over to the new helpers,
+following a sequence:
+
+ guest_timing_enter_irqoff();
+
+ guest_state_enter_irqoff();
+ < run the vcpu >
+ guest_state_exit_irqoff();
+
+ < take any pending IRQs >
+
+ guest_timing_exit_irqoff();
+
+This sequence handles all of the above correctly, and more clearly
+balances the entry and exit portions, making it easier to understand.
+
+The existing helpers are marked as deprecated, and will be removed once
+all architectures have been converted.
+
+There should be no functional change as a result of this patch.
+
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Marc Zyngier <maz@kernel.org>
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Reviewed-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
+Message-Id: <20220201132926.3301912-2-mark.rutland@arm.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/kvm_host.h | 112 +++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 109 insertions(+), 3 deletions(-)
+
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -15,6 +15,8 @@
+ #include <linux/minmax.h>
+ #include <linux/mm.h>
+ #include <linux/mmu_notifier.h>
++#include <linux/ftrace.h>
++#include <linux/instrumentation.h>
+ #include <linux/preempt.h>
+ #include <linux/msi.h>
+ #include <linux/slab.h>
+@@ -363,8 +365,11 @@ struct kvm_vcpu {
+ int last_used_slot;
+ };
+
+-/* must be called with irqs disabled */
+-static __always_inline void guest_enter_irqoff(void)
++/*
++ * Start accounting time towards a guest.
++ * Must be called before entering guest context.
++ */
++static __always_inline void guest_timing_enter_irqoff(void)
+ {
+ /*
+ * This is running in ioctl context so its safe to assume that it's the
+@@ -373,7 +378,18 @@ static __always_inline void guest_enter_
+ instrumentation_begin();
+ vtime_account_guest_enter();
+ instrumentation_end();
++}
+
++/*
++ * Enter guest context and enter an RCU extended quiescent state.
++ *
++ * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is
++ * unsafe to use any code which may directly or indirectly use RCU, tracing
++ * (including IRQ flag tracing), or lockdep. All code in this period must be
++ * non-instrumentable.
++ */
++static __always_inline void guest_context_enter_irqoff(void)
++{
+ /*
+ * KVM does not hold any references to rcu protected data when it
+ * switches CPU into a guest mode. In fact switching to a guest mode
+@@ -389,16 +405,79 @@ static __always_inline void guest_enter_
+ }
+ }
+
+-static __always_inline void guest_exit_irqoff(void)
++/*
++ * Deprecated. Architectures should move to guest_timing_enter_irqoff() and
++ * guest_state_enter_irqoff().
++ */
++static __always_inline void guest_enter_irqoff(void)
++{
++ guest_timing_enter_irqoff();
++ guest_context_enter_irqoff();
++}
++
++/**
++ * guest_state_enter_irqoff - Fixup state when entering a guest
++ *
++ * Entry to a guest will enable interrupts, but the kernel state is interrupts
++ * disabled when this is invoked. Also tell RCU about it.
++ *
++ * 1) Trace interrupts on state
++ * 2) Invoke context tracking if enabled to adjust RCU state
++ * 3) Tell lockdep that interrupts are enabled
++ *
++ * Invoked from architecture specific code before entering a guest.
++ * Must be called with interrupts disabled and the caller must be
++ * non-instrumentable.
++ * The caller has to invoke guest_timing_enter_irqoff() before this.
++ *
++ * Note: this is analogous to exit_to_user_mode().
++ */
++static __always_inline void guest_state_enter_irqoff(void)
++{
++ instrumentation_begin();
++ trace_hardirqs_on_prepare();
++ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
++ instrumentation_end();
++
++ guest_context_enter_irqoff();
++ lockdep_hardirqs_on(CALLER_ADDR0);
++}
++
++/*
++ * Exit guest context and exit an RCU extended quiescent state.
++ *
++ * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is
++ * unsafe to use any code which may directly or indirectly use RCU, tracing
++ * (including IRQ flag tracing), or lockdep. All code in this period must be
++ * non-instrumentable.
++ */
++static __always_inline void guest_context_exit_irqoff(void)
+ {
+ context_tracking_guest_exit();
++}
+
++/*
++ * Stop accounting time towards a guest.
++ * Must be called after exiting guest context.
++ */
++static __always_inline void guest_timing_exit_irqoff(void)
++{
+ instrumentation_begin();
+ /* Flush the guest cputime we spent on the guest */
+ vtime_account_guest_exit();
+ instrumentation_end();
+ }
+
++/*
++ * Deprecated. Architectures should move to guest_state_exit_irqoff() and
++ * guest_timing_exit_irqoff().
++ */
++static __always_inline void guest_exit_irqoff(void)
++{
++ guest_context_exit_irqoff();
++ guest_timing_exit_irqoff();
++}
++
+ static inline void guest_exit(void)
+ {
+ unsigned long flags;
+@@ -408,6 +487,33 @@ static inline void guest_exit(void)
+ local_irq_restore(flags);
+ }
+
++/**
++ * guest_state_exit_irqoff - Establish state when returning from guest mode
++ *
++ * Entry from a guest disables interrupts, but guest mode is traced as
++ * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
++ *
++ * 1) Tell lockdep that interrupts are disabled
++ * 2) Invoke context tracking if enabled to reactivate RCU
++ * 3) Trace interrupts off state
++ *
++ * Invoked from architecture specific code after exiting a guest.
++ * Must be invoked with interrupts disabled and the caller must be
++ * non-instrumentable.
++ * The caller has to invoke guest_timing_exit_irqoff() after this.
++ *
++ * Note: this is analogous to enter_from_user_mode().
++ */
++static __always_inline void guest_state_exit_irqoff(void)
++{
++ lockdep_hardirqs_off(CALLER_ADDR0);
++ guest_context_exit_irqoff();
++
++ instrumentation_begin();
++ trace_hardirqs_off_finish();
++ instrumentation_end();
++}
++
+ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
+ {
+ /*