--- /dev/null
+From 8327bd4fcb6c1dab01ce5c6ff00b42496836dcd2 Mon Sep 17 00:00:00 2001
+From: Varad Gautam <varadgautam@google.com>
+Date: Sun, 30 Mar 2025 16:42:29 +0000
+Subject: asm-generic/io.h: Skip trace helpers if rwmmio events are disabled
+
+From: Varad Gautam <varadgautam@google.com>
+
+commit 8327bd4fcb6c1dab01ce5c6ff00b42496836dcd2 upstream.
+
+With `CONFIG_TRACE_MMIO_ACCESS=y`, the `{read,write}{b,w,l,q}{_relaxed}()`
+MMIO accessors unconditionally call the `log_{post_}{read,write}_mmio()`
+helpers, which in turn invoke the ftrace ops for the `rwmmio` trace
+events.
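+
+For reference, a sketch of what one such helper does (modeled on the
+stock lib/trace_readwrite.c implementation; exact details may vary by
+tree):
+
+	void log_read_mmio(u8 width, const volatile void __iomem *addr,
+			   unsigned long caller_addr, unsigned long caller_addr0)
+	{
+		/* Unconditional out-of-line call, then the trace event. */
+		trace_rwmmio_read(caller_addr, caller_addr0, width, addr);
+	}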
+
+This adds a performance penalty to every MMIO accessor call, even when
+the `rwmmio` events are disabled at runtime (~80% overhead in local
+measurements).
+
+Guard these calls with `tracepoint_enabled()` so the logging helpers are
+only invoked while the corresponding `rwmmio` tracepoint is enabled.
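+
+tracepoint_enabled() is a static-key test, so the disabled-event cost
+collapses to a patched no-op branch. A minimal sketch of the
+linux/tracepoint-defs.h definitions this relies on:
+
+	#define DECLARE_TRACEPOINT(tp) \
+		extern struct tracepoint __tracepoint_##tp
+
+	/* False (a NOP'd branch) until the event is enabled at runtime: */
+	#define tracepoint_enabled(tp) \
+		static_key_false(&(__tracepoint_##tp).key)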
+
+Signed-off-by: Varad Gautam <varadgautam@google.com>
+Fixes: 210031971cdd ("asm-generic/io: Add logging support for MMIO accessors")
+Cc: stable@vger.kernel.org
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/asm-generic/io.h | 98 +++++++++++++++++++++++++++++++----------------
+ 1 file changed, 66 insertions(+), 32 deletions(-)
+
+--- a/include/asm-generic/io.h
++++ b/include/asm-generic/io.h
+@@ -74,6 +74,7 @@
+ #if IS_ENABLED(CONFIG_TRACE_MMIO_ACCESS) && !(defined(__DISABLE_TRACE_MMIO__))
+ #include <linux/tracepoint-defs.h>
+
++#define rwmmio_tracepoint_enabled(tracepoint) tracepoint_enabled(tracepoint)
+ DECLARE_TRACEPOINT(rwmmio_write);
+ DECLARE_TRACEPOINT(rwmmio_post_write);
+ DECLARE_TRACEPOINT(rwmmio_read);
+@@ -90,6 +91,7 @@ void log_post_read_mmio(u64 val, u8 widt
+
+ #else
+
++#define rwmmio_tracepoint_enabled(tracepoint) false
+ static inline void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr,
+ unsigned long caller_addr, unsigned long caller_addr0) {}
+ static inline void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr,
+@@ -188,11 +190,13 @@ static inline u8 readb(const volatile vo
+ {
+ u8 val;
+
+- log_read_mmio(8, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_read))
++ log_read_mmio(8, addr, _THIS_IP_, _RET_IP_);
+ __io_br();
+ val = __raw_readb(addr);
+ __io_ar(val);
+- log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_post_read))
++ log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_);
+ return val;
+ }
+ #endif
+@@ -203,11 +207,13 @@ static inline u16 readw(const volatile v
+ {
+ u16 val;
+
+- log_read_mmio(16, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_read))
++ log_read_mmio(16, addr, _THIS_IP_, _RET_IP_);
+ __io_br();
+ val = __le16_to_cpu((__le16 __force)__raw_readw(addr));
+ __io_ar(val);
+- log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_post_read))
++ log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_);
+ return val;
+ }
+ #endif
+@@ -218,11 +224,13 @@ static inline u32 readl(const volatile v
+ {
+ u32 val;
+
+- log_read_mmio(32, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_read))
++ log_read_mmio(32, addr, _THIS_IP_, _RET_IP_);
+ __io_br();
+ val = __le32_to_cpu((__le32 __force)__raw_readl(addr));
+ __io_ar(val);
+- log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_post_read))
++ log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_);
+ return val;
+ }
+ #endif
+@@ -234,11 +242,13 @@ static inline u64 readq(const volatile v
+ {
+ u64 val;
+
+- log_read_mmio(64, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_read))
++ log_read_mmio(64, addr, _THIS_IP_, _RET_IP_);
+ __io_br();
+ val = __le64_to_cpu((__le64 __force)__raw_readq(addr));
+ __io_ar(val);
+- log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_post_read))
++ log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_);
+ return val;
+ }
+ #endif
+@@ -248,11 +258,13 @@ static inline u64 readq(const volatile v
+ #define writeb writeb
+ static inline void writeb(u8 value, volatile void __iomem *addr)
+ {
+- log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_write))
++ log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
+ __io_bw();
+ __raw_writeb(value, addr);
+ __io_aw();
+- log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_post_write))
++ log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
+ }
+ #endif
+
+@@ -260,11 +272,13 @@ static inline void writeb(u8 value, vola
+ #define writew writew
+ static inline void writew(u16 value, volatile void __iomem *addr)
+ {
+- log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_write))
++ log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
+ __io_bw();
+ __raw_writew((u16 __force)cpu_to_le16(value), addr);
+ __io_aw();
+- log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_post_write))
++ log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
+ }
+ #endif
+
+@@ -272,11 +286,13 @@ static inline void writew(u16 value, vol
+ #define writel writel
+ static inline void writel(u32 value, volatile void __iomem *addr)
+ {
+- log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_write))
++ log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
+ __io_bw();
+ __raw_writel((u32 __force)__cpu_to_le32(value), addr);
+ __io_aw();
+- log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_post_write))
++ log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
+ }
+ #endif
+
+@@ -285,11 +301,13 @@ static inline void writel(u32 value, vol
+ #define writeq writeq
+ static inline void writeq(u64 value, volatile void __iomem *addr)
+ {
+- log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_write))
++ log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
+ __io_bw();
+ __raw_writeq((u64 __force)__cpu_to_le64(value), addr);
+ __io_aw();
+- log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_post_write))
++ log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
+ }
+ #endif
+ #endif /* CONFIG_64BIT */
+@@ -305,9 +323,11 @@ static inline u8 readb_relaxed(const vol
+ {
+ u8 val;
+
+- log_read_mmio(8, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_read))
++ log_read_mmio(8, addr, _THIS_IP_, _RET_IP_);
+ val = __raw_readb(addr);
+- log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_post_read))
++ log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_);
+ return val;
+ }
+ #endif
+@@ -318,9 +338,11 @@ static inline u16 readw_relaxed(const vo
+ {
+ u16 val;
+
+- log_read_mmio(16, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_read))
++ log_read_mmio(16, addr, _THIS_IP_, _RET_IP_);
+ val = __le16_to_cpu((__le16 __force)__raw_readw(addr));
+- log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_post_read))
++ log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_);
+ return val;
+ }
+ #endif
+@@ -331,9 +353,11 @@ static inline u32 readl_relaxed(const vo
+ {
+ u32 val;
+
+- log_read_mmio(32, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_read))
++ log_read_mmio(32, addr, _THIS_IP_, _RET_IP_);
+ val = __le32_to_cpu((__le32 __force)__raw_readl(addr));
+- log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_post_read))
++ log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_);
+ return val;
+ }
+ #endif
+@@ -344,9 +368,11 @@ static inline u64 readq_relaxed(const vo
+ {
+ u64 val;
+
+- log_read_mmio(64, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_read))
++ log_read_mmio(64, addr, _THIS_IP_, _RET_IP_);
+ val = __le64_to_cpu((__le64 __force)__raw_readq(addr));
+- log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_post_read))
++ log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_);
+ return val;
+ }
+ #endif
+@@ -355,9 +381,11 @@ static inline u64 readq_relaxed(const vo
+ #define writeb_relaxed writeb_relaxed
+ static inline void writeb_relaxed(u8 value, volatile void __iomem *addr)
+ {
+- log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_write))
++ log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
+ __raw_writeb(value, addr);
+- log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_post_write))
++ log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
+ }
+ #endif
+
+@@ -365,9 +393,11 @@ static inline void writeb_relaxed(u8 val
+ #define writew_relaxed writew_relaxed
+ static inline void writew_relaxed(u16 value, volatile void __iomem *addr)
+ {
+- log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_write))
++ log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
+ __raw_writew((u16 __force)cpu_to_le16(value), addr);
+- log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_post_write))
++ log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
+ }
+ #endif
+
+@@ -375,9 +405,11 @@ static inline void writew_relaxed(u16 va
+ #define writel_relaxed writel_relaxed
+ static inline void writel_relaxed(u32 value, volatile void __iomem *addr)
+ {
+- log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_write))
++ log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
+ __raw_writel((u32 __force)__cpu_to_le32(value), addr);
+- log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_post_write))
++ log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
+ }
+ #endif
+
+@@ -385,9 +417,11 @@ static inline void writel_relaxed(u32 va
+ #define writeq_relaxed writeq_relaxed
+ static inline void writeq_relaxed(u64 value, volatile void __iomem *addr)
+ {
+- log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_write))
++ log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
+ __raw_writeq((u64 __force)__cpu_to_le64(value), addr);
+- log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
++ if (rwmmio_tracepoint_enabled(rwmmio_post_write))
++ log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
+ }
+ #endif
+
--- /dev/null
+From 68e61f6fd65610e73b17882f86fedfd784d99229 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 11 Jul 2025 10:27:46 -0700
+Subject: KVM: SVM: Emulate PERF_CNTR_GLOBAL_STATUS_SET for PerfMonV2
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 68e61f6fd65610e73b17882f86fedfd784d99229 upstream.
+
+Emulate PERF_CNTR_GLOBAL_STATUS_SET when PerfMonV2 is enumerated to the
+guest, as the MSR is supposed to exist in all AMD v2 PMUs.
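+
+PERF_CNTR_GLOBAL_STATUS_SET is the write-one-to-set counterpart of
+GLOBAL_STATUS_CLR: bits written to it are OR'd into GLOBAL_STATUS, with
+reserved bits masked off (see the kvm_pmu_set_msr() hunk below). A
+hypothetical guest-side illustration, not code from this patch:
+
+	u64 status;
+
+	/* Latch an overflow for counter 0 ... */
+	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET, BIT_ULL(0));
+	/* ... and observe it set in GLOBAL_STATUS. */
+	rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);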
+
+Fixes: 4a2771895ca6 ("KVM: x86/svm/pmu: Add AMD PerfMonV2 support")
+Cc: stable@vger.kernel.org
+Cc: Sandipan Das <sandipan.das@amd.com>
+Link: https://lore.kernel.org/r/20250711172746.1579423-1-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/msr-index.h | 1 +
+ arch/x86/kvm/pmu.c | 5 +++++
+ arch/x86/kvm/svm/pmu.c | 1 +
+ arch/x86/kvm/x86.c | 2 ++
+ 4 files changed, 9 insertions(+)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -661,6 +661,7 @@
+ #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
+ #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
+ #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
++#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303
+
+ /* AMD Last Branch Record MSRs */
+ #define MSR_AMD64_LBR_SELECT 0xc000010e
+--- a/arch/x86/kvm/pmu.c
++++ b/arch/x86/kvm/pmu.c
+@@ -588,6 +588,7 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcp
+ msr_info->data = pmu->global_ctrl;
+ break;
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
++ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET:
+ case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ msr_info->data = 0;
+ break;
+@@ -649,6 +650,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcp
+ if (!msr_info->host_initiated)
+ pmu->global_status &= ~data;
+ break;
++ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET:
++ if (!msr_info->host_initiated)
++ pmu->global_status |= data & ~pmu->global_status_rsvd;
++ break;
+ default:
+ kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
+ return static_call(kvm_x86_pmu_set_msr)(vcpu, msr_info);
+--- a/arch/x86/kvm/svm/pmu.c
++++ b/arch/x86/kvm/svm/pmu.c
+@@ -117,6 +117,7 @@ static bool amd_is_valid_msr(struct kvm_
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
++ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET:
+ return pmu->version > 1;
+ default:
+ if (msr > MSR_F15H_PERF_CTR5 &&
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1495,6 +1495,7 @@ static const u32 msrs_to_save_pmu[] = {
+ MSR_AMD64_PERF_CNTR_GLOBAL_CTL,
+ MSR_AMD64_PERF_CNTR_GLOBAL_STATUS,
+ MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
++ MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET,
+ };
+
+ static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) +
+@@ -7194,6 +7195,7 @@ static void kvm_probe_msr_to_save(u32 ms
+ case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
++ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET:
+ if (!kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2))
+ return;
+ break;