From: Greg Kroah-Hartman Date: Wed, 15 Oct 2025 11:09:08 +0000 (+0200) Subject: 6.17-stable patches X-Git-Tag: v5.15.195~111 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=edb096bd2e783fbc5271df37521fb4ffade072e6;p=thirdparty%2Fkernel%2Fstable-queue.git 6.17-stable patches added patches: asm-generic-io.h-skip-trace-helpers-if-rwmmio-events-are-disabled.patch clocksource-drivers-clps711x-fix-resource-leaks-in-error-paths.patch cpufreq-make-drivers-using-cpufreq_eternal-specify-transition-latency.patch dma-mapping-fix-direction-in-dma_alloc-direction-traces.patch iio-frequency-adf4350-fix-adf4350_reg3_12bit_clkdiv_mode.patch kvm-svm-emulate-perf_cntr_global_status_set-for-perfmonv2.patch kvm-x86-add-helper-to-retrieve-current-value-of-user-return-msr.patch listmount-don-t-call-path_put-under-namespace-semaphore.patch media-v4l2-subdev-fix-alloc-failure-check-in-v4l2_subdev_call_state_try.patch memcg-skip-cgroup_file_notify-if-spinning-is-not-allowed.patch nfsd-unregister-with-rpcbind-when-deleting-a-transport.patch page_pool-fix-pp_magic_mask-to-avoid-crashing-on-some-32-bit-arches.patch pm-runtime-update-kerneldoc-return-codes.patch statmount-don-t-call-path_put-under-namespace-semaphore.patch --- diff --git a/queue-6.17/asm-generic-io.h-skip-trace-helpers-if-rwmmio-events-are-disabled.patch b/queue-6.17/asm-generic-io.h-skip-trace-helpers-if-rwmmio-events-are-disabled.patch new file mode 100644 index 0000000000..e1ff570505 --- /dev/null +++ b/queue-6.17/asm-generic-io.h-skip-trace-helpers-if-rwmmio-events-are-disabled.patch @@ -0,0 +1,286 @@ +From 8327bd4fcb6c1dab01ce5c6ff00b42496836dcd2 Mon Sep 17 00:00:00 2001 +From: Varad Gautam +Date: Sun, 30 Mar 2025 16:42:29 +0000 +Subject: asm-generic/io.h: Skip trace helpers if rwmmio events are disabled + +From: Varad Gautam + +commit 8327bd4fcb6c1dab01ce5c6ff00b42496836dcd2 upstream. + +With `CONFIG_TRACE_MMIO_ACCESS=y`, the `{read,write}{b,w,l,q}{_relaxed}()` +mmio accessors unconditionally call `log_{post_}{read,write}_mmio()` +helpers, which in turn call the ftrace ops for `rwmmio` trace events + +This adds a performance penalty per mmio accessor call, even when +`rwmmio` events are disabled at runtime (~80% overhead on local +measurement). + +Guard these with `tracepoint_enabled()`. 
+ +Signed-off-by: Varad Gautam +Fixes: 210031971cdd ("asm-generic/io: Add logging support for MMIO accessors") +Cc: stable@vger.kernel.org +Signed-off-by: Arnd Bergmann +Signed-off-by: Greg Kroah-Hartman +--- + include/asm-generic/io.h | 98 +++++++++++++++++++++++++++++++---------------- + 1 file changed, 66 insertions(+), 32 deletions(-) + +--- a/include/asm-generic/io.h ++++ b/include/asm-generic/io.h +@@ -75,6 +75,7 @@ + #if IS_ENABLED(CONFIG_TRACE_MMIO_ACCESS) && !(defined(__DISABLE_TRACE_MMIO__)) + #include + ++#define rwmmio_tracepoint_enabled(tracepoint) tracepoint_enabled(tracepoint) + DECLARE_TRACEPOINT(rwmmio_write); + DECLARE_TRACEPOINT(rwmmio_post_write); + DECLARE_TRACEPOINT(rwmmio_read); +@@ -91,6 +92,7 @@ void log_post_read_mmio(u64 val, u8 widt + + #else + ++#define rwmmio_tracepoint_enabled(tracepoint) false + static inline void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} + static inline void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr, +@@ -189,11 +191,13 @@ static inline u8 readb(const volatile vo + { + u8 val; + +- log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_read)) ++ log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __raw_readb(addr); + __io_ar(val); +- log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_post_read)) ++ log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); + return val; + } + #endif +@@ -204,11 +208,13 @@ static inline u16 readw(const volatile v + { + u16 val; + +- log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_read)) ++ log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); + __io_ar(val); +- log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_post_read)) ++ log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); + return val; + } + #endif +@@ -219,11 +225,13 @@ static inline u32 readl(const volatile v + { + u32 val; + +- log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_read)) ++ log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); + __io_ar(val); +- log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_post_read)) ++ log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); + return val; + } + #endif +@@ -235,11 +243,13 @@ static inline u64 readq(const volatile v + { + u64 val; + +- log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_read)) ++ log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); + __io_ar(val); +- log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_post_read)) ++ log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); + return val; + } + #endif +@@ -249,11 +259,13 @@ static inline u64 readq(const volatile v + #define writeb writeb + static inline void writeb(u8 value, volatile void __iomem *addr) + { +- log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_write)) ++ log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writeb(value, addr); + __io_aw(); +- log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); ++ if 
(rwmmio_tracepoint_enabled(rwmmio_post_write)) ++ log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + } + #endif + +@@ -261,11 +273,13 @@ static inline void writeb(u8 value, vola + #define writew writew + static inline void writew(u16 value, volatile void __iomem *addr) + { +- log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_write)) ++ log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writew((u16 __force)cpu_to_le16(value), addr); + __io_aw(); +- log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_post_write)) ++ log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + } + #endif + +@@ -273,11 +287,13 @@ static inline void writew(u16 value, vol + #define writel writel + static inline void writel(u32 value, volatile void __iomem *addr) + { +- log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_write)) ++ log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); + __io_aw(); +- log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_post_write)) ++ log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + } + #endif + +@@ -286,11 +302,13 @@ static inline void writel(u32 value, vol + #define writeq writeq + static inline void writeq(u64 value, volatile void __iomem *addr) + { +- log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_write)) ++ log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); + __io_aw(); +- log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_post_write)) ++ log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + } + #endif + #endif /* CONFIG_64BIT */ +@@ -306,9 +324,11 @@ static inline u8 readb_relaxed(const vol + { + u8 val; + +- log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_read)) ++ log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); + val = __raw_readb(addr); +- log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_post_read)) ++ log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); + return val; + } + #endif +@@ -319,9 +339,11 @@ static inline u16 readw_relaxed(const vo + { + u16 val; + +- log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_read)) ++ log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); +- log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_post_read)) ++ log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); + return val; + } + #endif +@@ -332,9 +354,11 @@ static inline u32 readl_relaxed(const vo + { + u32 val; + +- log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_read)) ++ log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); +- log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_post_read)) ++ log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); + return val; + } + #endif +@@ -345,9 +369,11 @@ static inline u64 readq_relaxed(const vo + { + u64 val; + +- log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_read)) ++ log_read_mmio(64, addr, 
_THIS_IP_, _RET_IP_); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); +- log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_post_read)) ++ log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); + return val; + } + #endif +@@ -356,9 +382,11 @@ static inline u64 readq_relaxed(const vo + #define writeb_relaxed writeb_relaxed + static inline void writeb_relaxed(u8 value, volatile void __iomem *addr) + { +- log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_write)) ++ log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + __raw_writeb(value, addr); +- log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_post_write)) ++ log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + } + #endif + +@@ -366,9 +394,11 @@ static inline void writeb_relaxed(u8 val + #define writew_relaxed writew_relaxed + static inline void writew_relaxed(u16 value, volatile void __iomem *addr) + { +- log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_write)) ++ log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + __raw_writew((u16 __force)cpu_to_le16(value), addr); +- log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_post_write)) ++ log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + } + #endif + +@@ -376,9 +406,11 @@ static inline void writew_relaxed(u16 va + #define writel_relaxed writel_relaxed + static inline void writel_relaxed(u32 value, volatile void __iomem *addr) + { +- log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_write)) ++ log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); +- log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_post_write)) ++ log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + } + #endif + +@@ -386,9 +418,11 @@ static inline void writel_relaxed(u32 va + #define writeq_relaxed writeq_relaxed + static inline void writeq_relaxed(u64 value, volatile void __iomem *addr) + { +- log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_write)) ++ log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); +- log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); ++ if (rwmmio_tracepoint_enabled(rwmmio_post_write)) ++ log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + } + #endif + diff --git a/queue-6.17/clocksource-drivers-clps711x-fix-resource-leaks-in-error-paths.patch b/queue-6.17/clocksource-drivers-clps711x-fix-resource-leaks-in-error-paths.patch new file mode 100644 index 0000000000..15590d7f71 --- /dev/null +++ b/queue-6.17/clocksource-drivers-clps711x-fix-resource-leaks-in-error-paths.patch @@ -0,0 +1,67 @@ +From cd32e596f02fc981674573402c1138f616df1728 Mon Sep 17 00:00:00 2001 +From: Zhen Ni +Date: Thu, 14 Aug 2025 20:33:24 +0800 +Subject: clocksource/drivers/clps711x: Fix resource leaks in error paths + +From: Zhen Ni + +commit cd32e596f02fc981674573402c1138f616df1728 upstream. + +The current implementation of clps711x_timer_init() has multiple error +paths that directly return without releasing the base I/O memory mapped +via of_iomap(). Fix of_iomap leaks in error paths. 
+ +Fixes: 04410efbb6bc ("clocksource/drivers/clps711x: Convert init function to return error") +Fixes: 2a6a8e2d9004 ("clocksource/drivers/clps711x: Remove board support") +Signed-off-by: Zhen Ni +Signed-off-by: Daniel Lezcano +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20250814123324.1516495-1-zhen.ni@easystack.cn +Signed-off-by: Greg Kroah-Hartman +--- + drivers/clocksource/clps711x-timer.c | 23 ++++++++++++++++------- + 1 file changed, 16 insertions(+), 7 deletions(-) + +--- a/drivers/clocksource/clps711x-timer.c ++++ b/drivers/clocksource/clps711x-timer.c +@@ -78,24 +78,33 @@ static int __init clps711x_timer_init(st + unsigned int irq = irq_of_parse_and_map(np, 0); + struct clk *clock = of_clk_get(np, 0); + void __iomem *base = of_iomap(np, 0); ++ int ret = 0; + + if (!base) + return -ENOMEM; +- if (!irq) +- return -EINVAL; +- if (IS_ERR(clock)) +- return PTR_ERR(clock); ++ if (!irq) { ++ ret = -EINVAL; ++ goto unmap_io; ++ } ++ if (IS_ERR(clock)) { ++ ret = PTR_ERR(clock); ++ goto unmap_io; ++ } + + switch (of_alias_get_id(np, "timer")) { + case CLPS711X_CLKSRC_CLOCKSOURCE: + clps711x_clksrc_init(clock, base); + break; + case CLPS711X_CLKSRC_CLOCKEVENT: +- return _clps711x_clkevt_init(clock, base, irq); ++ ret = _clps711x_clkevt_init(clock, base, irq); ++ break; + default: +- return -EINVAL; ++ ret = -EINVAL; ++ break; + } + +- return 0; ++unmap_io: ++ iounmap(base); ++ return ret; + } + TIMER_OF_DECLARE(clps711x, "cirrus,ep7209-timer", clps711x_timer_init); diff --git a/queue-6.17/cpufreq-make-drivers-using-cpufreq_eternal-specify-transition-latency.patch b/queue-6.17/cpufreq-make-drivers-using-cpufreq_eternal-specify-transition-latency.patch new file mode 100644 index 0000000000..9bed96718c --- /dev/null +++ b/queue-6.17/cpufreq-make-drivers-using-cpufreq_eternal-specify-transition-latency.patch @@ -0,0 +1,179 @@ +From f97aef092e199c10a3da96ae79b571edd5362faa Mon Sep 17 00:00:00 2001 +From: "Rafael J. Wysocki" +Date: Fri, 26 Sep 2025 12:12:37 +0200 +Subject: cpufreq: Make drivers using CPUFREQ_ETERNAL specify transition latency + +From: Rafael J. Wysocki + +commit f97aef092e199c10a3da96ae79b571edd5362faa upstream. + +Commit a755d0e2d41b ("cpufreq: Honour transition_latency over +transition_delay_us") caused platforms where cpuinfo.transition_latency +is CPUFREQ_ETERNAL to get a very large transition latency whereas +previously it had been capped at 10 ms (and later at 2 ms). + +This led to a user-observable regression between 6.6 and 6.12 as +described by Shawn: + +"The dbs sampling_rate was 10000 us on 6.6 and suddently becomes + 6442450 us (4294967295 / 1000 * 1.5) on 6.12 for these platforms + because the default transition delay was dropped [...]. + + It slows down dbs governor's reacting to CPU loading change + dramatically. Also, as transition_delay_us is used by schedutil + governor as rate_limit_us, it shows a negative impact on device + idle power consumption, because the device gets slightly less time + in the lowest OPP." + +Evidently, the expectation of the drivers using CPUFREQ_ETERNAL as +cpuinfo.transition_latency was that it would be capped by the core, +but they may as well return a default transition latency value instead +of CPUFREQ_ETERNAL and the core need not do anything with it. + +Accordingly, introduce CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS and make +all of the drivers in question use it instead of CPUFREQ_ETERNAL. Also +update the related Rust binding. 
+ +Fixes: a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us") +Closes: https://lore.kernel.org/linux-pm/20250922125929.453444-1-shawnguo2@yeah.net/ +Reported-by: Shawn Guo +Reviewed-by: Mario Limonciello (AMD) +Reviewed-by: Jie Zhan +Acked-by: Viresh Kumar +Cc: 6.6+ # 6.6+ +Signed-off-by: Rafael J. Wysocki +Link: https://patch.msgid.link/2264949.irdbgypaU6@rafael.j.wysocki +[ rjw: Fix typo in new symbol name, drop redundant type cast from Rust binding ] +Tested-by: Shawn Guo # with cpufreq-dt driver +Reviewed-by: Qais Yousef +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cpufreq/cpufreq-dt.c | 2 +- + drivers/cpufreq/imx6q-cpufreq.c | 2 +- + drivers/cpufreq/mediatek-cpufreq-hw.c | 2 +- + drivers/cpufreq/rcpufreq_dt.rs | 2 +- + drivers/cpufreq/scmi-cpufreq.c | 2 +- + drivers/cpufreq/scpi-cpufreq.c | 2 +- + drivers/cpufreq/spear-cpufreq.c | 2 +- + include/linux/cpufreq.h | 3 +++ + rust/kernel/cpufreq.rs | 7 ++++--- + 9 files changed, 14 insertions(+), 10 deletions(-) + +--- a/drivers/cpufreq/cpufreq-dt.c ++++ b/drivers/cpufreq/cpufreq-dt.c +@@ -104,7 +104,7 @@ static int cpufreq_init(struct cpufreq_p + + transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev); + if (!transition_latency) +- transition_latency = CPUFREQ_ETERNAL; ++ transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; + + cpumask_copy(policy->cpus, priv->cpus); + policy->driver_data = priv; +--- a/drivers/cpufreq/imx6q-cpufreq.c ++++ b/drivers/cpufreq/imx6q-cpufreq.c +@@ -442,7 +442,7 @@ soc_opp_out: + } + + if (of_property_read_u32(np, "clock-latency", &transition_latency)) +- transition_latency = CPUFREQ_ETERNAL; ++ transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; + + /* + * Calculate the ramp time for max voltage change in the +--- a/drivers/cpufreq/mediatek-cpufreq-hw.c ++++ b/drivers/cpufreq/mediatek-cpufreq-hw.c +@@ -238,7 +238,7 @@ static int mtk_cpufreq_hw_cpu_init(struc + + latency = readl_relaxed(data->reg_bases[REG_FREQ_LATENCY]) * 1000; + if (!latency) +- latency = CPUFREQ_ETERNAL; ++ latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; + + policy->cpuinfo.transition_latency = latency; + policy->fast_switch_possible = true; +--- a/drivers/cpufreq/rcpufreq_dt.rs ++++ b/drivers/cpufreq/rcpufreq_dt.rs +@@ -123,7 +123,7 @@ impl cpufreq::Driver for CPUFreqDTDriver + + let mut transition_latency = opp_table.max_transition_latency_ns() as u32; + if transition_latency == 0 { +- transition_latency = cpufreq::ETERNAL_LATENCY_NS; ++ transition_latency = cpufreq::DEFAULT_TRANSITION_LATENCY_NS; + } + + policy +--- a/drivers/cpufreq/scmi-cpufreq.c ++++ b/drivers/cpufreq/scmi-cpufreq.c +@@ -294,7 +294,7 @@ static int scmi_cpufreq_init(struct cpuf + + latency = perf_ops->transition_latency_get(ph, domain); + if (!latency) +- latency = CPUFREQ_ETERNAL; ++ latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; + + policy->cpuinfo.transition_latency = latency; + +--- a/drivers/cpufreq/scpi-cpufreq.c ++++ b/drivers/cpufreq/scpi-cpufreq.c +@@ -157,7 +157,7 @@ static int scpi_cpufreq_init(struct cpuf + + latency = scpi_ops->get_transition_latency(cpu_dev); + if (!latency) +- latency = CPUFREQ_ETERNAL; ++ latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; + + policy->cpuinfo.transition_latency = latency; + +--- a/drivers/cpufreq/spear-cpufreq.c ++++ b/drivers/cpufreq/spear-cpufreq.c +@@ -182,7 +182,7 @@ static int spear_cpufreq_probe(struct pl + + if (of_property_read_u32(np, "clock-latency", + &spear_cpufreq.transition_latency)) +- 
spear_cpufreq.transition_latency = CPUFREQ_ETERNAL; ++ spear_cpufreq.transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; + + cnt = of_property_count_u32_elems(np, "cpufreq_tbl"); + if (cnt <= 0) { +--- a/include/linux/cpufreq.h ++++ b/include/linux/cpufreq.h +@@ -32,6 +32,9 @@ + */ + + #define CPUFREQ_ETERNAL (-1) ++ ++#define CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS NSEC_PER_MSEC ++ + #define CPUFREQ_NAME_LEN 16 + /* Print length for names. Extra 1 space for accommodating '\n' in prints */ + #define CPUFREQ_NAME_PLEN (CPUFREQ_NAME_LEN + 1) +--- a/rust/kernel/cpufreq.rs ++++ b/rust/kernel/cpufreq.rs +@@ -39,7 +39,8 @@ use macros::vtable; + const CPUFREQ_NAME_LEN: usize = bindings::CPUFREQ_NAME_LEN as usize; + + /// Default transition latency value in nanoseconds. +-pub const ETERNAL_LATENCY_NS: u32 = bindings::CPUFREQ_ETERNAL as u32; ++pub const DEFAULT_TRANSITION_LATENCY_NS: u32 = ++ bindings::CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; + + /// CPU frequency driver flags. + pub mod flags { +@@ -400,13 +401,13 @@ impl TableBuilder { + /// The following example demonstrates how to create a CPU frequency table. + /// + /// ``` +-/// use kernel::cpufreq::{ETERNAL_LATENCY_NS, Policy}; ++/// use kernel::cpufreq::{DEFAULT_TRANSITION_LATENCY_NS, Policy}; + /// + /// fn update_policy(policy: &mut Policy) { + /// policy + /// .set_dvfs_possible_from_any_cpu(true) + /// .set_fast_switch_possible(true) +-/// .set_transition_latency_ns(ETERNAL_LATENCY_NS); ++/// .set_transition_latency_ns(DEFAULT_TRANSITION_LATENCY_NS); + /// + /// pr_info!("The policy details are: {:?}\n", (policy.cpu(), policy.cur())); + /// } diff --git a/queue-6.17/cxl-acpi-hmat-update-cxl-access-coordinates-directly-instead-of-through-hmat.patch b/queue-6.17/cxl-acpi-hmat-update-cxl-access-coordinates-directly-instead-of-through-hmat.patch deleted file mode 100644 index e94eb9f7b9..0000000000 --- a/queue-6.17/cxl-acpi-hmat-update-cxl-access-coordinates-directly-instead-of-through-hmat.patch +++ /dev/null @@ -1,172 +0,0 @@ -From 2e454fb8056df6da4bba7d89a57bf60e217463c0 Mon Sep 17 00:00:00 2001 -From: Dave Jiang -Date: Fri, 29 Aug 2025 15:29:06 -0700 -Subject: cxl, acpi/hmat: Update CXL access coordinates directly instead of through HMAT - -From: Dave Jiang - -commit 2e454fb8056df6da4bba7d89a57bf60e217463c0 upstream. - -The current implementation of CXL memory hotplug notifier gets called -before the HMAT memory hotplug notifier. The CXL driver calculates the -access coordinates (bandwidth and latency values) for the CXL end to -end path (i.e. CPU to endpoint). When the CXL region is onlined, the CXL -memory hotplug notifier writes the access coordinates to the HMAT target -structs. Then the HMAT memory hotplug notifier is called and it creates -the access coordinates for the node sysfs attributes. - -During testing on an Intel platform, it was found that although the -newly calculated coordinates were pushed to sysfs, the sysfs attributes for -the access coordinates showed up with the wrong initiator. The system has -4 nodes (0, 1, 2, 3) where node 0 and 1 are CPU nodes and node 2 and 3 are -CXL nodes. 
The expectation is that node 2 would show up as a target to node -0: -/sys/devices/system/node/node2/access0/initiators/node0 - -However it was observed that node 2 showed up as a target under node 1: -/sys/devices/system/node/node2/access0/initiators/node1 - -The original intent of the 'ext_updated' flag in HMAT handling code was to -stop HMAT memory hotplug callback from clobbering the access coordinates -after CXL has injected its calculated coordinates and replaced the generic -target access coordinates provided by the HMAT table in the HMAT target -structs. However the flag is hacky at best and blocks the updates from -other CXL regions that are onlined in the same node later on. Remove the -'ext_updated' flag usage and just update the access coordinates for the -nodes directly without touching HMAT target data. - -The hotplug memory callback ordering is changed. Instead of changing CXL, -move HMAT back so there's room for the levels rather than have CXL share -the same level as SLAB_CALLBACK_PRI. The change will resulting in the CXL -callback to be executed after the HMAT callback. - -With the change, the CXL hotplug memory notifier runs after the HMAT -callback. The HMAT callback will create the node sysfs attributes for -access coordinates. The CXL callback will write the access coordinates to -the now created node sysfs attributes directly and will not pollute the -HMAT target values. - -A nodemask is introduced to keep track if a node has been updated and -prevents further updates. - -Fixes: 067353a46d8c ("cxl/region: Add memory hotplug notifier for cxl region") -Cc: stable@vger.kernel.org -Tested-by: Marc Herbert -Reviewed-by: Dan Williams -Reviewed-by: Jonathan Cameron -Link: https://patch.msgid.link/20250829222907.1290912-4-dave.jiang@intel.com -Signed-off-by: Dave Jiang -Signed-off-by: Greg Kroah-Hartman ---- - drivers/acpi/numa/hmat.c | 6 ------ - drivers/cxl/core/cdat.c | 5 ----- - drivers/cxl/core/core.h | 1 - - drivers/cxl/core/region.c | 20 ++++++++++++-------- - include/linux/memory.h | 2 +- - 5 files changed, 13 insertions(+), 21 deletions(-) - ---- a/drivers/acpi/numa/hmat.c -+++ b/drivers/acpi/numa/hmat.c -@@ -74,7 +74,6 @@ struct memory_target { - struct node_cache_attrs cache_attrs; - u8 gen_port_device_handle[ACPI_SRAT_DEVICE_HANDLE_SIZE]; - bool registered; -- bool ext_updated; /* externally updated */ - }; - - struct memory_initiator { -@@ -391,7 +390,6 @@ int hmat_update_target_coordinates(int n - coord->read_bandwidth, access); - hmat_update_target_access(target, ACPI_HMAT_WRITE_BANDWIDTH, - coord->write_bandwidth, access); -- target->ext_updated = true; - - return 0; - } -@@ -773,10 +771,6 @@ static void hmat_update_target_attrs(str - u32 best = 0; - int i; - -- /* Don't update if an external agent has changed the data. 
*/ -- if (target->ext_updated) -- return; -- - /* Don't update for generic port if there's no device handle */ - if ((access == NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL || - access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) && ---- a/drivers/cxl/core/cdat.c -+++ b/drivers/cxl/core/cdat.c -@@ -1081,8 +1081,3 @@ int cxl_update_hmat_access_coordinates(i - { - return hmat_update_target_coordinates(nid, &cxlr->coord[access], access); - } -- --bool cxl_need_node_perf_attrs_update(int nid) --{ -- return !acpi_node_backed_by_real_pxm(nid); --} ---- a/drivers/cxl/core/core.h -+++ b/drivers/cxl/core/core.h -@@ -139,7 +139,6 @@ long cxl_pci_get_latency(struct pci_dev - int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c); - int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, - enum access_coordinate_class access); --bool cxl_need_node_perf_attrs_update(int nid); - int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, - struct access_coordinate *c); - ---- a/drivers/cxl/core/region.c -+++ b/drivers/cxl/core/region.c -@@ -30,6 +30,12 @@ - * 3. Decoder targets - */ - -+/* -+ * nodemask that sets per node when the access_coordinates for the node has -+ * been updated by the CXL memory hotplug notifier. -+ */ -+static nodemask_t nodemask_region_seen = NODE_MASK_NONE; -+ - static struct cxl_region *to_cxl_region(struct device *dev); - - #define __ACCESS_ATTR_RO(_level, _name) { \ -@@ -2442,14 +2448,8 @@ static bool cxl_region_update_coordinate - - for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { - if (cxlr->coord[i].read_bandwidth) { -- rc = 0; -- if (cxl_need_node_perf_attrs_update(nid)) -- node_set_perf_attrs(nid, &cxlr->coord[i], i); -- else -- rc = cxl_update_hmat_access_coordinates(nid, cxlr, i); -- -- if (rc == 0) -- cset++; -+ node_update_perf_attrs(nid, &cxlr->coord[i], i); -+ cset++; - } - } - -@@ -2487,6 +2487,10 @@ static int cxl_region_perf_attrs_callbac - if (nid != region_nid) - return NOTIFY_DONE; - -+ /* No action needed if node bit already set */ -+ if (node_test_and_set(nid, nodemask_region_seen)) -+ return NOTIFY_DONE; -+ - if (!cxl_region_update_coordinates(cxlr, nid)) - return NOTIFY_DONE; - ---- a/include/linux/memory.h -+++ b/include/linux/memory.h -@@ -120,8 +120,8 @@ struct mem_section; - */ - #define DEFAULT_CALLBACK_PRI 0 - #define SLAB_CALLBACK_PRI 1 --#define HMAT_CALLBACK_PRI 2 - #define CXL_CALLBACK_PRI 5 -+#define HMAT_CALLBACK_PRI 6 - #define MM_COMPUTE_BATCH_PRI 10 - #define CPUSET_CALLBACK_PRI 10 - #define MEMTIER_HOTPLUG_PRI 100 diff --git a/queue-6.17/dma-mapping-fix-direction-in-dma_alloc-direction-traces.patch b/queue-6.17/dma-mapping-fix-direction-in-dma_alloc-direction-traces.patch new file mode 100644 index 0000000000..b9f91b37fa --- /dev/null +++ b/queue-6.17/dma-mapping-fix-direction-in-dma_alloc-direction-traces.patch @@ -0,0 +1,34 @@ +From 16abbabc004bedeeaa702e11913da9d4fa70e63a Mon Sep 17 00:00:00 2001 +From: Petr Tesarik +Date: Wed, 1 Oct 2025 08:10:28 +0200 +Subject: dma-mapping: fix direction in dma_alloc direction traces + +From: Petr Tesarik + +commit 16abbabc004bedeeaa702e11913da9d4fa70e63a upstream. + +Set __entry->dir to the actual "dir" parameter of all trace events +in dma_alloc_class. This struct member was left uninitialized by +mistake. 
+ +Signed-off-by: Petr Tesarik +Fixes: 3afff779a725 ("dma-mapping: trace dma_alloc/free direction") +Cc: stable@vger.kernel.org +Reviewed-by: Sean Anderson +Signed-off-by: Marek Szyprowski +Link: https://lore.kernel.org/r/20251001061028.412258-1-ptesarik@suse.com +Signed-off-by: Greg Kroah-Hartman +--- + include/trace/events/dma.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/include/trace/events/dma.h ++++ b/include/trace/events/dma.h +@@ -134,6 +134,7 @@ DECLARE_EVENT_CLASS(dma_alloc_class, + __entry->dma_addr = dma_addr; + __entry->size = size; + __entry->flags = flags; ++ __entry->dir = dir; + __entry->attrs = attrs; + ), + diff --git a/queue-6.17/iio-frequency-adf4350-fix-adf4350_reg3_12bit_clkdiv_mode.patch b/queue-6.17/iio-frequency-adf4350-fix-adf4350_reg3_12bit_clkdiv_mode.patch new file mode 100644 index 0000000000..f9badfca3a --- /dev/null +++ b/queue-6.17/iio-frequency-adf4350-fix-adf4350_reg3_12bit_clkdiv_mode.patch @@ -0,0 +1,37 @@ +From 1d8fdabe19267338f29b58f968499e5b55e6a3b6 Mon Sep 17 00:00:00 2001 +From: Michael Hennerich +Date: Fri, 29 Aug 2025 12:25:43 +0100 +Subject: iio: frequency: adf4350: Fix ADF4350_REG3_12BIT_CLKDIV_MODE +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Michael Hennerich + +commit 1d8fdabe19267338f29b58f968499e5b55e6a3b6 upstream. + +The clk div bits (2 bits wide) do not start in bit 16 but in bit 15. Fix it +accordingly. + +Fixes: e31166f0fd48 ("iio: frequency: New driver for Analog Devices ADF4350/ADF4351 Wideband Synthesizers") +Signed-off-by: Michael Hennerich +Signed-off-by: Nuno Sá +Link: https://patch.msgid.link/20250829-adf4350-fix-v2-2-0bf543ba797d@analog.com +Cc: +Signed-off-by: Jonathan Cameron +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/iio/frequency/adf4350.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/linux/iio/frequency/adf4350.h ++++ b/include/linux/iio/frequency/adf4350.h +@@ -51,7 +51,7 @@ + + /* REG3 Bit Definitions */ + #define ADF4350_REG3_12BIT_CLKDIV(x) ((x) << 3) +-#define ADF4350_REG3_12BIT_CLKDIV_MODE(x) ((x) << 16) ++#define ADF4350_REG3_12BIT_CLKDIV_MODE(x) ((x) << 15) + #define ADF4350_REG3_12BIT_CSR_EN (1 << 18) + #define ADF4351_REG3_CHARGE_CANCELLATION_EN (1 << 21) + #define ADF4351_REG3_ANTI_BACKLASH_3ns_EN (1 << 22) diff --git a/queue-6.17/kvm-svm-emulate-perf_cntr_global_status_set-for-perfmonv2.patch b/queue-6.17/kvm-svm-emulate-perf_cntr_global_status_set-for-perfmonv2.patch new file mode 100644 index 0000000000..a7fa9844c8 --- /dev/null +++ b/queue-6.17/kvm-svm-emulate-perf_cntr_global_status_set-for-perfmonv2.patch @@ -0,0 +1,84 @@ +From 68e61f6fd65610e73b17882f86fedfd784d99229 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 11 Jul 2025 10:27:46 -0700 +Subject: KVM: SVM: Emulate PERF_CNTR_GLOBAL_STATUS_SET for PerfMonV2 + +From: Sean Christopherson + +commit 68e61f6fd65610e73b17882f86fedfd784d99229 upstream. + +Emulate PERF_CNTR_GLOBAL_STATUS_SET when PerfMonV2 is enumerated to the +guest, as the MSR is supposed to exist in all AMD v2 PMUs. 
+ +Fixes: 4a2771895ca6 ("KVM: x86/svm/pmu: Add AMD PerfMonV2 support") +Cc: stable@vger.kernel.org +Cc: Sandipan Das +Link: https://lore.kernel.org/r/20250711172746.1579423-1-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/msr-index.h | 1 + + arch/x86/kvm/pmu.c | 5 +++++ + arch/x86/kvm/svm/pmu.c | 1 + + arch/x86/kvm/x86.c | 2 ++ + 4 files changed, 9 insertions(+) + +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -733,6 +733,7 @@ + #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 + #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 + #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 ++#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303 + + /* AMD Hardware Feedback Support MSRs */ + #define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500 +--- a/arch/x86/kvm/pmu.c ++++ b/arch/x86/kvm/pmu.c +@@ -650,6 +650,7 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcp + msr_info->data = pmu->global_ctrl; + break; + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: ++ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: + case MSR_CORE_PERF_GLOBAL_OVF_CTRL: + msr_info->data = 0; + break; +@@ -711,6 +712,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcp + if (!msr_info->host_initiated) + pmu->global_status &= ~data; + break; ++ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: ++ if (!msr_info->host_initiated) ++ pmu->global_status |= data & ~pmu->global_status_rsvd; ++ break; + default: + kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index); + return kvm_pmu_call(set_msr)(vcpu, msr_info); +--- a/arch/x86/kvm/svm/pmu.c ++++ b/arch/x86/kvm/svm/pmu.c +@@ -113,6 +113,7 @@ static bool amd_is_valid_msr(struct kvm_ + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: ++ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: + return pmu->version > 1; + default: + if (msr > MSR_F15H_PERF_CTR5 && +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -367,6 +367,7 @@ static const u32 msrs_to_save_pmu[] = { + MSR_AMD64_PERF_CNTR_GLOBAL_CTL, + MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, + MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, ++ MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET, + }; + + static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) + +@@ -7359,6 +7360,7 @@ static void kvm_probe_msr_to_save(u32 ms + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: ++ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: + if (!kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) + return; + break; diff --git a/queue-6.17/kvm-x86-add-helper-to-retrieve-current-value-of-user-return-msr.patch b/queue-6.17/kvm-x86-add-helper-to-retrieve-current-value-of-user-return-msr.patch new file mode 100644 index 0000000000..c9ca1e9d6a --- /dev/null +++ b/queue-6.17/kvm-x86-add-helper-to-retrieve-current-value-of-user-return-msr.patch @@ -0,0 +1,52 @@ +From 9bc366350734246301b090802fc71f9924daad39 Mon Sep 17 00:00:00 2001 +From: Hou Wenlong +Date: Tue, 23 Sep 2025 08:37:37 -0700 +Subject: KVM: x86: Add helper to retrieve current value of user return MSR + +From: Hou Wenlong + +commit 9bc366350734246301b090802fc71f9924daad39 upstream. + +In the user return MSR support, the cached value is always the hardware +value of the specific MSR. Therefore, add a helper to retrieve the +cached value, which can replace the need for RDMSR, for example, to +allow SEV-ES guests to restore the correct host hardware value without +using RDMSR. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Hou Wenlong +[sean: drop "cache" from the name, make it a one-liner, tag for stable] +Reviewed-by: Xiaoyao Li +Link: https://lore.kernel.org/r/20250923153738.1875174-2-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kvm_host.h | 1 + + arch/x86/kvm/x86.c | 6 ++++++ + 2 files changed, 7 insertions(+) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -2356,6 +2356,7 @@ int kvm_add_user_return_msr(u32 msr); + int kvm_find_user_return_msr(u32 msr); + int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); + void kvm_user_return_msr_update_cache(unsigned int index, u64 val); ++u64 kvm_get_user_return_msr(unsigned int slot); + + static inline bool kvm_is_supported_user_return_msr(u32 msr) + { +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -677,6 +677,12 @@ void kvm_user_return_msr_update_cache(un + } + EXPORT_SYMBOL_GPL(kvm_user_return_msr_update_cache); + ++u64 kvm_get_user_return_msr(unsigned int slot) ++{ ++ return this_cpu_ptr(user_return_msrs)->values[slot].curr; ++} ++EXPORT_SYMBOL_GPL(kvm_get_user_return_msr); ++ + static void drop_user_return_notifiers(void) + { + struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); diff --git a/queue-6.17/listmount-don-t-call-path_put-under-namespace-semaphore.patch b/queue-6.17/listmount-don-t-call-path_put-under-namespace-semaphore.patch new file mode 100644 index 0000000000..1c6cd43e93 --- /dev/null +++ b/queue-6.17/listmount-don-t-call-path_put-under-namespace-semaphore.patch @@ -0,0 +1,164 @@ +From c1f86d0ac322c7e77f6f8dbd216c65d39358ffc0 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Fri, 19 Sep 2025 17:33:47 +0200 +Subject: listmount: don't call path_put() under namespace semaphore + +From: Christian Brauner + +commit c1f86d0ac322c7e77f6f8dbd216c65d39358ffc0 upstream. + +Massage listmount() and make sure we don't call path_put() under the +namespace semaphore. If we put the last reference we're fscked. + +Fixes: b4c2bea8ceaa ("add listmount(2) syscall") +Cc: stable@vger.kernel.org # v6.8+ +Signed-off-by: Christian Brauner +Signed-off-by: Greg Kroah-Hartman +--- + fs/namespace.c | 87 ++++++++++++++++++++++++++++++++++++++------------------- + 1 file changed, 59 insertions(+), 28 deletions(-) + +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -5966,23 +5966,34 @@ retry: + return ret; + } + +-static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id, +- u64 last_mnt_id, u64 *mnt_ids, size_t nr_mnt_ids, +- bool reverse) ++struct klistmount { ++ u64 last_mnt_id; ++ u64 mnt_parent_id; ++ u64 *kmnt_ids; ++ u32 nr_mnt_ids; ++ struct mnt_namespace *ns; ++ struct path root; ++}; ++ ++static ssize_t do_listmount(struct klistmount *kls, bool reverse) + { +- struct path root __free(path_put) = {}; ++ struct mnt_namespace *ns = kls->ns; ++ u64 mnt_parent_id = kls->mnt_parent_id; ++ u64 last_mnt_id = kls->last_mnt_id; ++ u64 *mnt_ids = kls->kmnt_ids; ++ size_t nr_mnt_ids = kls->nr_mnt_ids; + struct path orig; + struct mount *r, *first; + ssize_t ret; + + rwsem_assert_held(&namespace_sem); + +- ret = grab_requested_root(ns, &root); ++ ret = grab_requested_root(ns, &kls->root); + if (ret) + return ret; + + if (mnt_parent_id == LSMT_ROOT) { +- orig = root; ++ orig = kls->root; + } else { + orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns); + if (!orig.mnt) +@@ -5994,7 +6005,7 @@ static ssize_t do_listmount(struct mnt_n + * Don't trigger audit denials. 
We just want to determine what + * mounts to show users. + */ +- if (!is_path_reachable(real_mount(orig.mnt), orig.dentry, &root) && ++ if (!is_path_reachable(real_mount(orig.mnt), orig.dentry, &kls->root) && + !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + +@@ -6027,14 +6038,45 @@ static ssize_t do_listmount(struct mnt_n + return ret; + } + ++static void __free_klistmount_free(const struct klistmount *kls) ++{ ++ path_put(&kls->root); ++ kvfree(kls->kmnt_ids); ++ mnt_ns_release(kls->ns); ++} ++ ++static inline int prepare_klistmount(struct klistmount *kls, struct mnt_id_req *kreq, ++ size_t nr_mnt_ids) ++{ ++ ++ u64 last_mnt_id = kreq->param; ++ ++ /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */ ++ if (last_mnt_id != 0 && last_mnt_id <= MNT_UNIQUE_ID_OFFSET) ++ return -EINVAL; ++ ++ kls->last_mnt_id = last_mnt_id; ++ ++ kls->nr_mnt_ids = nr_mnt_ids; ++ kls->kmnt_ids = kvmalloc_array(nr_mnt_ids, sizeof(*kls->kmnt_ids), ++ GFP_KERNEL_ACCOUNT); ++ if (!kls->kmnt_ids) ++ return -ENOMEM; ++ ++ kls->ns = grab_requested_mnt_ns(kreq); ++ if (!kls->ns) ++ return -ENOENT; ++ ++ kls->mnt_parent_id = kreq->mnt_id; ++ return 0; ++} ++ + SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, + u64 __user *, mnt_ids, size_t, nr_mnt_ids, unsigned int, flags) + { +- u64 *kmnt_ids __free(kvfree) = NULL; ++ struct klistmount kls __free(klistmount_free) = {}; + const size_t maxcount = 1000000; +- struct mnt_namespace *ns __free(mnt_ns_release) = NULL; + struct mnt_id_req kreq; +- u64 last_mnt_id; + ssize_t ret; + + if (flags & ~LISTMOUNT_REVERSE) +@@ -6055,22 +6097,12 @@ SYSCALL_DEFINE4(listmount, const struct + if (ret) + return ret; + +- last_mnt_id = kreq.param; +- /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */ +- if (last_mnt_id != 0 && last_mnt_id <= MNT_UNIQUE_ID_OFFSET) +- return -EINVAL; +- +- kmnt_ids = kvmalloc_array(nr_mnt_ids, sizeof(*kmnt_ids), +- GFP_KERNEL_ACCOUNT); +- if (!kmnt_ids) +- return -ENOMEM; +- +- ns = grab_requested_mnt_ns(&kreq); +- if (!ns) +- return -ENOENT; ++ ret = prepare_klistmount(&kls, &kreq, nr_mnt_ids); ++ if (ret) ++ return ret; + +- if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) && +- !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) ++ if (kreq.mnt_ns_id && (kls.ns != current->nsproxy->mnt_ns) && ++ !ns_capable_noaudit(kls.ns->user_ns, CAP_SYS_ADMIN)) + return -ENOENT; + + /* +@@ -6078,12 +6110,11 @@ SYSCALL_DEFINE4(listmount, const struct + * listmount() doesn't care about any mount properties. 
+ */ + scoped_guard(rwsem_read, &namespace_sem) +- ret = do_listmount(ns, kreq.mnt_id, last_mnt_id, kmnt_ids, +- nr_mnt_ids, (flags & LISTMOUNT_REVERSE)); ++ ret = do_listmount(&kls, (flags & LISTMOUNT_REVERSE)); + if (ret <= 0) + return ret; + +- if (copy_to_user(mnt_ids, kmnt_ids, ret * sizeof(*mnt_ids))) ++ if (copy_to_user(mnt_ids, kls.kmnt_ids, ret * sizeof(*mnt_ids))) + return -EFAULT; + + return ret; diff --git a/queue-6.17/media-v4l2-subdev-fix-alloc-failure-check-in-v4l2_subdev_call_state_try.patch b/queue-6.17/media-v4l2-subdev-fix-alloc-failure-check-in-v4l2_subdev_call_state_try.patch new file mode 100644 index 0000000000..4cd77add3f --- /dev/null +++ b/queue-6.17/media-v4l2-subdev-fix-alloc-failure-check-in-v4l2_subdev_call_state_try.patch @@ -0,0 +1,68 @@ +From f37df9a0eb5e43fcfe02cbaef076123dc0d79c7e Mon Sep 17 00:00:00 2001 +From: Tomi Valkeinen +Date: Fri, 8 Aug 2025 11:59:15 +0300 +Subject: media: v4l2-subdev: Fix alloc failure check in v4l2_subdev_call_state_try() + +From: Tomi Valkeinen + +commit f37df9a0eb5e43fcfe02cbaef076123dc0d79c7e upstream. + +v4l2_subdev_call_state_try() macro allocates a subdev state with +__v4l2_subdev_state_alloc(), but does not check the returned value. If +__v4l2_subdev_state_alloc fails, it returns an ERR_PTR, and that would +cause v4l2_subdev_call_state_try() to crash. + +Add proper error handling to v4l2_subdev_call_state_try(). + +Signed-off-by: Tomi Valkeinen +Fixes: 982c0487185b ("media: subdev: Add v4l2_subdev_call_state_try() macro") +Reported-by: Dan Carpenter +Closes: https://lore.kernel.org/all/aJTNtpDUbTz7eyJc%40stanley.mountain/ +Cc: stable@vger.kernel.org +Reviewed-by: Dan Carpenter +Signed-off-by: Sakari Ailus +Signed-off-by: Hans Verkuil +Signed-off-by: Greg Kroah-Hartman +--- + include/media/v4l2-subdev.h | 30 +++++++++++++++++------------- + 1 file changed, 17 insertions(+), 13 deletions(-) + +--- a/include/media/v4l2-subdev.h ++++ b/include/media/v4l2-subdev.h +@@ -1962,19 +1962,23 @@ extern const struct v4l2_subdev_ops v4l2 + * + * Note: only legacy non-MC drivers may need this macro. + */ +-#define v4l2_subdev_call_state_try(sd, o, f, args...) \ +- ({ \ +- int __result; \ +- static struct lock_class_key __key; \ +- const char *name = KBUILD_BASENAME \ +- ":" __stringify(__LINE__) ":state->lock"; \ +- struct v4l2_subdev_state *state = \ +- __v4l2_subdev_state_alloc(sd, name, &__key); \ +- v4l2_subdev_lock_state(state); \ +- __result = v4l2_subdev_call(sd, o, f, state, ##args); \ +- v4l2_subdev_unlock_state(state); \ +- __v4l2_subdev_state_free(state); \ +- __result; \ ++#define v4l2_subdev_call_state_try(sd, o, f, args...) 
\ ++ ({ \ ++ int __result; \ ++ static struct lock_class_key __key; \ ++ const char *name = KBUILD_BASENAME \ ++ ":" __stringify(__LINE__) ":state->lock"; \ ++ struct v4l2_subdev_state *state = \ ++ __v4l2_subdev_state_alloc(sd, name, &__key); \ ++ if (IS_ERR(state)) { \ ++ __result = PTR_ERR(state); \ ++ } else { \ ++ v4l2_subdev_lock_state(state); \ ++ __result = v4l2_subdev_call(sd, o, f, state, ##args); \ ++ v4l2_subdev_unlock_state(state); \ ++ __v4l2_subdev_state_free(state); \ ++ } \ ++ __result; \ + }) + + /** diff --git a/queue-6.17/memcg-skip-cgroup_file_notify-if-spinning-is-not-allowed.patch b/queue-6.17/memcg-skip-cgroup_file_notify-if-spinning-is-not-allowed.patch new file mode 100644 index 0000000000..ea3756e005 --- /dev/null +++ b/queue-6.17/memcg-skip-cgroup_file_notify-if-spinning-is-not-allowed.patch @@ -0,0 +1,133 @@ +From fcc0669c5aa681994c507b50f1c706c969d99730 Mon Sep 17 00:00:00 2001 +From: Shakeel Butt +Date: Mon, 22 Sep 2025 15:02:03 -0700 +Subject: memcg: skip cgroup_file_notify if spinning is not allowed + +From: Shakeel Butt + +commit fcc0669c5aa681994c507b50f1c706c969d99730 upstream. + +Generally memcg charging is allowed from all the contexts including NMI +where even spinning on spinlock can cause locking issues. However one +call chain was missed during the addition of memcg charging from any +context support. That is try_charge_memcg() -> memcg_memory_event() -> +cgroup_file_notify(). + +The possible function call tree under cgroup_file_notify() can acquire +many different spin locks in spinning mode. Some of them are +cgroup_file_kn_lock, kernfs_notify_lock, pool_workqeue's lock. So, let's +just skip cgroup_file_notify() from memcg charging if the context does not +allow spinning. + +Alternative approach was also explored where instead of skipping +cgroup_file_notify(), we defer the memcg event processing to irq_work [1]. +However it adds complexity and it was decided to keep things simple until +we need more memcg events with !allow_spinning requirement. + +Link: https://lore.kernel.org/all/5qi2llyzf7gklncflo6gxoozljbm4h3tpnuv4u4ej4ztysvi6f@x44v7nz2wdzd/ [1] +Link: https://lkml.kernel.org/r/20250922220203.261714-1-shakeel.butt@linux.dev +Fixes: 3ac4638a734a ("memcg: make memcg_rstat_updated nmi safe") +Signed-off-by: Shakeel Butt +Acked-by: Michal Hocko +Closes: https://lore.kernel.org/all/20250905061919.439648-1-yepeilin@google.com/ +Cc: Alexei Starovoitov +Cc: Johannes Weiner +Cc: Kumar Kartikeya Dwivedi +Cc: Muchun Song +Cc: Peilin Ye +Cc: Roman Gushchin +Cc: Tejun Heo +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/memcontrol.h | 26 +++++++++++++++++++------- + mm/memcontrol.c | 7 ++++--- + 2 files changed, 23 insertions(+), 10 deletions(-) + +--- a/include/linux/memcontrol.h ++++ b/include/linux/memcontrol.h +@@ -987,22 +987,28 @@ static inline void count_memcg_event_mm( + count_memcg_events_mm(mm, idx, 1); + } + +-static inline void memcg_memory_event(struct mem_cgroup *memcg, +- enum memcg_memory_event event) ++static inline void __memcg_memory_event(struct mem_cgroup *memcg, ++ enum memcg_memory_event event, ++ bool allow_spinning) + { + bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX || + event == MEMCG_SWAP_FAIL; + ++ /* For now only MEMCG_MAX can happen with !allow_spinning context. 
*/ ++ VM_WARN_ON_ONCE(!allow_spinning && event != MEMCG_MAX); ++ + atomic_long_inc(&memcg->memory_events_local[event]); +- if (!swap_event) ++ if (!swap_event && allow_spinning) + cgroup_file_notify(&memcg->events_local_file); + + do { + atomic_long_inc(&memcg->memory_events[event]); +- if (swap_event) +- cgroup_file_notify(&memcg->swap_events_file); +- else +- cgroup_file_notify(&memcg->events_file); ++ if (allow_spinning) { ++ if (swap_event) ++ cgroup_file_notify(&memcg->swap_events_file); ++ else ++ cgroup_file_notify(&memcg->events_file); ++ } + + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) + break; +@@ -1012,6 +1018,12 @@ static inline void memcg_memory_event(st + !mem_cgroup_is_root(memcg)); + } + ++static inline void memcg_memory_event(struct mem_cgroup *memcg, ++ enum memcg_memory_event event) ++{ ++ __memcg_memory_event(memcg, event, true); ++} ++ + static inline void memcg_memory_event_mm(struct mm_struct *mm, + enum memcg_memory_event event) + { +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -2309,12 +2309,13 @@ static int try_charge_memcg(struct mem_c + bool drained = false; + bool raised_max_event = false; + unsigned long pflags; ++ bool allow_spinning = gfpflags_allow_spinning(gfp_mask); + + retry: + if (consume_stock(memcg, nr_pages)) + return 0; + +- if (!gfpflags_allow_spinning(gfp_mask)) ++ if (!allow_spinning) + /* Avoid the refill and flush of the older stock */ + batch = nr_pages; + +@@ -2350,7 +2351,7 @@ retry: + if (!gfpflags_allow_blocking(gfp_mask)) + goto nomem; + +- memcg_memory_event(mem_over_limit, MEMCG_MAX); ++ __memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning); + raised_max_event = true; + + psi_memstall_enter(&pflags); +@@ -2417,7 +2418,7 @@ force: + * a MEMCG_MAX event. + */ + if (!raised_max_event) +- memcg_memory_event(mem_over_limit, MEMCG_MAX); ++ __memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning); + + /* + * The allocation either can't fail or will lead to more memory diff --git a/queue-6.17/nfsd-unregister-with-rpcbind-when-deleting-a-transport.patch b/queue-6.17/nfsd-unregister-with-rpcbind-when-deleting-a-transport.patch new file mode 100644 index 0000000000..7fa9de39b9 --- /dev/null +++ b/queue-6.17/nfsd-unregister-with-rpcbind-when-deleting-a-transport.patch @@ -0,0 +1,84 @@ +From 898374fdd7f06fa4c4a66e8be3135efeae6128d5 Mon Sep 17 00:00:00 2001 +From: Olga Kornievskaia +Date: Tue, 19 Aug 2025 14:04:02 -0400 +Subject: nfsd: unregister with rpcbind when deleting a transport + +From: Olga Kornievskaia + +commit 898374fdd7f06fa4c4a66e8be3135efeae6128d5 upstream. + +When a listener is added, a part of creation of transport also registers +program/port with rpcbind. However, when the listener is removed, +while transport goes away, rpcbind still has the entry for that +port/type. + +When deleting the transport, unregister with rpcbind when appropriate. + +---v2 created a new xpt_flag XPT_RPCB_UNREG to mark TCP and UDP +transport and at xprt destroy send rpcbind unregister if flag set. + +Suggested-by: Chuck Lever +Fixes: d093c9089260 ("nfsd: fix management of listener transports") +Cc: stable@vger.kernel.org +Signed-off-by: Olga Kornievskaia +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/sunrpc/svc_xprt.h | 3 +++ + net/sunrpc/svc_xprt.c | 13 +++++++++++++ + net/sunrpc/svcsock.c | 2 ++ + 3 files changed, 18 insertions(+) + +--- a/include/linux/sunrpc/svc_xprt.h ++++ b/include/linux/sunrpc/svc_xprt.h +@@ -104,6 +104,9 @@ enum { + * it has access to. 
It is NOT counted + * in ->sv_tmpcnt. + */ ++ XPT_RPCB_UNREG, /* transport that needs unregistering ++ * with rpcbind (TCP, UDP) on destroy ++ */ + }; + + /* +--- a/net/sunrpc/svc_xprt.c ++++ b/net/sunrpc/svc_xprt.c +@@ -1014,6 +1014,19 @@ static void svc_delete_xprt(struct svc_x + struct svc_serv *serv = xprt->xpt_server; + struct svc_deferred_req *dr; + ++ /* unregister with rpcbind for when transport type is TCP or UDP. ++ */ ++ if (test_bit(XPT_RPCB_UNREG, &xprt->xpt_flags)) { ++ struct svc_sock *svsk = container_of(xprt, struct svc_sock, ++ sk_xprt); ++ struct socket *sock = svsk->sk_sock; ++ ++ if (svc_register(serv, xprt->xpt_net, sock->sk->sk_family, ++ sock->sk->sk_protocol, 0) < 0) ++ pr_warn("failed to unregister %s with rpcbind\n", ++ xprt->xpt_class->xcl_name); ++ } ++ + if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) + return; + +--- a/net/sunrpc/svcsock.c ++++ b/net/sunrpc/svcsock.c +@@ -836,6 +836,7 @@ static void svc_udp_init(struct svc_sock + /* data might have come in before data_ready set up */ + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); ++ set_bit(XPT_RPCB_UNREG, &svsk->sk_xprt.xpt_flags); + + /* make sure we get destination address info */ + switch (svsk->sk_sk->sk_family) { +@@ -1355,6 +1356,7 @@ static void svc_tcp_init(struct svc_sock + if (sk->sk_state == TCP_LISTEN) { + strcpy(svsk->sk_xprt.xpt_remotebuf, "listener"); + set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); ++ set_bit(XPT_RPCB_UNREG, &svsk->sk_xprt.xpt_flags); + sk->sk_data_ready = svc_tcp_listen_data_ready; + set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); + } else { diff --git a/queue-6.17/page_pool-fix-pp_magic_mask-to-avoid-crashing-on-some-32-bit-arches.patch b/queue-6.17/page_pool-fix-pp_magic_mask-to-avoid-crashing-on-some-32-bit-arches.patch new file mode 100644 index 0000000000..f67a9eae51 --- /dev/null +++ b/queue-6.17/page_pool-fix-pp_magic_mask-to-avoid-crashing-on-some-32-bit-arches.patch @@ -0,0 +1,212 @@ +From 95920c2ed02bde551ab654e9749c2ca7bc3100e0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= +Date: Tue, 30 Sep 2025 13:43:29 +0200 +Subject: page_pool: Fix PP_MAGIC_MASK to avoid crashing on some 32-bit arches +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Toke Høiland-Jørgensen + +commit 95920c2ed02bde551ab654e9749c2ca7bc3100e0 upstream. + +Helge reported that the introduction of PP_MAGIC_MASK let to crashes on +boot on his 32-bit parisc machine. The cause of this is the mask is set +too wide, so the page_pool_page_is_pp() incurs false positives which +crashes the machine. + +Just disabling the check in page_pool_is_pp() will lead to the page_pool +code itself malfunctioning; so instead of doing this, this patch changes +the define for PP_DMA_INDEX_BITS to avoid mistaking arbitrary kernel +pointers for page_pool-tagged pages. + +The fix relies on the kernel pointers that alias with the pp_magic field +always being above PAGE_OFFSET. With this assumption, we can use the +lowest bit of the value of PAGE_OFFSET as the upper bound of the +PP_DMA_INDEX_MASK, which should avoid the false positives. + +Because we cannot rely on PAGE_OFFSET always being a compile-time +constant, nor on it always being >0, we fall back to disabling the +dma_index storage when there are not enough bits available. This leaves +us in the situation we were in before the patch in the Fixes tag, but +only on a subset of architecture configurations. 
This seems to be the +best we can do until the transition to page types in complete for +page_pool pages. + +v2: +- Make sure there's at least 8 bits available and that the PAGE_OFFSET + bit calculation doesn't wrap + +Link: https://lore.kernel.org/all/aMNJMFa5fDalFmtn@p100/ +Fixes: ee62ce7a1d90 ("page_pool: Track DMA-mapped pages and unmap them when destroying the pool") +Cc: stable@vger.kernel.org # 6.15+ +Tested-by: Helge Deller +Signed-off-by: Toke Høiland-Jørgensen +Reviewed-by: Mina Almasry +Tested-by: Helge Deller +Link: https://patch.msgid.link/20250930114331.675412-1-toke@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm.h | 22 ++++++++------ + net/core/page_pool.c | 76 +++++++++++++++++++++++++++++++++++---------------- + 2 files changed, 66 insertions(+), 32 deletions(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -4159,14 +4159,13 @@ int arch_lock_shadow_stack_status(struct + * since this value becomes part of PP_SIGNATURE; meaning we can just use the + * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the + * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is +- * 0, we make sure that we leave the two topmost bits empty, as that guarantees +- * we won't mistake a valid kernel pointer for a value we set, regardless of the +- * VMSPLIT setting. ++ * 0, we use the lowest bit of PAGE_OFFSET as the boundary if that value is ++ * known at compile-time. + * +- * Altogether, this means that the number of bits available is constrained by +- * the size of an unsigned long (at the upper end, subtracting two bits per the +- * above), and the definition of PP_SIGNATURE (with or without +- * POISON_POINTER_DELTA). ++ * If the value of PAGE_OFFSET is not known at compile time, or if it is too ++ * small to leave at least 8 bits available above PP_SIGNATURE, we define the ++ * number of bits to be 0, which turns off the DMA index tracking altogether ++ * (see page_pool_register_dma_index()). + */ + #define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA)) + #if POISON_POINTER_DELTA > 0 +@@ -4175,8 +4174,13 @@ int arch_lock_shadow_stack_status(struct + */ + #define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT) + #else +-/* Always leave out the topmost two; see above. */ +-#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2) ++/* Use the lowest bit of PAGE_OFFSET if there's at least 8 bits available; see above */ ++#define PP_DMA_INDEX_MIN_OFFSET (1 << (PP_DMA_INDEX_SHIFT + 8)) ++#define PP_DMA_INDEX_BITS ((__builtin_constant_p(PAGE_OFFSET) && \ ++ PAGE_OFFSET >= PP_DMA_INDEX_MIN_OFFSET && \ ++ !(PAGE_OFFSET & (PP_DMA_INDEX_MIN_OFFSET - 1))) ? 
\ ++ MIN(32, __ffs(PAGE_OFFSET) - PP_DMA_INDEX_SHIFT) : 0) ++ + #endif + + #define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \ +--- a/net/core/page_pool.c ++++ b/net/core/page_pool.c +@@ -472,11 +472,60 @@ page_pool_dma_sync_for_device(const stru + } + } + ++static int page_pool_register_dma_index(struct page_pool *pool, ++ netmem_ref netmem, gfp_t gfp) ++{ ++ int err = 0; ++ u32 id; ++ ++ if (unlikely(!PP_DMA_INDEX_BITS)) ++ goto out; ++ ++ if (in_softirq()) ++ err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem), ++ PP_DMA_INDEX_LIMIT, gfp); ++ else ++ err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem), ++ PP_DMA_INDEX_LIMIT, gfp); ++ if (err) { ++ WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@"); ++ goto out; ++ } ++ ++ netmem_set_dma_index(netmem, id); ++out: ++ return err; ++} ++ ++static int page_pool_release_dma_index(struct page_pool *pool, ++ netmem_ref netmem) ++{ ++ struct page *old, *page = netmem_to_page(netmem); ++ unsigned long id; ++ ++ if (unlikely(!PP_DMA_INDEX_BITS)) ++ return 0; ++ ++ id = netmem_get_dma_index(netmem); ++ if (!id) ++ return -1; ++ ++ if (in_softirq()) ++ old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0); ++ else ++ old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0); ++ if (old != page) ++ return -1; ++ ++ netmem_set_dma_index(netmem, 0); ++ ++ return 0; ++} ++ + static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t gfp) + { + dma_addr_t dma; + int err; +- u32 id; + + /* Setup DMA mapping: use 'struct page' area for storing DMA-addr + * since dma_addr_t can be either 32 or 64 bits and does not always fit +@@ -495,18 +544,10 @@ static bool page_pool_dma_map(struct pag + goto unmap_failed; + } + +- if (in_softirq()) +- err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem), +- PP_DMA_INDEX_LIMIT, gfp); +- else +- err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem), +- PP_DMA_INDEX_LIMIT, gfp); +- if (err) { +- WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@"); ++ err = page_pool_register_dma_index(pool, netmem, gfp); ++ if (err) + goto unset_failed; +- } + +- netmem_set_dma_index(netmem, id); + page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len); + + return true; +@@ -678,8 +719,6 @@ void page_pool_clear_pp_info(netmem_ref + static __always_inline void __page_pool_release_netmem_dma(struct page_pool *pool, + netmem_ref netmem) + { +- struct page *old, *page = netmem_to_page(netmem); +- unsigned long id; + dma_addr_t dma; + + if (!pool->dma_map) +@@ -688,15 +727,7 @@ static __always_inline void __page_pool_ + */ + return; + +- id = netmem_get_dma_index(netmem); +- if (!id) +- return; +- +- if (in_softirq()) +- old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0); +- else +- old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0); +- if (old != page) ++ if (page_pool_release_dma_index(pool, netmem)) + return; + + dma = page_pool_get_dma_addr_netmem(netmem); +@@ -706,7 +737,6 @@ static __always_inline void __page_pool_ + PAGE_SIZE << pool->p.order, pool->p.dma_dir, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); + page_pool_set_dma_addr_netmem(netmem, 0); +- netmem_set_dma_index(netmem, 0); + } + + /* Disconnects a page (from a page_pool). 
API users can have a need diff --git a/queue-6.17/pm-runtime-update-kerneldoc-return-codes.patch b/queue-6.17/pm-runtime-update-kerneldoc-return-codes.patch new file mode 100644 index 0000000000..aa792610c2 --- /dev/null +++ b/queue-6.17/pm-runtime-update-kerneldoc-return-codes.patch @@ -0,0 +1,254 @@ +From fed7eaa4f037361fe4f3d4170649d6849a25998d Mon Sep 17 00:00:00 2001 +From: Brian Norris +Date: Thu, 25 Sep 2025 12:42:16 -0700 +Subject: PM: runtime: Update kerneldoc return codes + +From: Brian Norris + +commit fed7eaa4f037361fe4f3d4170649d6849a25998d upstream. + +APIs based on __pm_runtime_idle() (pm_runtime_idle(), pm_request_idle()) +do not return 1 when already suspended. They return -EAGAIN. This is +already covered in the docs, so the entry for "1" is redundant and +conflicting. + +(pm_runtime_put() and pm_runtime_put_sync() were previously incorrect, +but that's fixed in "PM: runtime: pm_runtime_put{,_sync}() returns 1 +when already suspended", to ensure consistency with APIs like +pm_runtime_put_autosuspend().) + +RPM_GET_PUT APIs based on __pm_runtime_suspend() do return 1 when +already suspended, but the language is a little unclear -- it's not +really an "error", so it seems better to list as a clarification before +the 0/success case. Additionally, they only actually return 1 when the +refcount makes it to 0; if the usage counter is still non-zero, we +return 0. + +pm_runtime_put(), etc., also don't appear at first like they can ever +see "-EAGAIN: Runtime PM usage_count non-zero", because in non-racy +conditions, pm_runtime_put() would drop its reference count, see it's +non-zero, and return early (in __pm_runtime_idle()). However, it's +possible to race with another actor that increments the usage_count +afterward, since rpm_idle() is protected by a separate lock; in such a +case, we may see -EAGAIN. + +Because this case is only seen in the presence of concurrent actors, it +makes sense to clarify that this is when "usage_count **became** +non-zero", by way of some racing actor. + +Lastly, pm_runtime_put_sync_suspend() duplicated some -EAGAIN language. +Fix that. + +Fixes: 271ff96d6066 ("PM: runtime: Document return values of suspend-related API functions") +Link: https://lore.kernel.org/linux-pm/aJ5pkEJuixTaybV4@google.com/ +Signed-off-by: Brian Norris +Reviewed-by: Sakari Ailus +Cc: 6.17+ # 6.17+ +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/pm_runtime.h | 56 ++++++++++++++++++++++++--------------------- + 1 file changed, 31 insertions(+), 25 deletions(-) + +--- a/include/linux/pm_runtime.h ++++ b/include/linux/pm_runtime.h +@@ -350,13 +350,12 @@ static inline int pm_runtime_force_resum + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. +- * * -EAGAIN: Runtime PM usage_count non-zero, Runtime PM status change ongoing +- * or device not in %RPM_ACTIVE state. ++ * * -EAGAIN: Runtime PM usage counter non-zero, Runtime PM status change ++ * ongoing or device not in %RPM_ACTIVE state. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. +- * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM idle and suspend callbacks. + */ +@@ -370,14 +369,15 @@ static inline int pm_runtime_idle(struct + * @dev: Target device. + * + * Return: ++ * * 1: Success; device was already suspended. + * * 0: Success. 
+ * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. +- * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. ++ * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change ++ * ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -ENOSYS: CONFIG_PM not enabled. +- * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM suspend callbacks. + */ +@@ -396,14 +396,15 @@ static inline int pm_runtime_suspend(str + * engaging its "idle check" callback. + * + * Return: ++ * * 1: Success; device was already suspended. + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. +- * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. ++ * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change ++ * ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -ENOSYS: CONFIG_PM not enabled. +- * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM suspend callbacks. + */ +@@ -433,13 +434,12 @@ static inline int pm_runtime_resume(stru + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. +- * * -EAGAIN: Runtime PM usage_count non-zero, Runtime PM status change ongoing +- * or device not in %RPM_ACTIVE state. ++ * * -EAGAIN: Runtime PM usage counter non-zero, Runtime PM status change ++ * ongoing or device not in %RPM_ACTIVE state. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. +- * * 1: Device already suspended. + */ + static inline int pm_request_idle(struct device *dev) + { +@@ -464,15 +464,16 @@ static inline int pm_request_resume(stru + * equivalent pm_runtime_autosuspend() for @dev asynchronously. + * + * Return: ++ * * 1: Success; device was already suspended. + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. +- * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. ++ * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change ++ * ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. +- * * 1: Device already suspended. + */ + static inline int pm_request_autosuspend(struct device *dev) + { +@@ -540,15 +541,16 @@ static inline int pm_runtime_resume_and_ + * equal to 0, queue up a work item for @dev like in pm_request_idle(). + * + * Return: ++ * * 1: Success. Usage counter dropped to zero, but device was already suspended. + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. +- * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. ++ * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status ++ * change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. +- * * 1: Device already suspended. 
+ */ + static inline int pm_runtime_put(struct device *dev) + { +@@ -565,15 +567,16 @@ DEFINE_FREE(pm_runtime_put, struct devic + * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). + * + * Return: ++ * * 1: Success. Usage counter dropped to zero, but device was already suspended. + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. +- * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. ++ * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status ++ * change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. +- * * 1: Device already suspended. + */ + static inline int __pm_runtime_put_autosuspend(struct device *dev) + { +@@ -590,15 +593,16 @@ static inline int __pm_runtime_put_autos + * in pm_request_autosuspend(). + * + * Return: ++ * * 1: Success. Usage counter dropped to zero, but device was already suspended. + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. +- * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. ++ * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status ++ * change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. +- * * 1: Device already suspended. + */ + static inline int pm_runtime_put_autosuspend(struct device *dev) + { +@@ -619,14 +623,15 @@ static inline int pm_runtime_put_autosus + * if it returns an error code. + * + * Return: ++ * * 1: Success. Usage counter dropped to zero, but device was already suspended. + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. +- * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. ++ * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status ++ * change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -ENOSYS: CONFIG_PM not enabled. +- * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM suspend callbacks. + */ +@@ -646,15 +651,15 @@ static inline int pm_runtime_put_sync(st + * if it returns an error code. + * + * Return: ++ * * 1: Success. Usage counter dropped to zero, but device was already suspended. + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. +- * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. +- * * -EAGAIN: usage_count non-zero or Runtime PM status change ongoing. ++ * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status ++ * change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -ENOSYS: CONFIG_PM not enabled. +- * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM suspend callbacks. + */ +@@ -677,15 +682,16 @@ static inline int pm_runtime_put_sync_su + * if it returns an error code. + * + * Return: ++ * * 1: Success. Usage counter dropped to zero, but device was already suspended. + * * 0: Success. + * * -EINVAL: Runtime PM error. + * * -EACCES: Runtime PM disabled. 
+- * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. ++ * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status ++ * change ongoing. + * * -EBUSY: Runtime PM child_count non-zero. + * * -EPERM: Device PM QoS resume latency 0. + * * -EINPROGRESS: Suspend already in progress. + * * -ENOSYS: CONFIG_PM not enabled. +- * * 1: Device already suspended. + * Other values and conditions for the above values are possible as returned by + * Runtime PM suspend callbacks. + */ diff --git a/queue-6.17/series b/queue-6.17/series index 8801f19b32..490108944b 100644 --- a/queue-6.17/series +++ b/queue-6.17/series @@ -2,5 +2,18 @@ fs-always-return-zero-on-success-from-replace_fd.patch fscontext-do-not-consume-log-entries-when-returning-emsgsize.patch btrfs-fix-the-incorrect-max_bytes-value-for-find_lock_delalloc_range.patch arm64-map-_text-_stext-virtual-address-range-non-executable-read-only.patch -cxl-acpi-hmat-update-cxl-access-coordinates-directly-instead-of-through-hmat.patch rseq-protect-event-mask-against-membarrier-ipi.patch +statmount-don-t-call-path_put-under-namespace-semaphore.patch +listmount-don-t-call-path_put-under-namespace-semaphore.patch +clocksource-drivers-clps711x-fix-resource-leaks-in-error-paths.patch +memcg-skip-cgroup_file_notify-if-spinning-is-not-allowed.patch +page_pool-fix-pp_magic_mask-to-avoid-crashing-on-some-32-bit-arches.patch +pm-runtime-update-kerneldoc-return-codes.patch +dma-mapping-fix-direction-in-dma_alloc-direction-traces.patch +cpufreq-make-drivers-using-cpufreq_eternal-specify-transition-latency.patch +nfsd-unregister-with-rpcbind-when-deleting-a-transport.patch +kvm-x86-add-helper-to-retrieve-current-value-of-user-return-msr.patch +kvm-svm-emulate-perf_cntr_global_status_set-for-perfmonv2.patch +iio-frequency-adf4350-fix-adf4350_reg3_12bit_clkdiv_mode.patch +media-v4l2-subdev-fix-alloc-failure-check-in-v4l2_subdev_call_state_try.patch +asm-generic-io.h-skip-trace-helpers-if-rwmmio-events-are-disabled.patch diff --git a/queue-6.17/statmount-don-t-call-path_put-under-namespace-semaphore.patch b/queue-6.17/statmount-don-t-call-path_put-under-namespace-semaphore.patch new file mode 100644 index 0000000000..8b157171bf --- /dev/null +++ b/queue-6.17/statmount-don-t-call-path_put-under-namespace-semaphore.patch @@ -0,0 +1,65 @@ +From e8c84e2082e69335f66c8ade4895e80ec270d7c4 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Fri, 19 Sep 2025 17:03:51 +0200 +Subject: statmount: don't call path_put() under namespace semaphore + +From: Christian Brauner + +commit e8c84e2082e69335f66c8ade4895e80ec270d7c4 upstream. + +Massage statmount() and make sure we don't call path_put() under the +namespace semaphore. If we put the last reference we're fscked. 
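For illustration of the pattern this fix applies, here is a self-contained userspace sketch (not the namespace code; the lock, object and helpers are hypothetical): keep the reference while the lock is held, but drop it only after the lock is released, because a last-reference put can run teardown work that must not happen under the lock. In the sketch the release path takes the lock itself purely to model that hazard.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for the namespace semaphore. */
static pthread_mutex_t ns_lock = PTHREAD_MUTEX_INITIALIZER;

struct obj {
	int refcount;
};

static void obj_release(struct obj *o)
{
	/* Hypothetical release path that needs ns_lock: dropping the last
	 * reference while ns_lock is already held would deadlock here. */
	pthread_mutex_lock(&ns_lock);
	printf("releasing object\n");
	pthread_mutex_unlock(&ns_lock);
	free(o);
}

static void obj_put(struct obj *o)
{
	if (--o->refcount == 0)
		obj_release(o);
}

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	if (!o)
		return 1;
	o->refcount = 1;

	pthread_mutex_lock(&ns_lock);
	/* ... work that previously ended with the final obj_put(o) here ... */
	pthread_mutex_unlock(&ns_lock);

	/* Final reference dropped only after the lock is released. */
	obj_put(o);
	return 0;
}

The patch below makes the same movement: do_statmount() stashes the root path in struct kstatmount instead of putting it locally, and the caller drops it with path_put(&ks->root) only after the namespace semaphore has been released.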
+ +Fixes: 46eae99ef733 ("add statmount(2) syscall") +Cc: stable@vger.kernel.org # v6.8+ +Signed-off-by: Christian Brauner +Signed-off-by: Greg Kroah-Hartman +--- + fs/namespace.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -5711,7 +5711,6 @@ static int grab_requested_root(struct mn + static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, + struct mnt_namespace *ns) + { +- struct path root __free(path_put) = {}; + struct mount *m; + int err; + +@@ -5723,7 +5722,7 @@ static int do_statmount(struct kstatmoun + if (!s->mnt) + return -ENOENT; + +- err = grab_requested_root(ns, &root); ++ err = grab_requested_root(ns, &s->root); + if (err) + return err; + +@@ -5732,7 +5731,7 @@ static int do_statmount(struct kstatmoun + * mounts to show users. + */ + m = real_mount(s->mnt); +- if (!is_path_reachable(m, m->mnt.mnt_root, &root) && ++ if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) && + !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + +@@ -5740,8 +5739,6 @@ static int do_statmount(struct kstatmoun + if (err) + return err; + +- s->root = root; +- + /* + * Note that mount properties in mnt->mnt_flags, mnt->mnt_idmap + * can change concurrently as we only hold the read-side of the +@@ -5963,6 +5960,7 @@ retry: + if (!ret) + ret = copy_statmount_to_user(ks); + kvfree(ks->seq.buf); ++ path_put(&ks->root); + if (retry_statmount(ret, &seq_size)) + goto retry; + return ret;