Fixes for 6.12
author Sasha Levin <sashal@kernel.org>
Sun, 13 Apr 2025 17:12:36 +0000 (13:12 -0400)
committer Sasha Levin <sashal@kernel.org>
Sun, 13 Apr 2025 17:12:36 +0000 (13:12 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
44 files changed:
queue-6.12/ata-pata_pxa-fix-potential-null-pointer-dereference-.patch [new file with mode: 0644]
queue-6.12/ata-sata_sx4-add-error-handling-in-pdc20621_i2c_read.patch [new file with mode: 0644]
queue-6.12/cgroup-cpuset-enforce-at-most-one-rebuild_sched_doma.patch [new file with mode: 0644]
queue-6.12/cgroup-cpuset-fix-error-handling-in-remote_partition.patch [new file with mode: 0644]
queue-6.12/cgroup-cpuset-fix-incorrect-isolated_cpus-update-in-.patch [new file with mode: 0644]
queue-6.12/cgroup-cpuset-fix-race-between-newly-created-partiti.patch [new file with mode: 0644]
queue-6.12/cgroup-cpuset-further-optimize-code-if-config_cpuset.patch [new file with mode: 0644]
queue-6.12/cgroup-cpuset-revert-allow-suppression-of-sched-doma.patch [new file with mode: 0644]
queue-6.12/codel-remove-sch-q.qlen-check-before-qdisc_tree_redu.patch [new file with mode: 0644]
queue-6.12/drm-i915-disable-rpg-during-live-selftest.patch [new file with mode: 0644]
queue-6.12/drm-i915-huc-fix-fence-not-released-on-early-probe-e.patch [new file with mode: 0644]
queue-6.12/drm-tests-cmdline-fix-drm_display_mode-memory-leak.patch [new file with mode: 0644]
queue-6.12/drm-tests-helpers-create-kunit-helper-to-destroy-a-d.patch [new file with mode: 0644]
queue-6.12/drm-tests-modes-fix-drm_display_mode-memory-leak.patch [new file with mode: 0644]
queue-6.12/drm-tests-modeset-fix-drm_display_mode-memory-leak.patch [new file with mode: 0644]
queue-6.12/drm-tests-probe-helper-fix-drm_display_mode-memory-l.patch [new file with mode: 0644]
queue-6.12/drm-xe-hw_engine-define-sysfs_ops-on-all-directories.patch [new file with mode: 0644]
queue-6.12/gpiolib-of-fix-the-choice-for-ingenic-nand-quirk.patch [new file with mode: 0644]
queue-6.12/iommu-exynos-fix-suspend-resume-with-identity-domain.patch [new file with mode: 0644]
queue-6.12/iommu-mediatek-fix-null-pointer-deference-in-mtk_iom.patch [new file with mode: 0644]
queue-6.12/ipv6-align-behavior-across-nexthops-during-path-sele.patch [new file with mode: 0644]
queue-6.12/net-ethtool-don-t-call-.cleanup_data-when-prepare_da.patch [new file with mode: 0644]
queue-6.12/net-libwx-handle-page_pool_dev_alloc_pages-error.patch [new file with mode: 0644]
queue-6.12/net-phy-allow-mdio-bus-pm-ops-to-start-stop-state-ma.patch [new file with mode: 0644]
queue-6.12/net-phy-move-phy_link_change-prior-to-mdio_bus_phy_m.patch [new file with mode: 0644]
queue-6.12/net-ppp-add-bound-checking-for-skb-data-on-ppp_sync_.patch [new file with mode: 0644]
queue-6.12/net-tls-explicitly-disallow-disconnect.patch [new file with mode: 0644]
queue-6.12/net_sched-sch_sfq-move-the-limit-validation.patch [new file with mode: 0644]
queue-6.12/net_sched-sch_sfq-use-a-temporary-work-area-for-vali.patch [new file with mode: 0644]
queue-6.12/nft_set_pipapo-fix-incorrect-avx2-match-of-5th-field.patch [new file with mode: 0644]
queue-6.12/nvmet-fcloop-swap-list_add_tail-arguments.patch [new file with mode: 0644]
queue-6.12/objtool-fix-insn_context_switch-handling-in-validate.patch [new file with mode: 0644]
queue-6.12/octeontx2-pf-qos-fix-vf-root-node-parent-queue-index.patch [new file with mode: 0644]
queue-6.12/perf-core-add-aux_pause-aux_resume-aux_start_paused.patch [new file with mode: 0644]
queue-6.12/perf-core-simplify-the-perf_event_alloc-error-path.patch [new file with mode: 0644]
queue-6.12/perf-fix-hang-while-freeing-sigtrap-event.patch [new file with mode: 0644]
queue-6.12/selftests-futex-futex_waitv-wouldblock-test-should-f.patch [new file with mode: 0644]
queue-6.12/series
queue-6.12/smb-client-fix-uaf-in-decryption-with-multichannel.patch [new file with mode: 0644]
queue-6.12/tc-ensure-we-have-enough-buffer-space-when-sending-f.patch [new file with mode: 0644]
queue-6.12/tipc-fix-memory-leak-in-tipc_link_xmit.patch [new file with mode: 0644]
queue-6.12/ublk-fix-handling-recovery-reissue-in-ublk_abort_que.patch [new file with mode: 0644]
queue-6.12/ublk-refactor-recovery-configuration-flag-helpers.patch [new file with mode: 0644]
queue-6.12/x86-acpi-don-t-limit-cpus-to-1-for-xen-pv-guests-due.patch [new file with mode: 0644]

diff --git a/queue-6.12/ata-pata_pxa-fix-potential-null-pointer-dereference-.patch b/queue-6.12/ata-pata_pxa-fix-potential-null-pointer-dereference-.patch
new file mode 100644
index 0000000..283b01b
--- /dev/null
@@ -0,0 +1,47 @@
+From e9e28ebb2dbe7298a40d52a59b47479992a16bce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Apr 2025 14:14:38 +0800
+Subject: ata: pata_pxa: Fix potential NULL pointer dereference in
+ pxa_ata_probe()
+
+From: Henry Martin <bsdhenrymartin@gmail.com>
+
+[ Upstream commit ad320e408a8c95a282ab9c05cdf0c9b95e317985 ]
+
+devm_ioremap() returns NULL on error. Currently, pxa_ata_probe() does
+not check for this case, which can result in a NULL pointer dereference.
+
+Add NULL check after devm_ioremap() to prevent this issue.
+
+Fixes: 2dc6c6f15da9 ("[ARM] pata_pxa: DMA-capable PATA driver")
+Signed-off-by: Henry Martin <bsdhenrymartin@gmail.com>
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ata/pata_pxa.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/ata/pata_pxa.c b/drivers/ata/pata_pxa.c
+index 538bd3423d859..1bdcd6ee741d3 100644
+--- a/drivers/ata/pata_pxa.c
++++ b/drivers/ata/pata_pxa.c
+@@ -223,10 +223,16 @@ static int pxa_ata_probe(struct platform_device *pdev)
+       ap->ioaddr.cmd_addr     = devm_ioremap(&pdev->dev, cmd_res->start,
+                                               resource_size(cmd_res));
++      if (!ap->ioaddr.cmd_addr)
++              return -ENOMEM;
+       ap->ioaddr.ctl_addr     = devm_ioremap(&pdev->dev, ctl_res->start,
+                                               resource_size(ctl_res));
++      if (!ap->ioaddr.ctl_addr)
++              return -ENOMEM;
+       ap->ioaddr.bmdma_addr   = devm_ioremap(&pdev->dev, dma_res->start,
+                                               resource_size(dma_res));
++      if (!ap->ioaddr.bmdma_addr)
++              return -ENOMEM;
+       /*
+        * Adjust register offsets
+-- 
+2.39.5
+
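A note on the pattern above: devm_ioremap() returns NULL on failure rather than an ERR_PTR, so every mapping must be checked before first use. A minimal probe sketch of the same idiom follows; foo_probe() and the single memory resource are illustrative, not taken from the driver above.

  #include <linux/io.h>
  #include <linux/platform_device.h>

  static int foo_probe(struct platform_device *pdev)
  {
          struct resource *res;
          void __iomem *base;

          res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
          if (!res)
                  return -ENODEV;

          /* devm_ioremap() returns NULL on error, not an ERR_PTR */
          base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
          if (!base)
                  return -ENOMEM;

          /* mapping is valid; devres unmaps it automatically on detach */
          return 0;
  }
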
diff --git a/queue-6.12/ata-sata_sx4-add-error-handling-in-pdc20621_i2c_read.patch b/queue-6.12/ata-sata_sx4-add-error-handling-in-pdc20621_i2c_read.patch
new file mode 100644
index 0000000..96697a0
--- /dev/null
@@ -0,0 +1,66 @@
+From 6c4c35b408bebe76f47805f186257f9723e59224 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Apr 2025 15:30:01 +0800
+Subject: ata: sata_sx4: Add error handling in pdc20621_i2c_read()
+
+From: Wentao Liang <vulab@iscas.ac.cn>
+
+[ Upstream commit 8d46a27085039158eb5e253ab8a35a0e33b5e864 ]
+
+The function pdc20621_prog_dimm0() calls the function pdc20621_i2c_read()
+but does not handle the error if the read fails. This could lead to
+processing invalid data. A proper implementation can be found in
+pdc20621_prog_dimm_global() in drivers/ata/sata_sx4.c. As mentioned in
+commit bb44e154e25125bef31fa956785e90fccd24610b, the variable spd0
+might be used uninitialized when pdc20621_i2c_read() fails.
+
+Add error handling to pdc20621_i2c_read(). If a read operation fails,
+an error message is logged via dev_err() and a negative error code is
+returned.
+
+Add error handling to pdc20621_prog_dimm0() in pdc20621_dimm_init(), and
+return a negative error code if pdc20621_prog_dimm0() fails.
+
+Fixes: 4447d3515616 ("libata: convert the remaining SATA drivers to new init model")
+Signed-off-by: Wentao Liang <vulab@iscas.ac.cn>
+Reviewed-by: Niklas Cassel <cassel@kernel.org>
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ata/sata_sx4.c | 13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
+index a482741eb181f..c3042eca6332d 100644
+--- a/drivers/ata/sata_sx4.c
++++ b/drivers/ata/sata_sx4.c
+@@ -1117,9 +1117,14 @@ static int pdc20621_prog_dimm0(struct ata_host *host)
+       mmio += PDC_CHIP0_OFS;
+       for (i = 0; i < ARRAY_SIZE(pdc_i2c_read_data); i++)
+-              pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS,
+-                                pdc_i2c_read_data[i].reg,
+-                                &spd0[pdc_i2c_read_data[i].ofs]);
++              if (!pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS,
++                                     pdc_i2c_read_data[i].reg,
++                                     &spd0[pdc_i2c_read_data[i].ofs])) {
++                      dev_err(host->dev,
++                              "Failed in i2c read at index %d: device=%#x, reg=%#x\n",
++                              i, PDC_DIMM0_SPD_DEV_ADDRESS, pdc_i2c_read_data[i].reg);
++                      return -EIO;
++              }
+       data |= (spd0[4] - 8) | ((spd0[21] != 0) << 3) | ((spd0[3]-11) << 4);
+       data |= ((spd0[17] / 4) << 6) | ((spd0[5] / 2) << 7) |
+@@ -1284,6 +1289,8 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host)
+       /* Programming DIMM0 Module Control Register (index_CID0:80h) */
+       size = pdc20621_prog_dimm0(host);
++      if (size < 0)
++              return size;
+       dev_dbg(host->dev, "Local DIMM Size = %dMB\n", size);
+       /* Programming DIMM Module Global Control Register (index_CID0:88h) */
+-- 
+2.39.5
+
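The pattern here is worth spelling out: pdc20621_i2c_read() reports success as non-zero, so a failed read must be turned into a negative errno that callers can propagate instead of consuming an uninitialized buffer. A condensed sketch under assumed types; read_spd_byte() is an illustrative wrapper name, not from the patch.

  static int read_spd_byte(struct ata_host *host, u32 reg, u32 *val)
  {
          if (!pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS, reg, val)) {
                  dev_err(host->dev, "i2c read failed: reg=%#x\n", reg);
                  return -EIO;    /* failure becomes a negative errno */
          }
          return 0;
  }

  /* Callers then check for a negative value before using the result:
   *
   *      size = pdc20621_prog_dimm0(host);
   *      if (size < 0)
   *              return size;
   */
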
diff --git a/queue-6.12/cgroup-cpuset-enforce-at-most-one-rebuild_sched_doma.patch b/queue-6.12/cgroup-cpuset-enforce-at-most-one-rebuild_sched_doma.patch
new file mode 100644
index 0000000..6b12a38
--- /dev/null
@@ -0,0 +1,173 @@
+From dfec9762be414d6cb96d9dd919239b0b584355ac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Nov 2024 21:50:22 -0500
+Subject: cgroup/cpuset: Enforce at most one rebuild_sched_domains_locked()
+ call per operation
+
+From: Waiman Long <longman@redhat.com>
+
+[ Upstream commit a040c351283e3ac75422621ea205b1d8d687e108 ]
+
+Since commit ff0ce721ec21 ("cgroup/cpuset: Eliminate unncessary
+sched domains rebuilds in hotplug"), there is only one
+rebuild_sched_domains_locked() call per hotplug operation. However,
+writing to the various cpuset control files may still casue more than
+one rebuild_sched_domains_locked() call to happen in some cases.
+
+Juri had found that two rebuild_sched_domains_locked() calls in
+update_prstate(), one from update_cpumasks_hier() and another one from
+update_partition_sd_lb() could cause cpuset partition to be created
+with null total_bw for DL tasks. IOW, DL tasks may not be scheduled
+correctly in such a partition.
+
+A sample command sequence that can reproduce null total_bw is as
+follows.
+
+  # echo Y >/sys/kernel/debug/sched/verbose
+  # echo +cpuset >/sys/fs/cgroup/cgroup.subtree_control
+  # mkdir /sys/fs/cgroup/test
+  # echo 0-7 > /sys/fs/cgroup/test/cpuset.cpus
+  # echo 6-7 > /sys/fs/cgroup/test/cpuset.cpus.exclusive
+  # echo root >/sys/fs/cgroup/test/cpuset.cpus.partition
+
+Fix this problem of double rebuild_sched_domains_locked() calls
+by replacing existing calls with cpuset_force_rebuild() except
+the rebuild_sched_domains_cpuslocked() call at the end of
+cpuset_handle_hotplug(). Checking of the force_sd_rebuild flag is
+now done at the end of cpuset_write_resmask() and update_prstate()
+to determine if rebuild_sched_domains_locked() should be called or not.
+
+The cpuset v1 code can still call rebuild_sched_domains_locked()
+directly, as double rebuild_sched_domains_locked() calls are not possible.
+
+Reported-by: Juri Lelli <juri.lelli@redhat.com>
+Closes: https://lore.kernel.org/lkml/ZyuUcJDPBln1BK1Y@jlelli-thinkpadt14gen4.remote.csb/
+Signed-off-by: Waiman Long <longman@redhat.com>
+Tested-by: Juri Lelli <juri.lelli@redhat.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Stable-dep-of: a22b3d54de94 ("cgroup/cpuset: Fix race between newly created partition and dying one")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cpuset.c | 49 ++++++++++++++++++++++++++++--------------
+ 1 file changed, 33 insertions(+), 16 deletions(-)
+
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index 0012c34bb8601..7ac2a634128b3 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -84,9 +84,19 @@ static bool         have_boot_isolcpus;
+ static struct list_head remote_children;
+ /*
+- * A flag to force sched domain rebuild at the end of an operation while
+- * inhibiting it in the intermediate stages when set. Currently it is only
+- * set in hotplug code.
++ * A flag to force sched domain rebuild at the end of an operation.
++ * It can be set in
++ *  - update_partition_sd_lb()
++ *  - remote_partition_check()
++ *  - update_cpumasks_hier()
++ *  - cpuset_update_flag()
++ *  - cpuset_hotplug_update_tasks()
++ *  - cpuset_handle_hotplug()
++ *
++ * Protected by cpuset_mutex (with cpus_read_lock held) or cpus_write_lock.
++ *
++ * Note that update_relax_domain_level() in cpuset-v1.c can still call
++ * rebuild_sched_domains_locked() directly without using this flag.
+  */
+ static bool force_sd_rebuild;
+@@ -998,6 +1008,7 @@ void rebuild_sched_domains_locked(void)
+       lockdep_assert_cpus_held();
+       lockdep_assert_held(&cpuset_mutex);
++      force_sd_rebuild = false;
+       /*
+        * If we have raced with CPU hotplug, return early to avoid
+@@ -1172,8 +1183,8 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs)
+                       clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
+       }
+-      if (rebuild_domains && !force_sd_rebuild)
+-              rebuild_sched_domains_locked();
++      if (rebuild_domains)
++              cpuset_force_rebuild();
+ }
+ /*
+@@ -1530,8 +1541,8 @@ static void remote_partition_check(struct cpuset *cs, struct cpumask *newmask,
+                       remote_partition_disable(child, tmp);
+                       disable_cnt++;
+               }
+-      if (disable_cnt && !force_sd_rebuild)
+-              rebuild_sched_domains_locked();
++      if (disable_cnt)
++              cpuset_force_rebuild();
+ }
+ /*
+@@ -2124,8 +2135,8 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
+       }
+       rcu_read_unlock();
+-      if (need_rebuild_sched_domains && !force_sd_rebuild)
+-              rebuild_sched_domains_locked();
++      if (need_rebuild_sched_domains)
++              cpuset_force_rebuild();
+ }
+ /**
+@@ -2744,9 +2755,13 @@ int cpuset_update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
+       cs->flags = trialcs->flags;
+       spin_unlock_irq(&callback_lock);
+-      if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed &&
+-          !force_sd_rebuild)
+-              rebuild_sched_domains_locked();
++      if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) {
++              if (!IS_ENABLED(CONFIG_CPUSETS_V1) ||
++                  cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
++                      cpuset_force_rebuild();
++              else
++                      rebuild_sched_domains_locked();
++      }
+       if (spread_flag_changed)
+               cpuset1_update_tasks_flags(cs);
+@@ -2866,6 +2881,8 @@ static int update_prstate(struct cpuset *cs, int new_prs)
+       update_partition_sd_lb(cs, old_prs);
+       notify_partition_change(cs, old_prs);
++      if (force_sd_rebuild)
++              rebuild_sched_domains_locked();
+       free_cpumasks(NULL, &tmpmask);
+       return 0;
+ }
+@@ -3136,6 +3153,8 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
+       }
+       free_cpuset(trialcs);
++      if (force_sd_rebuild)
++              rebuild_sched_domains_locked();
+ out_unlock:
+       mutex_unlock(&cpuset_mutex);
+       cpus_read_unlock();
+@@ -3879,11 +3898,9 @@ static void cpuset_handle_hotplug(void)
+               rcu_read_unlock();
+       }
+-      /* rebuild sched domains if cpus_allowed has changed */
+-      if (force_sd_rebuild) {
+-              force_sd_rebuild = false;
++      /* rebuild sched domains if necessary */
++      if (force_sd_rebuild)
+               rebuild_sched_domains_cpuslocked();
+-      }
+       free_cpumasks(NULL, ptmp);
+ }
+-- 
+2.39.5
+
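The shape of the fix is a classic defer-and-batch: intermediate steps only record that a rebuild is needed, and the expensive rebuild runs at most once when the whole operation completes. A minimal sketch of the idiom under the names used above; the write-handler body is illustrative, not the real control-file handler.

  static bool force_sd_rebuild;           /* protected by cpuset_mutex */

  static void cpuset_force_rebuild(void)
  {
          force_sd_rebuild = true;        /* record the need, rebuild later */
  }

  static void example_resmask_write(void)
  {
          update_cpumasks_hier_step();    /* may call cpuset_force_rebuild() */
          update_partition_sd_lb_step();  /* may call cpuset_force_rebuild() */

          /* At most one rebuild per operation;
           * rebuild_sched_domains_locked() clears force_sd_rebuild on entry.
           */
          if (force_sd_rebuild)
                  rebuild_sched_domains_locked();
  }
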
diff --git a/queue-6.12/cgroup-cpuset-fix-error-handling-in-remote_partition.patch b/queue-6.12/cgroup-cpuset-fix-error-handling-in-remote_partition.patch
new file mode 100644
index 0000000..c03e1d3
--- /dev/null
@@ -0,0 +1,102 @@
+From 404218837e9d61a1e8080c32dd86273693626a86 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 30 Mar 2025 17:52:41 -0400
+Subject: cgroup/cpuset: Fix error handling in remote_partition_disable()
+
+From: Waiman Long <longman@redhat.com>
+
+[ Upstream commit 8bf450f3aec3d1bbd725d179502c64b8992588e4 ]
+
+When remote_partition_disable() is called to disable a remote partition,
+it always sets the partition to an invalid partition state. It should
+only do so if an error code (prs_err) has been set. Correct that and
+add a proper error code in places where remote_partition_disable() is
+called due to error.
+
+Fixes: 181c8e091aae ("cgroup/cpuset: Introduce remote partition")
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cpuset.c | 29 ++++++++++++++++++++---------
+ 1 file changed, 20 insertions(+), 9 deletions(-)
+
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index f7ad5651c93db..70fac05123c6d 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -1383,6 +1383,7 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs,
+       list_add(&cs->remote_sibling, &remote_children);
+       spin_unlock_irq(&callback_lock);
+       update_unbound_workqueue_cpumask(isolcpus_updated);
++      cs->prs_err = 0;
+       /*
+        * Proprogate changes in top_cpuset's effective_cpus down the hierarchy.
+@@ -1413,9 +1414,11 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
+       list_del_init(&cs->remote_sibling);
+       isolcpus_updated = partition_xcpus_del(cs->partition_root_state,
+                                              NULL, tmp->new_cpus);
+-      cs->partition_root_state = -cs->partition_root_state;
+-      if (!cs->prs_err)
+-              cs->prs_err = PERR_INVCPUS;
++      if (cs->prs_err)
++              cs->partition_root_state = -cs->partition_root_state;
++      else
++              cs->partition_root_state = PRS_MEMBER;
++
+       reset_partition_data(cs);
+       spin_unlock_irq(&callback_lock);
+       update_unbound_workqueue_cpumask(isolcpus_updated);
+@@ -1448,8 +1451,10 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
+       WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus));
+-      if (cpumask_empty(newmask))
++      if (cpumask_empty(newmask)) {
++              cs->prs_err = PERR_CPUSEMPTY;
+               goto invalidate;
++      }
+       adding   = cpumask_andnot(tmp->addmask, newmask, cs->effective_xcpus);
+       deleting = cpumask_andnot(tmp->delmask, cs->effective_xcpus, newmask);
+@@ -1459,10 +1464,15 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
+        * not allocated to other partitions and there are effective_cpus
+        * left in the top cpuset.
+        */
+-      if (adding && (!capable(CAP_SYS_ADMIN) ||
+-                     cpumask_intersects(tmp->addmask, subpartitions_cpus) ||
+-                     cpumask_subset(top_cpuset.effective_cpus, tmp->addmask)))
+-              goto invalidate;
++      if (adding) {
++              if (!capable(CAP_SYS_ADMIN))
++                      cs->prs_err = PERR_ACCESS;
++              else if (cpumask_intersects(tmp->addmask, subpartitions_cpus) ||
++                       cpumask_subset(top_cpuset.effective_cpus, tmp->addmask))
++                      cs->prs_err = PERR_NOCPUS;
++              if (cs->prs_err)
++                      goto invalidate;
++      }
+       spin_lock_irq(&callback_lock);
+       if (adding)
+@@ -1578,7 +1588,7 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
+  * The partcmd_update command is used by update_cpumasks_hier() with newmask
+  * NULL and update_cpumask() with newmask set. The partcmd_invalidate is used
+  * by update_cpumask() with NULL newmask. In both cases, the callers won't
+- * check for error and so partition_root_state and prs_error will be updated
++ * check for error and so partition_root_state and prs_err will be updated
+  * directly.
+  */
+ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
+@@ -3726,6 +3736,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
+       if (remote && cpumask_empty(&new_cpus) &&
+           partition_is_populated(cs, NULL)) {
++              cs->prs_err = PERR_HOTPLUG;
+               remote_partition_disable(cs, tmp);
+               compute_effective_cpumask(&new_cpus, cs, parent);
+               remote = false;
+-- 
+2.39.5
+
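The invariant being restored: cs->prs_err records why a partition became invalid, and remote_partition_disable() now keys off it, flipping partition_root_state negative only on a real error and falling back to PRS_MEMBER otherwise. A condensed sketch of the caller side; the conflict test is an illustrative placeholder.

  if (!capable(CAP_SYS_ADMIN))
          cs->prs_err = PERR_ACCESS;      /* record the reason first */
  else if (requested_cpus_conflict(cs))   /* illustrative helper name */
          cs->prs_err = PERR_NOCPUS;

  if (cs->prs_err)
          goto invalidate;  /* disable path sees a non-zero prs_err */
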
diff --git a/queue-6.12/cgroup-cpuset-fix-incorrect-isolated_cpus-update-in-.patch b/queue-6.12/cgroup-cpuset-fix-incorrect-isolated_cpus-update-in-.patch
new file mode 100644
index 0000000..1e7bf84
--- /dev/null
@@ -0,0 +1,56 @@
+From b81b1f0815c00515c983706d1a70a44cff8c1972 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 30 Mar 2025 17:52:40 -0400
+Subject: cgroup/cpuset: Fix incorrect isolated_cpus update in
+ update_parent_effective_cpumask()
+
+From: Waiman Long <longman@redhat.com>
+
+[ Upstream commit 668e041662e92ab3ebcb9eb606d3ec01884546ab ]
+
+Before commit f0af1bfc27b5 ("cgroup/cpuset: Relax constraints to
+partition & cpus changes"), a cpuset partition cannot be enabled if not
+all the requested CPUs can be granted from the parent cpuset. After
+that commit, a cpuset partition can be created even if the requested
+exclusive CPUs contain CPUs not allowed its parent.  The delmask
+containing exclusive CPUs to be removed from its parent wasn't
+adjusted accordingly.
+
+That is not a problem until the introduction of a new isolated_cpus
+mask in commit 11e5f407b64a ("cgroup/cpuset: Keep track of CPUs in
+isolated partitions") as the CPUs in the delmask may be added directly
+into isolated_cpus.
+
+As a result, isolated_cpus may incorrectly contain CPUs that are not
+isolated, leading to incorrect data reporting. Fix this by adjusting
+the delmask to reflect the actual exclusive CPUs for the creation of
+the partition.
+
+Fixes: 11e5f407b64a ("cgroup/cpuset: Keep track of CPUs in isolated partitions")
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cpuset.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index 24ece85fd3b12..f7ad5651c93db 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -1656,9 +1656,9 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
+               if (nocpu)
+                       return PERR_NOCPUS;
+-              cpumask_copy(tmp->delmask, xcpus);
+-              deleting = true;
+-              subparts_delta++;
++              deleting = cpumask_and(tmp->delmask, xcpus, parent->effective_xcpus);
++              if (deleting)
++                      subparts_delta++;
+               new_prs = (cmd == partcmd_enable) ? PRS_ROOT : PRS_ISOLATED;
+       } else if (cmd == partcmd_disable) {
+               /*
+-- 
+2.39.5
+
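One useful detail behind the one-liner: cpumask_and() returns false when the resulting mask is empty, so the intersection with parent->effective_xcpus doubles as the "is there anything to delete?" test.

  /* Restrict the deletion mask to CPUs the parent actually granted;
   * an empty intersection means nothing to remove and no delta.
   */
  deleting = cpumask_and(tmp->delmask, xcpus, parent->effective_xcpus);
  if (deleting)
          subparts_delta++;
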
diff --git a/queue-6.12/cgroup-cpuset-fix-race-between-newly-created-partiti.patch b/queue-6.12/cgroup-cpuset-fix-race-between-newly-created-partiti.patch
new file mode 100644 (file)
index 0000000..08d046f
--- /dev/null
@@ -0,0 +1,144 @@
+From bcd649e716bee3dbdaf99ca56a639b8e639def7a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 30 Mar 2025 17:52:39 -0400
+Subject: cgroup/cpuset: Fix race between newly created partition and dying one
+
+From: Waiman Long <longman@redhat.com>
+
+[ Upstream commit a22b3d54de94f82ca057cc2ebf9496fa91ebf698 ]
+
+There is a possible race between removing a cgroup directory that is
+a partition root and the creation of a new partition.  The partition
+to be removed can be dying but still online; it does not currently
+participate in checking for exclusive CPU conflicts, but its exclusive
+CPUs are still there in subpartitions_cpus and isolated_cpus. These
+two cpumasks are global states that affect the operation of cpuset
+partitions. The exclusive CPUs in dying cpusets will only be removed
+when the cpuset_css_offline() function is called after an RCU delay.
+
+As a result, it is possible that a new partition can be created with
+exclusive CPUs that overlap with those of a dying one. When that dying
+partition is finally offlined, it removes those overlapping exclusive
+CPUs from subpartitions_cpus and maybe isolated_cpus resulting in an
+incorrect CPU configuration.
+
+This bug was found when a warning was triggered in
+remote_partition_disable() during testing because the subpartitions_cpus
+mask was empty.
+
+One possible way to fix this is to iterate the dying cpusets as well and
+avoid using the exclusive CPUs in those dying cpusets. However, this
+can still cause random partition creation failures or other anomalies
+due to racing. A better way to fix this race is to reset the partition
+state at the moment when a cpuset is being killed.
+
+Introduce a new css_killed() CSS function pointer and call it, if
+defined, before setting the CSS_DYING flag in kill_css(). Also update the
+css_is_dying() helper to use the CSS_DYING flag introduced by commit
+33c35aa48178 ("cgroup: Prevent kill_css() from being called more than
+once") for proper synchronization.
+
+Add a new cpuset_css_killed() function to reset the partition state of
+a valid partition root if it is being killed.
+
+Fixes: ee8dde0cd2ce ("cpuset: Add new v2 cpuset.sched.partition flag")
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/cgroup-defs.h |  1 +
+ include/linux/cgroup.h      |  2 +-
+ kernel/cgroup/cgroup.c      |  6 ++++++
+ kernel/cgroup/cpuset.c      | 20 +++++++++++++++++---
+ 4 files changed, 25 insertions(+), 4 deletions(-)
+
+diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
+index 38b2af336e4a0..252eed781a6e9 100644
+--- a/include/linux/cgroup-defs.h
++++ b/include/linux/cgroup-defs.h
+@@ -711,6 +711,7 @@ struct cgroup_subsys {
+       void (*css_released)(struct cgroup_subsys_state *css);
+       void (*css_free)(struct cgroup_subsys_state *css);
+       void (*css_reset)(struct cgroup_subsys_state *css);
++      void (*css_killed)(struct cgroup_subsys_state *css);
+       void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu);
+       int (*css_extra_stat_show)(struct seq_file *seq,
+                                  struct cgroup_subsys_state *css);
+diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
+index f8ef47f8a634d..fc1324ed597d6 100644
+--- a/include/linux/cgroup.h
++++ b/include/linux/cgroup.h
+@@ -343,7 +343,7 @@ static inline u64 cgroup_id(const struct cgroup *cgrp)
+  */
+ static inline bool css_is_dying(struct cgroup_subsys_state *css)
+ {
+-      return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt);
++      return css->flags & CSS_DYING;
+ }
+ static inline void cgroup_get(struct cgroup *cgrp)
+diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
+index 216535e055e11..4378f3eff25d2 100644
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -5909,6 +5909,12 @@ static void kill_css(struct cgroup_subsys_state *css)
+       if (css->flags & CSS_DYING)
+               return;
++      /*
++       * Call css_killed(), if defined, before setting the CSS_DYING flag
++       */
++      if (css->ss->css_killed)
++              css->ss->css_killed(css);
++
+       css->flags |= CSS_DYING;
+       /*
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index 07ea3a563150b..839f88ba17f7d 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -3479,9 +3479,6 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
+       cpus_read_lock();
+       mutex_lock(&cpuset_mutex);
+-      if (is_partition_valid(cs))
+-              update_prstate(cs, 0);
+-
+       if (!cpuset_v2() && is_sched_load_balance(cs))
+               cpuset_update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
+@@ -3492,6 +3489,22 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
+       cpus_read_unlock();
+ }
++static void cpuset_css_killed(struct cgroup_subsys_state *css)
++{
++      struct cpuset *cs = css_cs(css);
++
++      cpus_read_lock();
++      mutex_lock(&cpuset_mutex);
++
++      /* Reset valid partition back to member */
++      if (is_partition_valid(cs))
++              update_prstate(cs, PRS_MEMBER);
++
++      mutex_unlock(&cpuset_mutex);
++      cpus_read_unlock();
++
++}
++
+ static void cpuset_css_free(struct cgroup_subsys_state *css)
+ {
+       struct cpuset *cs = css_cs(css);
+@@ -3613,6 +3626,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
+       .css_alloc      = cpuset_css_alloc,
+       .css_online     = cpuset_css_online,
+       .css_offline    = cpuset_css_offline,
++      .css_killed     = cpuset_css_killed,
+       .css_free       = cpuset_css_free,
+       .can_attach     = cpuset_can_attach,
+       .cancel_attach  = cpuset_cancel_attach,
+-- 
+2.39.5
+
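The new hook follows the existing optional-callback convention in struct cgroup_subsys: kill_css() invokes it, if set, before CSS_DYING is raised, giving a controller one synchronous chance to drop globally visible state ahead of the RCU-delayed offline. A hedged sketch of how another controller might wire it up; all foo_* names are hypothetical.

  static void foo_css_killed(struct cgroup_subsys_state *css)
  {
          struct foo_cgroup *fc = css_to_foo(css);  /* hypothetical cast */

          mutex_lock(&foo_mutex);
          foo_drop_global_state(fc);      /* undo cross-cgroup bookkeeping */
          mutex_unlock(&foo_mutex);
  }

  struct cgroup_subsys foo_cgrp_subsys = {
          .css_alloc  = foo_css_alloc,
          .css_free   = foo_css_free,
          .css_killed = foo_css_killed,   /* runs before CSS_DYING is set */
  };
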
diff --git a/queue-6.12/cgroup-cpuset-further-optimize-code-if-config_cpuset.patch b/queue-6.12/cgroup-cpuset-further-optimize-code-if-config_cpuset.patch
new file mode 100644
index 0000000..3b1a795
--- /dev/null
@@ -0,0 +1,168 @@
+From afe6105bbd1857ec636b7b285c874a46e81edde0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Nov 2024 21:50:23 -0500
+Subject: cgroup/cpuset: Further optimize code if CONFIG_CPUSETS_V1 not set
+
+From: Waiman Long <longman@redhat.com>
+
+[ Upstream commit c4c9cebe2fb9cdc73e55513de7af7a4f50260e88 ]
+
+Currently the cpuset code uses cgroup_subsys_on_dfl() to check if we
+are running with cgroup v2. If CONFIG_CPUSETS_V1 isn't set, there is
+really no need to do this check and we can optimize out some of the
+unneeded v1-specific code paths. Introduce a new cpuset_v2() helper and use it
+to replace the cgroup_subsys_on_dfl() check to further optimize the
+code.
+
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Stable-dep-of: a22b3d54de94 ("cgroup/cpuset: Fix race between newly created partition and dying one")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cpuset.c | 39 +++++++++++++++++++--------------------
+ 1 file changed, 19 insertions(+), 20 deletions(-)
+
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index 7ac2a634128b3..07ea3a563150b 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -293,6 +293,12 @@ static inline void dec_attach_in_progress(struct cpuset *cs)
+       mutex_unlock(&cpuset_mutex);
+ }
++static inline bool cpuset_v2(void)
++{
++      return !IS_ENABLED(CONFIG_CPUSETS_V1) ||
++              cgroup_subsys_on_dfl(cpuset_cgrp_subsys);
++}
++
+ /*
+  * Cgroup v2 behavior is used on the "cpus" and "mems" control files when
+  * on default hierarchy or when the cpuset_v2_mode flag is set by mounting
+@@ -303,7 +309,7 @@ static inline void dec_attach_in_progress(struct cpuset *cs)
+  */
+ static inline bool is_in_v2_mode(void)
+ {
+-      return cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
++      return cpuset_v2() ||
+             (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE);
+ }
+@@ -738,7 +744,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
+       int nslot;              /* next empty doms[] struct cpumask slot */
+       struct cgroup_subsys_state *pos_css;
+       bool root_load_balance = is_sched_load_balance(&top_cpuset);
+-      bool cgrpv2 = cgroup_subsys_on_dfl(cpuset_cgrp_subsys);
++      bool cgrpv2 = cpuset_v2();
+       int nslot_update;
+       doms = NULL;
+@@ -1206,7 +1212,7 @@ static void reset_partition_data(struct cpuset *cs)
+ {
+       struct cpuset *parent = parent_cs(cs);
+-      if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
++      if (!cpuset_v2())
+               return;
+       lockdep_assert_held(&callback_lock);
+@@ -2035,7 +2041,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
+                */
+               if (!cp->partition_root_state && !force &&
+                   cpumask_equal(tmp->new_cpus, cp->effective_cpus) &&
+-                  (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
++                  (!cpuset_v2() ||
+                   (is_sched_load_balance(parent) == is_sched_load_balance(cp)))) {
+                       pos_css = css_rightmost_descendant(pos_css);
+                       continue;
+@@ -2109,8 +2115,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
+                * from parent if current cpuset isn't a valid partition root
+                * and their load balance states differ.
+                */
+-              if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+-                  !is_partition_valid(cp) &&
++              if (cpuset_v2() && !is_partition_valid(cp) &&
+                   (is_sched_load_balance(parent) != is_sched_load_balance(cp))) {
+                       if (is_sched_load_balance(parent))
+                               set_bit(CS_SCHED_LOAD_BALANCE, &cp->flags);
+@@ -2126,8 +2131,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
+                */
+               if (!cpumask_empty(cp->cpus_allowed) &&
+                   is_sched_load_balance(cp) &&
+-                 (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
+-                  is_partition_valid(cp)))
++                 (!cpuset_v2() || is_partition_valid(cp)))
+                       need_rebuild_sched_domains = true;
+               rcu_read_lock();
+@@ -2264,7 +2268,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+       retval = validate_change(cs, trialcs);
+-      if ((retval == -EINVAL) && cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
++      if ((retval == -EINVAL) && cpuset_v2()) {
+               struct cgroup_subsys_state *css;
+               struct cpuset *cp;
+@@ -2756,8 +2760,7 @@ int cpuset_update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
+       spin_unlock_irq(&callback_lock);
+       if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) {
+-              if (!IS_ENABLED(CONFIG_CPUSETS_V1) ||
+-                  cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
++              if (cpuset_v2())
+                       cpuset_force_rebuild();
+               else
+                       rebuild_sched_domains_locked();
+@@ -2943,8 +2946,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
+                * migration permission derives from hierarchy ownership in
+                * cgroup_procs_write_permission()).
+                */
+-              if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
+-                  (cpus_updated || mems_updated)) {
++              if (!cpuset_v2() || (cpus_updated || mems_updated)) {
+                       ret = security_task_setscheduler(task);
+                       if (ret)
+                               goto out_unlock;
+@@ -3058,8 +3060,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
+        * in effective cpus and mems. In that case, we can optimize out
+        * by skipping the task iteration and update.
+        */
+-      if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+-          !cpus_updated && !mems_updated) {
++      if (cpuset_v2() && !cpus_updated && !mems_updated) {
+               cpuset_attach_nodemask_to = cs->effective_mems;
+               goto out;
+       }
+@@ -3384,7 +3385,7 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
+       INIT_LIST_HEAD(&cs->remote_sibling);
+       /* Set CS_MEMORY_MIGRATE for default hierarchy */
+-      if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
++      if (cpuset_v2())
+               __set_bit(CS_MEMORY_MIGRATE, &cs->flags);
+       return &cs->css;
+@@ -3411,8 +3412,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
+       /*
+        * For v2, clear CS_SCHED_LOAD_BALANCE if parent is isolated
+        */
+-      if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+-          !is_sched_load_balance(parent))
++      if (cpuset_v2() && !is_sched_load_balance(parent))
+               clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
+       cpuset_inc();
+@@ -3482,8 +3482,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
+       if (is_partition_valid(cs))
+               update_prstate(cs, 0);
+-      if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+-          is_sched_load_balance(cs))
++      if (!cpuset_v2() && is_sched_load_balance(cs))
+               cpuset_update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
+       cpuset_dec();
+-- 
+2.39.5
+
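The helper works because IS_ENABLED() expands to a compile-time constant: with CONFIG_CPUSETS_V1 unset, !IS_ENABLED(CONFIG_CPUSETS_V1) is constant true, the || short-circuits before touching any cgroup state, and the compiler can discard every !cpuset_v2() branch outright. The idiom in isolation:

  #include <linux/kconfig.h>      /* IS_ENABLED() */

  static inline bool cpuset_v2(void)
  {
          /* Constant-folds to true when CONFIG_CPUSETS_V1=n, so v1-only
           * code guarded by !cpuset_v2() becomes dead and is dropped.
           */
          return !IS_ENABLED(CONFIG_CPUSETS_V1) ||
                  cgroup_subsys_on_dfl(cpuset_cgrp_subsys);
  }
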
diff --git a/queue-6.12/cgroup-cpuset-revert-allow-suppression-of-sched-doma.patch b/queue-6.12/cgroup-cpuset-revert-allow-suppression-of-sched-doma.patch
new file mode 100644
index 0000000..455201c
--- /dev/null
@@ -0,0 +1,162 @@
+From 9504cb2ff3491769f43d10a648dae93bce2cd795 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Nov 2024 21:50:21 -0500
+Subject: cgroup/cpuset: Revert "Allow suppression of sched domain rebuild in
+ update_cpumasks_hier()"
+
+From: Waiman Long <longman@redhat.com>
+
+[ Upstream commit bcd7012afd7bcd45fcd7a0e2f48e57b273702317 ]
+
+Revert commit 3ae0b773211e ("cgroup/cpuset: Allow suppression of sched
+domain rebuild in update_cpumasks_hier()") to allow for an alternative
+way to suppress unnecessary rebuild_sched_domains_locked() calls in
+update_cpumasks_hier() and elsewhere in a following commit.
+
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Stable-dep-of: a22b3d54de94 ("cgroup/cpuset: Fix race between newly created partition and dying one")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cpuset.c | 39 ++++++++++++++-------------------------
+ 1 file changed, 14 insertions(+), 25 deletions(-)
+
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index 70fac05123c6d..0012c34bb8601 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -1940,12 +1940,6 @@ static void compute_partition_effective_cpumask(struct cpuset *cs,
+       rcu_read_unlock();
+ }
+-/*
+- * update_cpumasks_hier() flags
+- */
+-#define HIER_CHECKALL         0x01    /* Check all cpusets with no skipping */
+-#define HIER_NO_SD_REBUILD    0x02    /* Don't rebuild sched domains */
+-
+ /*
+  * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree
+  * @cs:  the cpuset to consider
+@@ -1960,7 +1954,7 @@ static void compute_partition_effective_cpumask(struct cpuset *cs,
+  * Called with cpuset_mutex held
+  */
+ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
+-                               int flags)
++                               bool force)
+ {
+       struct cpuset *cp;
+       struct cgroup_subsys_state *pos_css;
+@@ -2025,10 +2019,10 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
+                * Skip the whole subtree if
+                * 1) the cpumask remains the same,
+                * 2) has no partition root state,
+-               * 3) HIER_CHECKALL flag not set, and
++               * 3) force flag not set, and
+                * 4) for v2 load balance state same as its parent.
+                */
+-              if (!cp->partition_root_state && !(flags & HIER_CHECKALL) &&
++              if (!cp->partition_root_state && !force &&
+                   cpumask_equal(tmp->new_cpus, cp->effective_cpus) &&
+                   (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
+                   (is_sched_load_balance(parent) == is_sched_load_balance(cp)))) {
+@@ -2130,8 +2124,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
+       }
+       rcu_read_unlock();
+-      if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD) &&
+-          !force_sd_rebuild)
++      if (need_rebuild_sched_domains && !force_sd_rebuild)
+               rebuild_sched_domains_locked();
+ }
+@@ -2159,9 +2152,7 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
+        * directly.
+        *
+        * The update_cpumasks_hier() function may sleep. So we have to
+-       * release the RCU read lock before calling it. HIER_NO_SD_REBUILD
+-       * flag is used to suppress rebuild of sched domains as the callers
+-       * will take care of that.
++       * release the RCU read lock before calling it.
+        */
+       rcu_read_lock();
+       cpuset_for_each_child(sibling, pos_css, parent) {
+@@ -2177,7 +2168,7 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
+                       continue;
+               rcu_read_unlock();
+-              update_cpumasks_hier(sibling, tmp, HIER_NO_SD_REBUILD);
++              update_cpumasks_hier(sibling, tmp, false);
+               rcu_read_lock();
+               css_put(&sibling->css);
+       }
+@@ -2197,7 +2188,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+       struct tmpmasks tmp;
+       struct cpuset *parent = parent_cs(cs);
+       bool invalidate = false;
+-      int hier_flags = 0;
++      bool force = false;
+       int old_prs = cs->partition_root_state;
+       /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */
+@@ -2258,8 +2249,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+        * Check all the descendants in update_cpumasks_hier() if
+        * effective_xcpus is to be changed.
+        */
+-      if (!cpumask_equal(cs->effective_xcpus, trialcs->effective_xcpus))
+-              hier_flags = HIER_CHECKALL;
++      force = !cpumask_equal(cs->effective_xcpus, trialcs->effective_xcpus);
+       retval = validate_change(cs, trialcs);
+@@ -2327,7 +2317,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+       spin_unlock_irq(&callback_lock);
+       /* effective_cpus/effective_xcpus will be updated here */
+-      update_cpumasks_hier(cs, &tmp, hier_flags);
++      update_cpumasks_hier(cs, &tmp, force);
+       /* Update CS_SCHED_LOAD_BALANCE and/or sched_domains, if necessary */
+       if (cs->partition_root_state)
+@@ -2352,7 +2342,7 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+       struct tmpmasks tmp;
+       struct cpuset *parent = parent_cs(cs);
+       bool invalidate = false;
+-      int hier_flags = 0;
++      bool force = false;
+       int old_prs = cs->partition_root_state;
+       if (!*buf) {
+@@ -2375,8 +2365,7 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+        * Check all the descendants in update_cpumasks_hier() if
+        * effective_xcpus is to be changed.
+        */
+-      if (!cpumask_equal(cs->effective_xcpus, trialcs->effective_xcpus))
+-              hier_flags = HIER_CHECKALL;
++      force = !cpumask_equal(cs->effective_xcpus, trialcs->effective_xcpus);
+       retval = validate_change(cs, trialcs);
+       if (retval)
+@@ -2429,8 +2418,8 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs,
+        * of the subtree when it is a valid partition root or effective_xcpus
+        * is updated.
+        */
+-      if (is_partition_valid(cs) || hier_flags)
+-              update_cpumasks_hier(cs, &tmp, hier_flags);
++      if (is_partition_valid(cs) || force)
++              update_cpumasks_hier(cs, &tmp, force);
+       /* Update CS_SCHED_LOAD_BALANCE and/or sched_domains, if necessary */
+       if (cs->partition_root_state)
+@@ -2871,7 +2860,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
+       update_unbound_workqueue_cpumask(new_xcpus_state);
+       /* Force update if switching back to member */
+-      update_cpumasks_hier(cs, &tmpmask, !new_prs ? HIER_CHECKALL : 0);
++      update_cpumasks_hier(cs, &tmpmask, !new_prs);
+       /* Update sched domains and load balance flag */
+       update_partition_sd_lb(cs, old_prs);
+-- 
+2.39.5
+
diff --git a/queue-6.12/codel-remove-sch-q.qlen-check-before-qdisc_tree_redu.patch b/queue-6.12/codel-remove-sch-q.qlen-check-before-qdisc_tree_redu.patch
new file mode 100644
index 0000000..2f9e201
--- /dev/null
@@ -0,0 +1,63 @@
+From 988f1acdfd23857c1b73b1e6358b88adbe7b8c92 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 3 Apr 2025 14:16:31 -0700
+Subject: codel: remove sch->q.qlen check before qdisc_tree_reduce_backlog()
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 342debc12183b51773b3345ba267e9263bdfaaef ]
+
+After making all ->qlen_notify() callbacks idempotent, it is now safe to
+remove the qlen != 0 check from both fq_codel_dequeue() and
+codel_qdisc_dequeue().
+
+Reported-by: Gerrard Tai <gerrard.tai@starlabs.sg>
+Fixes: 4b549a2ef4be ("fq_codel: Fair Queue Codel AQM")
+Fixes: 76e3cc126bb2 ("codel: Controlled Delay AQM")
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250403211636.166257-1-xiyou.wangcong@gmail.com
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_codel.c    | 5 +----
+ net/sched/sch_fq_codel.c | 6 ++----
+ 2 files changed, 3 insertions(+), 8 deletions(-)
+
+diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
+index 3e8d4fe4d91e3..e1f6e7618debd 100644
+--- a/net/sched/sch_codel.c
++++ b/net/sched/sch_codel.c
+@@ -65,10 +65,7 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
+                           &q->stats, qdisc_pkt_len, codel_get_enqueue_time,
+                           drop_func, dequeue_func);
+-      /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
+-       * or HTB crashes. Defer it for next round.
+-       */
+-      if (q->stats.drop_count && sch->q.qlen) {
++      if (q->stats.drop_count) {
+               qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len);
+               q->stats.drop_count = 0;
+               q->stats.drop_len = 0;
+diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
+index 4f908c11ba952..778f6e5966be8 100644
+--- a/net/sched/sch_fq_codel.c
++++ b/net/sched/sch_fq_codel.c
+@@ -314,10 +314,8 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
+       }
+       qdisc_bstats_update(sch, skb);
+       flow->deficit -= qdisc_pkt_len(skb);
+-      /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
+-       * or HTB crashes. Defer it for next round.
+-       */
+-      if (q->cstats.drop_count && sch->q.qlen) {
++
++      if (q->cstats.drop_count) {
+               qdisc_tree_reduce_backlog(sch, q->cstats.drop_count,
+                                         q->cstats.drop_len);
+               q->cstats.drop_count = 0;
+-- 
+2.39.5
+
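The deleted guard existed because qdisc_tree_reduce_backlog() notifies parent qdiscs via ->qlen_notify(), and some parents (HTB, historically) crashed if told about a class that had already gone empty. With every ->qlen_notify() made idempotent, a handler can simply no-op on repeat calls; a hedged sketch of what "idempotent" means here, with foo_* names being illustrative:

  static void foo_qlen_notify(struct Qdisc *sch, unsigned long arg)
  {
          struct foo_class *cl = (struct foo_class *)arg;

          /* Idempotent: if the class was already deactivated, its list
           * node is self-linked and this call is a no-op.
           */
          if (!list_empty(&cl->alist))
                  list_del_init(&cl->alist);
  }
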
diff --git a/queue-6.12/drm-i915-disable-rpg-during-live-selftest.patch b/queue-6.12/drm-i915-disable-rpg-during-live-selftest.patch
new file mode 100644
index 0000000..7645a88
--- /dev/null
@@ -0,0 +1,110 @@
+From d7645673374a4accc1b7510c61aa9f8f6de67b82 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Mar 2025 20:58:21 +0530
+Subject: drm/i915: Disable RPG during live selftest
+
+From: Badal Nilawar <badal.nilawar@intel.com>
+
+[ Upstream commit 9d3d9776bd3bd9c32d460dfe6c3363134de578bc ]
+
+The Forcewake timeout issue has been observed on Gen 12.0 and above.
+To address this, disable Render Power-Gating (RPG) during live self-tests
+for these generations. The temporary workaround 'drm/i915/mtl: do not
+enable render power-gating on MTL' disables RPG globally, which is
+unnecessary since the issues were only seen during self-tests.
+
+v2: take runtime pm wakeref
+
+Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/9413
+Fixes: 25e7976db86b ("drm/i915/mtl: do not enable render power-gating on MTL")
+Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Cc: Andi Shyti <andi.shyti@intel.com>
+Cc: Andrzej Hajda <andrzej.hajda@intel.com>
+Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
+Signed-off-by: Sk Anirban <sk.anirban@intel.com>
+Reviewed-by: Karthik Poosa <karthik.poosa@intel.com>
+Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20250310152821.2931678-1-sk.anirban@intel.com
+(cherry picked from commit 0a4ae87706c6d15d14648e428c3a76351f823e48)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gt/intel_rc6.c           | 19 ++++---------------
+ .../gpu/drm/i915/selftests/i915_selftest.c    | 18 ++++++++++++++++++
+ 2 files changed, 22 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
+index 9378d5901c493..9ca42589da4da 100644
+--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
++++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
+@@ -117,21 +117,10 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
+                       GEN6_RC_CTL_RC6_ENABLE |
+                       GEN6_RC_CTL_EI_MODE(1);
+-      /*
+-       * BSpec 52698 - Render powergating must be off.
+-       * FIXME BSpec is outdated, disabling powergating for MTL is just
+-       * temporary wa and should be removed after fixing real cause
+-       * of forcewake timeouts.
+-       */
+-      if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
+-              pg_enable =
+-                      GEN9_MEDIA_PG_ENABLE |
+-                      GEN11_MEDIA_SAMPLER_PG_ENABLE;
+-      else
+-              pg_enable =
+-                      GEN9_RENDER_PG_ENABLE |
+-                      GEN9_MEDIA_PG_ENABLE |
+-                      GEN11_MEDIA_SAMPLER_PG_ENABLE;
++      pg_enable =
++              GEN9_RENDER_PG_ENABLE |
++              GEN9_MEDIA_PG_ENABLE |
++              GEN11_MEDIA_SAMPLER_PG_ENABLE;
+       if (GRAPHICS_VER(gt->i915) >= 12 && !IS_DG1(gt->i915)) {
+               for (i = 0; i < I915_MAX_VCS; i++)
+diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c
+index fee76c1d2f450..889281819c5b1 100644
+--- a/drivers/gpu/drm/i915/selftests/i915_selftest.c
++++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c
+@@ -23,7 +23,9 @@
+ #include <linux/random.h>
++#include "gt/intel_gt.h"
+ #include "gt/intel_gt_pm.h"
++#include "gt/intel_gt_regs.h"
+ #include "gt/uc/intel_gsc_fw.h"
+ #include "i915_driver.h"
+@@ -253,11 +255,27 @@ int i915_mock_selftests(void)
+ int i915_live_selftests(struct pci_dev *pdev)
+ {
+       struct drm_i915_private *i915 = pdev_to_i915(pdev);
++      struct intel_uncore *uncore = &i915->uncore;
+       int err;
++      u32 pg_enable;
++      intel_wakeref_t wakeref;
+       if (!i915_selftest.live)
+               return 0;
++      /*
++       * FIXME Disable render powergating, this is temporary wa and should be removed
++       * after fixing real cause of forcewake timeouts.
++       */
++      with_intel_runtime_pm(uncore->rpm, wakeref) {
++              if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 00), IP_VER(12, 74))) {
++                      pg_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE);
++                      if (pg_enable & GEN9_RENDER_PG_ENABLE)
++                              intel_uncore_write_fw(uncore, GEN9_PG_ENABLE,
++                                                    pg_enable & ~GEN9_RENDER_PG_ENABLE);
++              }
++      }
++
+       __wait_gsc_proxy_completed(i915);
+       __wait_gsc_huc_load_completed(i915);
+-- 
+2.39.5
+
diff --git a/queue-6.12/drm-i915-huc-fix-fence-not-released-on-early-probe-e.patch b/queue-6.12/drm-i915-huc-fix-fence-not-released-on-early-probe-e.patch
new file mode 100644
index 0000000..49b9161
--- /dev/null
@@ -0,0 +1,142 @@
+From d2bd289e3163073ab1f9ea60ba2257c040014692 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Apr 2025 19:20:57 +0200
+Subject: drm/i915/huc: Fix fence not released on early probe errors
+
+From: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
+
+[ Upstream commit e3ea2eae70692a455e256787e4f54153fb739b90 ]
+
+The HuC delayed-load fence, introduced with commit 27536e03271da
+("drm/i915/huc: track delayed HuC load with a fence"), is registered with
+the object tracker early on driver probe but unregistered only on driver
+remove, which is not called on early probe errors.  Since its memory is
+allocated under devres and then released anyway, it may be allocated to
+the fence again and reused on a future driver probe, resulting in kernel
+warnings that taint the kernel:
+
+<4> [309.731371] ------------[ cut here ]------------
+<3> [309.731373] ODEBUG: init destroyed (active state 0) object: ffff88813d7dd2e0 object type: i915_sw_fence hint: sw_fence_dummy_notify+0x0/0x20 [i915]
+<4> [309.731575] WARNING: CPU: 2 PID: 3161 at lib/debugobjects.c:612 debug_print_object+0x93/0xf0
+...
+<4> [309.731693] CPU: 2 UID: 0 PID: 3161 Comm: i915_module_loa Tainted: G     U             6.14.0-CI_DRM_16362-gf0fd77956987+ #1
+...
+<4> [309.731700] RIP: 0010:debug_print_object+0x93/0xf0
+...
+<4> [309.731728] Call Trace:
+<4> [309.731730]  <TASK>
+...
+<4> [309.731949]  __debug_object_init+0x17b/0x1c0
+<4> [309.731957]  debug_object_init+0x34/0x50
+<4> [309.732126]  __i915_sw_fence_init+0x34/0x60 [i915]
+<4> [309.732256]  intel_huc_init_early+0x4b/0x1d0 [i915]
+<4> [309.732468]  intel_uc_init_early+0x61/0x680 [i915]
+<4> [309.732667]  intel_gt_common_init_early+0x105/0x130 [i915]
+<4> [309.732804]  intel_root_gt_init_early+0x63/0x80 [i915]
+<4> [309.732938]  i915_driver_probe+0x1fa/0xeb0 [i915]
+<4> [309.733075]  i915_pci_probe+0xe6/0x220 [i915]
+<4> [309.733198]  local_pci_probe+0x44/0xb0
+<4> [309.733203]  pci_device_probe+0xf4/0x270
+<4> [309.733209]  really_probe+0xee/0x3c0
+<4> [309.733215]  __driver_probe_device+0x8c/0x180
+<4> [309.733219]  driver_probe_device+0x24/0xd0
+<4> [309.733223]  __driver_attach+0x10f/0x220
+<4> [309.733230]  bus_for_each_dev+0x7d/0xe0
+<4> [309.733236]  driver_attach+0x1e/0x30
+<4> [309.733239]  bus_add_driver+0x151/0x290
+<4> [309.733244]  driver_register+0x5e/0x130
+<4> [309.733247]  __pci_register_driver+0x7d/0x90
+<4> [309.733251]  i915_pci_register_driver+0x23/0x30 [i915]
+<4> [309.733413]  i915_init+0x34/0x120 [i915]
+<4> [309.733655]  do_one_initcall+0x62/0x3f0
+<4> [309.733667]  do_init_module+0x97/0x2a0
+<4> [309.733671]  load_module+0x25ff/0x2890
+<4> [309.733688]  init_module_from_file+0x97/0xe0
+<4> [309.733701]  idempotent_init_module+0x118/0x330
+<4> [309.733711]  __x64_sys_finit_module+0x77/0x100
+<4> [309.733715]  x64_sys_call+0x1f37/0x2650
+<4> [309.733719]  do_syscall_64+0x91/0x180
+<4> [309.733763]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
+<4> [309.733792]  </TASK>
+...
+<4> [309.733806] ---[ end trace 0000000000000000 ]---
+
+That scenario is most easily reproducible with
+igt@i915_module_load@reload-with-fault-injection.
+
+Fix the issue by moving the cleanup step to the driver release path.
+
+Fixes: 27536e03271da ("drm/i915/huc: track delayed HuC load with a fence")
+Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13592
+Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
+Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
+Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
+Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
+Reviewed-by: Krzysztof Karas <krzysztof.karas@intel.com>
+Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
+Link: https://lore.kernel.org/r/20250402172057.209924-2-janusz.krzysztofik@linux.intel.com
+(cherry picked from commit 795dbde92fe5c6996a02a5b579481de73035e7bf)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gt/uc/intel_huc.c | 11 +++++------
+ drivers/gpu/drm/i915/gt/uc/intel_huc.h |  1 +
+ drivers/gpu/drm/i915/gt/uc/intel_uc.c  |  1 +
+ 3 files changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+index 2d9152eb72825..24fdce844d9e3 100644
+--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
++++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+@@ -317,6 +317,11 @@ void intel_huc_init_early(struct intel_huc *huc)
+       }
+ }
++void intel_huc_fini_late(struct intel_huc *huc)
++{
++      delayed_huc_load_fini(huc);
++}
++
+ #define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy")
+ static int check_huc_loading_mode(struct intel_huc *huc)
+ {
+@@ -414,12 +419,6 @@ int intel_huc_init(struct intel_huc *huc)
+ void intel_huc_fini(struct intel_huc *huc)
+ {
+-      /*
+-       * the fence is initialized in init_early, so we need to clean it up
+-       * even if HuC loading is off.
+-       */
+-      delayed_huc_load_fini(huc);
+-
+       if (huc->heci_pkt)
+               i915_vma_unpin_and_release(&huc->heci_pkt, 0);
+diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+index ba5cb08e9e7bf..09aff3148f7dd 100644
+--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
++++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+@@ -55,6 +55,7 @@ struct intel_huc {
+ int intel_huc_sanitize(struct intel_huc *huc);
+ void intel_huc_init_early(struct intel_huc *huc);
++void intel_huc_fini_late(struct intel_huc *huc);
+ int intel_huc_init(struct intel_huc *huc);
+ void intel_huc_fini(struct intel_huc *huc);
+ void intel_huc_suspend(struct intel_huc *huc);
+diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+index 5b8080ec5315b..4f751ce74214d 100644
+--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
++++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+@@ -136,6 +136,7 @@ void intel_uc_init_late(struct intel_uc *uc)
+ void intel_uc_driver_late_release(struct intel_uc *uc)
+ {
++      intel_huc_fini_late(&uc->huc);
+ }
+ /**
+-- 
+2.39.5
+
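
A note on the pattern the fix above relies on: objects created in an
*_init_early() step must be torn down from a path that also runs when probe
fails early, i.e. the driver release path rather than remove/fini. A minimal
sketch of that pairing; all foo_* names are hypothetical and a completion
stands in for the HuC fence:

    #include <linux/completion.h>

    struct foo_device {
            struct completion delayed_load; /* stands in for the HuC fence */
    };

    /* Set up state at the very start of probe, before anything can fail. */
    void foo_init_early(struct foo_device *foo)
    {
            init_completion(&foo->delayed_load);
    }

    /* Tear it down from the driver *release* path: unlike a fini() called
     * from remove, which early probe errors skip, release runs on every
     * probe outcome, so the tracked object is cleaned up exactly once. */
    void foo_fini_late(struct foo_device *foo)
    {
            complete_all(&foo->delayed_load);
    }
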
diff --git a/queue-6.12/drm-tests-cmdline-fix-drm_display_mode-memory-leak.patch b/queue-6.12/drm-tests-cmdline-fix-drm_display_mode-memory-leak.patch
new file mode 100644 (file)
index 0000000..9553bb6
--- /dev/null
@@ -0,0 +1,58 @@
+From 04f2ceaecd9aba9f68bdea16ce349b6e412b7703 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Apr 2025 09:34:10 +0200
+Subject: drm/tests: cmdline: Fix drm_display_mode memory leak
+
+From: Maxime Ripard <mripard@kernel.org>
+
+[ Upstream commit 70f29ca3117a8796cd6bde7612a3ded96d0f2dde ]
+
+drm_analog_tv_mode() and its variants return a drm_display_mode that
+needs to be destroyed later one. The drm_test_cmdline_tv_options() test
+never does however, which leads to a memory leak.
+
+Let's make sure it's freed.
+
+Reported-by: Philipp Stanner <phasta@mailbox.org>
+Closes: https://lore.kernel.org/dri-devel/a7655158a6367ac46194d57f4b7433ef0772a73e.camel@mailbox.org/
+Fixes: e691c9992ae1 ("drm/modes: Introduce the tv_mode property as a command-line option")
+Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
+Link: https://lore.kernel.org/r/20250408-drm-kunit-drm-display-mode-memleak-v1-4-996305a2e75a@kernel.org
+Signed-off-by: Maxime Ripard <mripard@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/tests/drm_cmdline_parser_test.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/tests/drm_cmdline_parser_test.c b/drivers/gpu/drm/tests/drm_cmdline_parser_test.c
+index 59c8408c453c2..1cfcb597b088b 100644
+--- a/drivers/gpu/drm/tests/drm_cmdline_parser_test.c
++++ b/drivers/gpu/drm/tests/drm_cmdline_parser_test.c
+@@ -7,6 +7,7 @@
+ #include <kunit/test.h>
+ #include <drm/drm_connector.h>
++#include <drm/drm_kunit_helpers.h>
+ #include <drm/drm_modes.h>
+ static const struct drm_connector no_connector = {};
+@@ -955,8 +956,15 @@ struct drm_cmdline_tv_option_test {
+ static void drm_test_cmdline_tv_options(struct kunit *test)
+ {
+       const struct drm_cmdline_tv_option_test *params = test->param_value;
+-      const struct drm_display_mode *expected_mode = params->mode_fn(NULL);
++      struct drm_display_mode *expected_mode;
+       struct drm_cmdline_mode mode = { };
++      int ret;
++
++      expected_mode = params->mode_fn(NULL);
++      KUNIT_ASSERT_NOT_NULL(test, expected_mode);
++
++      ret = drm_kunit_add_mode_destroy_action(test, expected_mode);
++      KUNIT_ASSERT_EQ(test, ret, 0);
+       KUNIT_EXPECT_TRUE(test, drm_mode_parse_command_line_for_connector(params->cmdline,
+                                                                         &no_connector, &mode));
+-- 
+2.39.5
+
diff --git a/queue-6.12/drm-tests-helpers-create-kunit-helper-to-destroy-a-d.patch b/queue-6.12/drm-tests-helpers-create-kunit-helper-to-destroy-a-d.patch
new file mode 100644 (file)
index 0000000..2ef014d
--- /dev/null
@@ -0,0 +1,78 @@
+From 4fa7c6c9a0102c7364c28013c8d9d5dc613cbb81 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Apr 2025 09:34:07 +0200
+Subject: drm/tests: helpers: Create kunit helper to destroy a drm_display_mode
+
+From: Maxime Ripard <mripard@kernel.org>
+
+[ Upstream commit 13c1d5f3a7fa7b55a26e73bb9e95342374a489b2 ]
+
+A number of test suites call functions that expect the returned
+drm_display_mode to be destroyed eventually.
+
+However, none of the tests called drm_mode_destroy, which results in a
+memory leak.
+
+Since drm_mode_destroy takes two pointers as arguments, we can't use a
+kunit wrapper. Let's just create a helper every test suite can use.
+
+Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
+Link: https://lore.kernel.org/r/20250408-drm-kunit-drm-display-mode-memleak-v1-1-996305a2e75a@kernel.org
+Signed-off-by: Maxime Ripard <mripard@kernel.org>
+Stable-dep-of: 70f29ca3117a ("drm/tests: cmdline: Fix drm_display_mode memory leak")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/tests/drm_kunit_helpers.c | 22 ++++++++++++++++++++++
+ include/drm/drm_kunit_helpers.h           |  3 +++
+ 2 files changed, 25 insertions(+)
+
+diff --git a/drivers/gpu/drm/tests/drm_kunit_helpers.c b/drivers/gpu/drm/tests/drm_kunit_helpers.c
+index 3c0b7824c0be3..922c4b6ed1dc9 100644
+--- a/drivers/gpu/drm/tests/drm_kunit_helpers.c
++++ b/drivers/gpu/drm/tests/drm_kunit_helpers.c
+@@ -319,6 +319,28 @@ static void kunit_action_drm_mode_destroy(void *ptr)
+       drm_mode_destroy(NULL, mode);
+ }
++/**
++ * drm_kunit_add_mode_destroy_action() - Add a drm_destroy_mode kunit action
++ * @test: The test context object
++ * @mode: The drm_display_mode to destroy eventually
++ *
++ * Registers a kunit action that will destroy the drm_display_mode at
++ * the end of the test.
++ *
++ * If an error occurs, the drm_display_mode will be destroyed.
++ *
++ * Returns:
++ * 0 on success, an error code otherwise.
++ */
++int drm_kunit_add_mode_destroy_action(struct kunit *test,
++                                    struct drm_display_mode *mode)
++{
++      return kunit_add_action_or_reset(test,
++                                       kunit_action_drm_mode_destroy,
++                                       mode);
++}
++EXPORT_SYMBOL_GPL(drm_kunit_add_mode_destroy_action);
++
+ /**
+  * drm_kunit_display_mode_from_cea_vic() - return a mode for CEA VIC for a KUnit test
+  * @test: The test context object
+diff --git a/include/drm/drm_kunit_helpers.h b/include/drm/drm_kunit_helpers.h
+index afdd46ef04f70..c835f113055dc 100644
+--- a/include/drm/drm_kunit_helpers.h
++++ b/include/drm/drm_kunit_helpers.h
+@@ -120,6 +120,9 @@ drm_kunit_helper_create_crtc(struct kunit *test,
+                            const struct drm_crtc_funcs *funcs,
+                            const struct drm_crtc_helper_funcs *helper_funcs);
++int drm_kunit_add_mode_destroy_action(struct kunit *test,
++                                    struct drm_display_mode *mode);
++
+ struct drm_display_mode *
+ drm_kunit_display_mode_from_cea_vic(struct kunit *test, struct drm_device *dev,
+                                   u8 video_code);
+-- 
+2.39.5
+
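
Usage-wise, the helper above is an instance of the generic KUnit
deferred-action pattern: wrap a destructor whose signature doesn't match
kunit_action_t, then register it with kunit_add_action_or_reset() so cleanup
runs even when registration itself fails. A hedged sketch with a hypothetical
widget object:

    #include <kunit/test.h>

    struct widget;
    struct widget *widget_create(void);             /* hypothetical */
    void widget_destroy(void *unused, struct widget *w); /* two-arg destructor */

    static void widget_destroy_action(void *ptr)
    {
            struct widget *w = ptr;

            widget_destroy(NULL, w);        /* adapt to kunit_action_t */
    }

    static void widget_example_test(struct kunit *test)
    {
            struct widget *w;
            int ret;

            w = widget_create();
            KUNIT_ASSERT_NOT_NULL(test, w);

            /* On failure the action fires immediately, so the widget
             * cannot leak between this point and the end of the test. */
            ret = kunit_add_action_or_reset(test, widget_destroy_action, w);
            KUNIT_ASSERT_EQ(test, ret, 0);
    }
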
diff --git a/queue-6.12/drm-tests-modes-fix-drm_display_mode-memory-leak.patch b/queue-6.12/drm-tests-modes-fix-drm_display_mode-memory-leak.patch
new file mode 100644 (file)
index 0000000..f13ef77
--- /dev/null
@@ -0,0 +1,117 @@
+From 9cae88f438ed0f71a796bdfd72f6b4cd51d649c5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Apr 2025 09:34:11 +0200
+Subject: drm/tests: modes: Fix drm_display_mode memory leak
+
+From: Maxime Ripard <mripard@kernel.org>
+
+[ Upstream commit d34146340f95cd9bf06d4ce71cca72127dc0b7cd ]
+
+drm_analog_tv_mode() and its variants return a drm_display_mode that
+needs to be destroyed later on. The drm_modes_analog_tv tests never
+do so, however, which leads to a memory leak.
+
+Let's make sure it's freed.
+
+Reported-by: Philipp Stanner <phasta@mailbox.org>
+Closes: https://lore.kernel.org/dri-devel/a7655158a6367ac46194d57f4b7433ef0772a73e.camel@mailbox.org/
+Fixes: 4fcd238560ee ("drm/modes: Add a function to generate analog display modes")
+Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
+Link: https://lore.kernel.org/r/20250408-drm-kunit-drm-display-mode-memleak-v1-5-996305a2e75a@kernel.org
+Signed-off-by: Maxime Ripard <mripard@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/tests/drm_modes_test.c | 22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+diff --git a/drivers/gpu/drm/tests/drm_modes_test.c b/drivers/gpu/drm/tests/drm_modes_test.c
+index 6ed51f99e133c..7ba646d87856f 100644
+--- a/drivers/gpu/drm/tests/drm_modes_test.c
++++ b/drivers/gpu/drm/tests/drm_modes_test.c
+@@ -40,6 +40,7 @@ static void drm_test_modes_analog_tv_ntsc_480i(struct kunit *test)
+ {
+       struct drm_test_modes_priv *priv = test->priv;
+       struct drm_display_mode *mode;
++      int ret;
+       mode = drm_analog_tv_mode(priv->drm,
+                                 DRM_MODE_TV_MODE_NTSC,
+@@ -47,6 +48,9 @@ static void drm_test_modes_analog_tv_ntsc_480i(struct kunit *test)
+                                 true);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
++      ret = drm_kunit_add_mode_destroy_action(test, mode);
++      KUNIT_ASSERT_EQ(test, ret, 0);
++
+       KUNIT_EXPECT_EQ(test, drm_mode_vrefresh(mode), 60);
+       KUNIT_EXPECT_EQ(test, mode->hdisplay, 720);
+@@ -70,6 +74,7 @@ static void drm_test_modes_analog_tv_ntsc_480i_inlined(struct kunit *test)
+ {
+       struct drm_test_modes_priv *priv = test->priv;
+       struct drm_display_mode *expected, *mode;
++      int ret;
+       expected = drm_analog_tv_mode(priv->drm,
+                                     DRM_MODE_TV_MODE_NTSC,
+@@ -77,9 +82,15 @@ static void drm_test_modes_analog_tv_ntsc_480i_inlined(struct kunit *test)
+                                     true);
+       KUNIT_ASSERT_NOT_NULL(test, expected);
++      ret = drm_kunit_add_mode_destroy_action(test, expected);
++      KUNIT_ASSERT_EQ(test, ret, 0);
++
+       mode = drm_mode_analog_ntsc_480i(priv->drm);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
++      ret = drm_kunit_add_mode_destroy_action(test, mode);
++      KUNIT_ASSERT_EQ(test, ret, 0);
++
+       KUNIT_EXPECT_TRUE(test, drm_mode_equal(expected, mode));
+ }
+@@ -87,6 +98,7 @@ static void drm_test_modes_analog_tv_pal_576i(struct kunit *test)
+ {
+       struct drm_test_modes_priv *priv = test->priv;
+       struct drm_display_mode *mode;
++      int ret;
+       mode = drm_analog_tv_mode(priv->drm,
+                                 DRM_MODE_TV_MODE_PAL,
+@@ -94,6 +106,9 @@ static void drm_test_modes_analog_tv_pal_576i(struct kunit *test)
+                                 true);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
++      ret = drm_kunit_add_mode_destroy_action(test, mode);
++      KUNIT_ASSERT_EQ(test, ret, 0);
++
+       KUNIT_EXPECT_EQ(test, drm_mode_vrefresh(mode), 50);
+       KUNIT_EXPECT_EQ(test, mode->hdisplay, 720);
+@@ -117,6 +132,7 @@ static void drm_test_modes_analog_tv_pal_576i_inlined(struct kunit *test)
+ {
+       struct drm_test_modes_priv *priv = test->priv;
+       struct drm_display_mode *expected, *mode;
++      int ret;
+       expected = drm_analog_tv_mode(priv->drm,
+                                     DRM_MODE_TV_MODE_PAL,
+@@ -124,9 +140,15 @@ static void drm_test_modes_analog_tv_pal_576i_inlined(struct kunit *test)
+                                     true);
+       KUNIT_ASSERT_NOT_NULL(test, expected);
++      ret = drm_kunit_add_mode_destroy_action(test, expected);
++      KUNIT_ASSERT_EQ(test, ret, 0);
++
+       mode = drm_mode_analog_pal_576i(priv->drm);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
++      ret = drm_kunit_add_mode_destroy_action(test, mode);
++      KUNIT_ASSERT_EQ(test, ret, 0);
++
+       KUNIT_EXPECT_TRUE(test, drm_mode_equal(expected, mode));
+ }
+-- 
+2.39.5
+
diff --git a/queue-6.12/drm-tests-modeset-fix-drm_display_mode-memory-leak.patch b/queue-6.12/drm-tests-modeset-fix-drm_display_mode-memory-leak.patch
new file mode 100644 (file)
index 0000000..fb54f0f
--- /dev/null
@@ -0,0 +1,43 @@
+From 7e26f2a6191fd119378fbea6d5a983d0ba2f994f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Apr 2025 09:34:08 +0200
+Subject: drm/tests: modeset: Fix drm_display_mode memory leak
+
+From: Maxime Ripard <mripard@kernel.org>
+
+[ Upstream commit dacafdcc7789cfeb0f0552716db56f210238225d ]
+
+drm_mode_find_dmt() returns a drm_display_mode that needs to be
+destroyed later on. The drm_test_pick_cmdline_res_1920_1080_60() test
+never does so, however, which leads to a memory leak.
+
+Let's make sure it's freed.
+
+Reported-by: Philipp Stanner <phasta@mailbox.org>
+Closes: https://lore.kernel.org/dri-devel/a7655158a6367ac46194d57f4b7433ef0772a73e.camel@mailbox.org/
+Fixes: 8fc0380f6ba7 ("drm/client: Add some tests for drm_connector_pick_cmdline_mode()")
+Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
+Link: https://lore.kernel.org/r/20250408-drm-kunit-drm-display-mode-memleak-v1-2-996305a2e75a@kernel.org
+Signed-off-by: Maxime Ripard <mripard@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/tests/drm_client_modeset_test.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/gpu/drm/tests/drm_client_modeset_test.c b/drivers/gpu/drm/tests/drm_client_modeset_test.c
+index 7516f6cb36e4e..3e9518d7b8b7e 100644
+--- a/drivers/gpu/drm/tests/drm_client_modeset_test.c
++++ b/drivers/gpu/drm/tests/drm_client_modeset_test.c
+@@ -95,6 +95,9 @@ static void drm_test_pick_cmdline_res_1920_1080_60(struct kunit *test)
+       expected_mode = drm_mode_find_dmt(priv->drm, 1920, 1080, 60, false);
+       KUNIT_ASSERT_NOT_NULL(test, expected_mode);
++      ret = drm_kunit_add_mode_destroy_action(test, expected_mode);
++      KUNIT_ASSERT_EQ(test, ret, 0);
++
+       KUNIT_ASSERT_TRUE(test,
+                         drm_mode_parse_command_line_for_connector(cmdline,
+                                                                   connector,
+-- 
+2.39.5
+
diff --git a/queue-6.12/drm-tests-probe-helper-fix-drm_display_mode-memory-l.patch b/queue-6.12/drm-tests-probe-helper-fix-drm_display_mode-memory-l.patch
new file mode 100644 (file)
index 0000000..cb7d39a
--- /dev/null
@@ -0,0 +1,63 @@
+From 246e68c896d3c096eab5604c89e7e3b585b05969 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Apr 2025 09:34:13 +0200
+Subject: drm/tests: probe-helper: Fix drm_display_mode memory leak
+
+From: Maxime Ripard <mripard@kernel.org>
+
+[ Upstream commit 8b6f2e28431b2f9f84073bff50353aeaf25559d0 ]
+
+drm_analog_tv_mode() and its variants return a drm_display_mode that
+needs to be destroyed later on. The
+drm_test_connector_helper_tv_get_modes_check() test never does so,
+however, which leads to a memory leak.
+
+Let's make sure it's freed.
+
+Reported-by: Philipp Stanner <phasta@mailbox.org>
+Closes: https://lore.kernel.org/dri-devel/a7655158a6367ac46194d57f4b7433ef0772a73e.camel@mailbox.org/
+Fixes: 1e4a91db109f ("drm/probe-helper: Provide a TV get_modes helper")
+Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
+Link: https://lore.kernel.org/r/20250408-drm-kunit-drm-display-mode-memleak-v1-7-996305a2e75a@kernel.org
+Signed-off-by: Maxime Ripard <mripard@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/tests/drm_probe_helper_test.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/tests/drm_probe_helper_test.c b/drivers/gpu/drm/tests/drm_probe_helper_test.c
+index bc09ff38aca18..db0e4f5df275e 100644
+--- a/drivers/gpu/drm/tests/drm_probe_helper_test.c
++++ b/drivers/gpu/drm/tests/drm_probe_helper_test.c
+@@ -98,7 +98,7 @@ drm_test_connector_helper_tv_get_modes_check(struct kunit *test)
+       struct drm_connector *connector = &priv->connector;
+       struct drm_cmdline_mode *cmdline = &connector->cmdline_mode;
+       struct drm_display_mode *mode;
+-      const struct drm_display_mode *expected;
++      struct drm_display_mode *expected;
+       size_t len;
+       int ret;
+@@ -134,6 +134,9 @@ drm_test_connector_helper_tv_get_modes_check(struct kunit *test)
+               KUNIT_EXPECT_TRUE(test, drm_mode_equal(mode, expected));
+               KUNIT_EXPECT_TRUE(test, mode->type & DRM_MODE_TYPE_PREFERRED);
++
++              ret = drm_kunit_add_mode_destroy_action(test, expected);
++              KUNIT_ASSERT_EQ(test, ret, 0);
+       }
+       if (params->num_expected_modes >= 2) {
+@@ -145,6 +148,9 @@ drm_test_connector_helper_tv_get_modes_check(struct kunit *test)
+               KUNIT_EXPECT_TRUE(test, drm_mode_equal(mode, expected));
+               KUNIT_EXPECT_FALSE(test, mode->type & DRM_MODE_TYPE_PREFERRED);
++
++              ret = drm_kunit_add_mode_destroy_action(test, expected);
++              KUNIT_ASSERT_EQ(test, ret, 0);
+       }
+       mutex_unlock(&priv->drm->mode_config.mutex);
+-- 
+2.39.5
+
diff --git a/queue-6.12/drm-xe-hw_engine-define-sysfs_ops-on-all-directories.patch b/queue-6.12/drm-xe-hw_engine-define-sysfs_ops-on-all-directories.patch
new file mode 100644 (file)
index 0000000..ca178b1
--- /dev/null
@@ -0,0 +1,179 @@
+From 72563a7af0e14879d1ac9cfc48b7f0469a496bf1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Mar 2025 17:56:47 +0530
+Subject: drm/xe/hw_engine: define sysfs_ops on all directories
+
+From: Tejas Upadhyay <tejas.upadhyay@intel.com>
+
+[ Upstream commit a5c71fd5b69b9da77e5e0b268e69e256932ba49c ]
+
+sysfs_ops needs to be defined on all directories that
+can have attr files with set/get methods. Add sysfs_ops
+even to those directories which are currently empty but
+will have attr files with set/get methods in the future.
+Leave .defaults with the default sysfs_ops, as it will
+never have a setter method.
+
+V2(Himal/Rodrigo):
+ - use single sysfs_ops for all dir and attr with set/get
+ - add default ops as ./default does not need runtime pm at all
+
+Fixes: 3f0e14651ab0 ("drm/xe: Runtime PM wake on every sysfs call")
+Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20250327122647.886637-1-tejas.upadhyay@intel.com
+Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
+(cherry picked from commit 40780b9760b561e093508d07b8b9b06c94ab201e)
+Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 108 +++++++++---------
+ 1 file changed, 52 insertions(+), 56 deletions(-)
+
+diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
+index b53e8d2accdbd..a440442b4d727 100644
+--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
++++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
+@@ -32,14 +32,61 @@ bool xe_hw_engine_timeout_in_range(u64 timeout, u64 min, u64 max)
+       return timeout >= min && timeout <= max;
+ }
+-static void kobj_xe_hw_engine_release(struct kobject *kobj)
++static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj)
+ {
+       kfree(kobj);
+ }
++static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj,
++                                                struct attribute *attr,
++                                                char *buf)
++{
++      struct xe_device *xe = kobj_to_xe(kobj);
++      struct kobj_attribute *kattr;
++      ssize_t ret = -EIO;
++
++      kattr = container_of(attr, struct kobj_attribute, attr);
++      if (kattr->show) {
++              xe_pm_runtime_get(xe);
++              ret = kattr->show(kobj, kattr, buf);
++              xe_pm_runtime_put(xe);
++      }
++
++      return ret;
++}
++
++static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj,
++                                                 struct attribute *attr,
++                                                 const char *buf,
++                                                 size_t count)
++{
++      struct xe_device *xe = kobj_to_xe(kobj);
++      struct kobj_attribute *kattr;
++      ssize_t ret = -EIO;
++
++      kattr = container_of(attr, struct kobj_attribute, attr);
++      if (kattr->store) {
++              xe_pm_runtime_get(xe);
++              ret = kattr->store(kobj, kattr, buf, count);
++              xe_pm_runtime_put(xe);
++      }
++
++      return ret;
++}
++
++static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = {
++      .show = xe_hw_engine_class_sysfs_attr_show,
++      .store = xe_hw_engine_class_sysfs_attr_store,
++};
++
+ static const struct kobj_type kobj_xe_hw_engine_type = {
+-      .release = kobj_xe_hw_engine_release,
+-      .sysfs_ops = &kobj_sysfs_ops
++      .release = xe_hw_engine_sysfs_kobj_release,
++      .sysfs_ops = &xe_hw_engine_class_sysfs_ops,
++};
++
++static const struct kobj_type kobj_xe_hw_engine_type_def = {
++      .release = xe_hw_engine_sysfs_kobj_release,
++      .sysfs_ops = &kobj_sysfs_ops,
+ };
+ static ssize_t job_timeout_max_store(struct kobject *kobj,
+@@ -543,7 +590,7 @@ static int xe_add_hw_engine_class_defaults(struct xe_device *xe,
+       if (!kobj)
+               return -ENOMEM;
+-      kobject_init(kobj, &kobj_xe_hw_engine_type);
++      kobject_init(kobj, &kobj_xe_hw_engine_type_def);
+       err = kobject_add(kobj, parent, "%s", ".defaults");
+       if (err)
+               goto err_object;
+@@ -559,57 +606,6 @@ static int xe_add_hw_engine_class_defaults(struct xe_device *xe,
+       return err;
+ }
+-static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj)
+-{
+-      kfree(kobj);
+-}
+-
+-static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj,
+-                                                struct attribute *attr,
+-                                                char *buf)
+-{
+-      struct xe_device *xe = kobj_to_xe(kobj);
+-      struct kobj_attribute *kattr;
+-      ssize_t ret = -EIO;
+-
+-      kattr = container_of(attr, struct kobj_attribute, attr);
+-      if (kattr->show) {
+-              xe_pm_runtime_get(xe);
+-              ret = kattr->show(kobj, kattr, buf);
+-              xe_pm_runtime_put(xe);
+-      }
+-
+-      return ret;
+-}
+-
+-static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj,
+-                                                 struct attribute *attr,
+-                                                 const char *buf,
+-                                                 size_t count)
+-{
+-      struct xe_device *xe = kobj_to_xe(kobj);
+-      struct kobj_attribute *kattr;
+-      ssize_t ret = -EIO;
+-
+-      kattr = container_of(attr, struct kobj_attribute, attr);
+-      if (kattr->store) {
+-              xe_pm_runtime_get(xe);
+-              ret = kattr->store(kobj, kattr, buf, count);
+-              xe_pm_runtime_put(xe);
+-      }
+-
+-      return ret;
+-}
+-
+-static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = {
+-      .show = xe_hw_engine_class_sysfs_attr_show,
+-      .store = xe_hw_engine_class_sysfs_attr_store,
+-};
+-
+-static const struct kobj_type xe_hw_engine_sysfs_kobj_type = {
+-      .release = xe_hw_engine_sysfs_kobj_release,
+-      .sysfs_ops = &xe_hw_engine_class_sysfs_ops,
+-};
+ static void hw_engine_class_sysfs_fini(void *arg)
+ {
+@@ -640,7 +636,7 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
+       if (!kobj)
+               return -ENOMEM;
+-      kobject_init(kobj, &xe_hw_engine_sysfs_kobj_type);
++      kobject_init(kobj, &kobj_xe_hw_engine_type);
+       err = kobject_add(kobj, gt->sysfs, "engines");
+       if (err)
+-- 
+2.39.5
+
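
For context on why every directory needs this: sysfs dispatches attribute
reads and writes through kobj->ktype->sysfs_ops, so a kobject registered with
a ktype whose ops cannot reach the kobj_attribute handlers makes any attr file
under it fail with -EIO. A minimal sketch of the wiring; the example_* names
are hypothetical:

    #include <linux/kobject.h>
    #include <linux/slab.h>
    #include <linux/sysfs.h>

    static void example_release(struct kobject *kobj)
    {
            kfree(kobj);
    }

    static ssize_t example_show(struct kobject *kobj,
                                struct kobj_attribute *attr, char *buf)
    {
            return sysfs_emit(buf, "%d\n", 42);
    }

    static struct kobj_attribute example_attr = __ATTR_RO(example);

    /* A directory carrying attributes like the one above must use a ktype
     * whose sysfs_ops can dispatch to kobj_attribute handlers: either the
     * stock kobj_sysfs_ops, or a wrapper (as in the patch) that also takes
     * a runtime PM reference around each show/store call. */
    static const struct kobj_type example_ktype = {
            .release   = example_release,
            .sysfs_ops = &kobj_sysfs_ops,
    };
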
diff --git a/queue-6.12/gpiolib-of-fix-the-choice-for-ingenic-nand-quirk.patch b/queue-6.12/gpiolib-of-fix-the-choice-for-ingenic-nand-quirk.patch
new file mode 100644 (file)
index 0000000..26f40d0
--- /dev/null
@@ -0,0 +1,38 @@
+From 20a9f0b448136b981645e1c66231b457079df6f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Apr 2025 15:20:00 +0300
+Subject: gpiolib: of: Fix the choice for Ingenic NAND quirk
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit 2b9c536430126c233552cdcd6ec9d5077454ece4 ]
+
+The Ingenic NAND quirk has been added under the CONFIG_LCD_HX8357
+ifdeffery, which is quite wrong. Fix the choice for the Ingenic NAND
+quirk by wrapping it in its own ifdeffery related to the respective
+driver.
+
+Fixes: 3a7fd473bd5d ("mtd: rawnand: ingenic: move the GPIO quirk to gpiolib-of.c")
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://lore.kernel.org/r/20250402122058.1517393-2-andriy.shevchenko@linux.intel.com
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpiolib-of.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
+index 880f1efcaca53..e543129d36050 100644
+--- a/drivers/gpio/gpiolib-of.c
++++ b/drivers/gpio/gpiolib-of.c
+@@ -193,6 +193,8 @@ static void of_gpio_try_fixup_polarity(const struct device_node *np,
+                */
+               { "himax,hx8357",       "gpios-reset",  false },
+               { "himax,hx8369",       "gpios-reset",  false },
++#endif
++#if IS_ENABLED(CONFIG_MTD_NAND_JZ4780)
+               /*
+                * The rb-gpios semantics was undocumented and qi,lb60 (along with
+                * the ingenic driver) got it wrong. The active state encodes the
+-- 
+2.39.5
+
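
The shape of the fixed table, for reference: each polarity quirk entry is
compiled in only when its consumer driver is enabled, so every driver's
entries sit under their own IS_ENABLED() guard. A condensed sketch; the
struct name and field layout are illustrative, the compatible strings come
from the quirks above:

    #include <linux/kconfig.h>
    #include <linux/types.h>

    static const struct polarity_quirk {
            const char *compatible;
            const char *propname;
            bool active_high;
    } quirks[] = {
    #if IS_ENABLED(CONFIG_LCD_HX8357)
            { "himax,hx8357",  "gpios-reset", false },
            { "himax,hx8369",  "gpios-reset", false },
    #endif
    #if IS_ENABLED(CONFIG_MTD_NAND_JZ4780)
            { "qi,lb60",       "rb-gpios",    true },
    #endif
    };
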
diff --git a/queue-6.12/iommu-exynos-fix-suspend-resume-with-identity-domain.patch b/queue-6.12/iommu-exynos-fix-suspend-resume-with-identity-domain.patch
new file mode 100644 (file)
index 0000000..dcde45a
--- /dev/null
@@ -0,0 +1,52 @@
+From 358c19a01093095e43bafe1f32ea72c829d46b83 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Apr 2025 22:27:31 +0200
+Subject: iommu/exynos: Fix suspend/resume with IDENTITY domain
+
+From: Marek Szyprowski <m.szyprowski@samsung.com>
+
+[ Upstream commit 99deffc409b69000ac4877486e69ec6516becd53 ]
+
+Commit bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe
+path") changed the sequence of probing the SYSMMU controller devices and
+calls to arm_iommu_attach_device(), which results in resuming the SYSMMU
+controller earlier, while it is still set to IDENTITY mapping. That
+change revealed a bug in IDENTITY handling in the exynos-iommu driver.
+When the SYSMMU controller is set to IDENTITY mapping, data->domain is
+NULL, so adjust the checks in the suspend & resume callbacks to handle
+this case correctly.
+
+Fixes: b3d14960e629 ("iommu/exynos: Implement an IDENTITY domain")
+Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
+Link: https://lore.kernel.org/r/20250401202731.2810474-1-m.szyprowski@samsung.com
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/exynos-iommu.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
+index c666ecab955d2..7465dbb6fa80c 100644
+--- a/drivers/iommu/exynos-iommu.c
++++ b/drivers/iommu/exynos-iommu.c
+@@ -832,7 +832,7 @@ static int __maybe_unused exynos_sysmmu_suspend(struct device *dev)
+               struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
+               mutex_lock(&owner->rpm_lock);
+-              if (&data->domain->domain != &exynos_identity_domain) {
++              if (data->domain) {
+                       dev_dbg(data->sysmmu, "saving state\n");
+                       __sysmmu_disable(data);
+               }
+@@ -850,7 +850,7 @@ static int __maybe_unused exynos_sysmmu_resume(struct device *dev)
+               struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
+               mutex_lock(&owner->rpm_lock);
+-              if (&data->domain->domain != &exynos_identity_domain) {
++              if (data->domain) {
+                       dev_dbg(data->sysmmu, "restoring state\n");
+                       __sysmmu_enable(data);
+               }
+-- 
+2.39.5
+
diff --git a/queue-6.12/iommu-mediatek-fix-null-pointer-deference-in-mtk_iom.patch b/queue-6.12/iommu-mediatek-fix-null-pointer-deference-in-mtk_iom.patch
new file mode 100644 (file)
index 0000000..1b5ce77
--- /dev/null
@@ -0,0 +1,87 @@
+From 0ff66a9484c481648869d65615bce93bd3f36c57 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 3 Apr 2025 12:22:12 +0200
+Subject: iommu/mediatek: Fix NULL pointer deference in mtk_iommu_device_group
+
+From: Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
+
+[ Upstream commit 38e8844005e6068f336a3ad45451a562a0040ca1 ]
+
+Currently, mtk_iommu calls iommu_device_register during probe, before
+the hw_list in the driver data is initialized. Since the iommu probing
+fix, this leads to a NULL pointer dereference in mtk_iommu_device_group
+when hw_list is accessed with list_first_entry (which is not NULL safe).
+
+So, change the call order to ensure iommu_device_register is called
+after the driver data is initialized.
+
+Fixes: 9e3a2a643653 ("iommu/mediatek: Adapt sharing and non-sharing pgtable case")
+Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path")
+Reviewed-by: Yong Wu <yong.wu@mediatek.com>
+Tested-by: Chen-Yu Tsai <wenst@chromium.org> # MT8183 Juniper, MT8186 Tentacruel
+Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+Tested-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+Signed-off-by: Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
+Link: https://lore.kernel.org/r/20250403-fix-mtk-iommu-error-v2-1-fe8b18f8b0a8@collabora.com
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/mtk_iommu.c | 26 +++++++++++++-------------
+ 1 file changed, 13 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
+index 6a2707fe7a78c..32deab732209e 100644
+--- a/drivers/iommu/mtk_iommu.c
++++ b/drivers/iommu/mtk_iommu.c
+@@ -1371,15 +1371,6 @@ static int mtk_iommu_probe(struct platform_device *pdev)
+       platform_set_drvdata(pdev, data);
+       mutex_init(&data->mutex);
+-      ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
+-                                   "mtk-iommu.%pa", &ioaddr);
+-      if (ret)
+-              goto out_link_remove;
+-
+-      ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
+-      if (ret)
+-              goto out_sysfs_remove;
+-
+       if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) {
+               list_add_tail(&data->list, data->plat_data->hw_list);
+               data->hw_list = data->plat_data->hw_list;
+@@ -1389,19 +1380,28 @@ static int mtk_iommu_probe(struct platform_device *pdev)
+               data->hw_list = &data->hw_list_head;
+       }
++      ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
++                                   "mtk-iommu.%pa", &ioaddr);
++      if (ret)
++              goto out_list_del;
++
++      ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
++      if (ret)
++              goto out_sysfs_remove;
++
+       if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
+               ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
+               if (ret)
+-                      goto out_list_del;
++                      goto out_device_unregister;
+       }
+       return ret;
+-out_list_del:
+-      list_del(&data->list);
++out_device_unregister:
+       iommu_device_unregister(&data->iommu);
+ out_sysfs_remove:
+       iommu_device_sysfs_remove(&data->iommu);
+-out_link_remove:
++out_list_del:
++      list_del(&data->list);
+       if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM))
+               device_link_remove(data->smicomm_dev, dev);
+ out_runtime_disable:
+-- 
+2.39.5
+
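
The fix follows the usual probe rule: register with the subsystem only after
every piece of state the registration exposes is initialized, and unwind in
strict reverse order on error. A minimal sketch of that shape; all helper
names are hypothetical:

    #include <linux/platform_device.h>

    int init_private_state(struct platform_device *pdev);   /* e.g. hw_list */
    void fini_private_state(struct platform_device *pdev);
    int publish_to_subsystem(struct platform_device *pdev);
    void unpublish_from_subsystem(struct platform_device *pdev);
    int optional_extra_step(struct platform_device *pdev);

    static int foo_probe(struct platform_device *pdev)
    {
            int ret;

            ret = init_private_state(pdev);
            if (ret)
                    return ret;

            /* Only now is it safe to become visible: subsystem callbacks
             * such as device_group() may run as soon as this succeeds. */
            ret = publish_to_subsystem(pdev);
            if (ret)
                    goto err_fini_state;

            ret = optional_extra_step(pdev);
            if (ret)
                    goto err_unpublish;

            return 0;

    err_unpublish:
            unpublish_from_subsystem(pdev);
    err_fini_state:
            fini_private_state(pdev);
            return ret;
    }
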
diff --git a/queue-6.12/ipv6-align-behavior-across-nexthops-during-path-sele.patch b/queue-6.12/ipv6-align-behavior-across-nexthops-during-path-sele.patch
new file mode 100644 (file)
index 0000000..8271579
--- /dev/null
@@ -0,0 +1,63 @@
+From 797d38f57fc28f362a483ce5e394b0d3e3df8f70 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Apr 2025 11:43:16 +0300
+Subject: ipv6: Align behavior across nexthops during path selection
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit 6933cd4714861eea6848f18396a119d741f25fc3 ]
+
+A nexthop is only chosen when the calculated multipath hash falls in the
+nexthop's hash region (i.e., the hash is smaller than the nexthop's hash
+threshold) and when the nexthop is assigned a non-negative score by
+rt6_score_route().
+
+Commit 4d0ab3a6885e ("ipv6: Start path selection from the first
+nexthop") introduced an unintentional difference between the first
+nexthop and the rest when the score is negative.
+
+When the first nexthop matches, but has a negative score, the code will
+currently evaluate subsequent nexthops until one is found with a
+non-negative score. On the other hand, when a different nexthop matches,
+but has a negative score, the code will fallback to the nexthop with
+which the selection started ('match').
+
+Align the behavior across all nexthops and fallback to 'match' when the
+first nexthop matches, but has a negative score.
+
+Fixes: 3d709f69a3e7 ("ipv6: Use hash-threshold instead of modulo-N")
+Fixes: 4d0ab3a6885e ("ipv6: Start path selection from the first nexthop")
+Reported-by: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+Closes: https://lore.kernel.org/netdev/67efef607bc41_1ddca82948c@willemb.c.googlers.com.notmuch/
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://patch.msgid.link/20250408084316.243559-1-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/route.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 987492dcb07ca..bae8ece3e881e 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -470,10 +470,10 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
+               goto out;
+       hash = fl6->mp_hash;
+-      if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound) &&
+-          rt6_score_route(first->fib6_nh, first->fib6_flags, oif,
+-                          strict) >= 0) {
+-              match = first;
++      if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound)) {
++              if (rt6_score_route(first->fib6_nh, first->fib6_flags, oif,
++                                  strict) >= 0)
++                      match = first;
+               goto out;
+       }
+-- 
+2.39.5
+
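
For reference, hash-threshold selection walks the nexthops in order and stops
at the first one whose hash region covers the computed hash; the fix makes
the negative-score fallback identical whether or not that nexthop is the
first. A simplified sketch of the now-uniform logic; types and score() are
condensed stand-ins, not the kernel's actual structures:

    #include <linux/types.h>

    struct nexthop_ex {
            u32 upper_bound;        /* end of this nexthop's hash region */
    };

    /* Stand-in for rt6_score_route(): negative means "avoid if possible". */
    int score(const struct nexthop_ex *nh);

    static const struct nexthop_ex *
    select_path(const struct nexthop_ex *nhs, int n, u32 hash)
    {
            const struct nexthop_ex *match = &nhs[0];       /* fallback */
            int i;

            for (i = 0; i < n; i++) {
                    if (hash > nhs[i].upper_bound)
                            continue;       /* hash lies past this region */

                    /* The hash falls in this region: take the nexthop only
                     * if usable, otherwise keep the fallback -- now the
                     * same rule for i == 0 as for i > 0. */
                    if (score(&nhs[i]) >= 0)
                            match = &nhs[i];
                    break;
            }

            return match;
    }
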
diff --git a/queue-6.12/net-ethtool-don-t-call-.cleanup_data-when-prepare_da.patch b/queue-6.12/net-ethtool-don-t-call-.cleanup_data-when-prepare_da.patch
new file mode 100644 (file)
index 0000000..649708e
--- /dev/null
@@ -0,0 +1,78 @@
+From e331ea7d583b0f77e7454003fb9f9010d0258ed2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Apr 2025 15:05:10 +0200
+Subject: net: ethtool: Don't call .cleanup_data when prepare_data fails
+
+From: Maxime Chevallier <maxime.chevallier@bootlin.com>
+
+[ Upstream commit 4f038a6a02d20859a3479293cbf172b0f14cbdd6 ]
+
+There's a consistent pattern where the .cleanup_data() callback is
+called when .prepare_data() fails, when it should really be called only
+to clean up after a successful .prepare_data(), as per the
+documentation.
+
+Rewrite the error-handling paths to make sure we don't clean up
+un-prepared data.
+
+Fixes: c781ff12a2f3 ("ethtool: Allow network drivers to dump arbitrary EEPROM data")
+Reviewed-by: Kory Maincent <kory.maincent@bootlin.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
+Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
+Link: https://patch.msgid.link/20250407130511.75621-1-maxime.chevallier@bootlin.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ethtool/netlink.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
+index e233dfc8ca4be..a52be67139d0a 100644
+--- a/net/ethtool/netlink.c
++++ b/net/ethtool/netlink.c
+@@ -490,7 +490,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
+       ret = ops->prepare_data(req_info, reply_data, info);
+       rtnl_unlock();
+       if (ret < 0)
+-              goto err_cleanup;
++              goto err_dev;
+       ret = ops->reply_size(req_info, reply_data);
+       if (ret < 0)
+               goto err_cleanup;
+@@ -548,7 +548,7 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
+       ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, info);
+       rtnl_unlock();
+       if (ret < 0)
+-              goto out;
++              goto out_cancel;
+       ret = ethnl_fill_reply_header(skb, dev, ctx->ops->hdr_attr);
+       if (ret < 0)
+               goto out;
+@@ -557,6 +557,7 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
+ out:
+       if (ctx->ops->cleanup_data)
+               ctx->ops->cleanup_data(ctx->reply_data);
++out_cancel:
+       ctx->reply_data->dev = NULL;
+       if (ret < 0)
+               genlmsg_cancel(skb, ehdr);
+@@ -760,7 +761,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
+       ethnl_init_reply_data(reply_data, ops, dev);
+       ret = ops->prepare_data(req_info, reply_data, &info);
+       if (ret < 0)
+-              goto err_cleanup;
++              goto err_rep;
+       ret = ops->reply_size(req_info, reply_data);
+       if (ret < 0)
+               goto err_cleanup;
+@@ -795,6 +796,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
+ err_cleanup:
+       if (ops->cleanup_data)
+               ops->cleanup_data(reply_data);
++err_rep:
+       kfree(reply_data);
+       kfree(req_info);
+       return;
+-- 
+2.39.5
+
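
The structural point of the fix, in isolation: each error label may undo
only the steps that have already succeeded, so a failing prepare must jump
past the cleanup label. A minimal sketch with hypothetical ctx helpers:

    struct req_ctx;

    int prepare_data(struct req_ctx *ctx);
    void cleanup_data(struct req_ctx *ctx);
    int build_reply(struct req_ctx *ctx);
    void release_dev(struct req_ctx *ctx);

    static int handle_request(struct req_ctx *ctx)
    {
            int ret;

            ret = prepare_data(ctx);
            if (ret < 0)
                    goto err_dev;           /* nothing prepared yet */

            ret = build_reply(ctx);
            if (ret < 0)
                    goto err_cleanup;       /* prepare succeeded */

            release_dev(ctx);
            return 0;

    err_cleanup:
            cleanup_data(ctx);      /* pairs with successful prepare only */
    err_dev:
            release_dev(ctx);
            return ret;
    }
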
diff --git a/queue-6.12/net-libwx-handle-page_pool_dev_alloc_pages-error.patch b/queue-6.12/net-libwx-handle-page_pool_dev_alloc_pages-error.patch
new file mode 100644 (file)
index 0000000..a75567a
--- /dev/null
@@ -0,0 +1,44 @@
+From 8acf93575795d035b9db106a9083490f7daefb5a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Apr 2025 13:49:52 -0500
+Subject: net: libwx: handle page_pool_dev_alloc_pages error
+
+From: Chenyuan Yang <chenyuan0y@gmail.com>
+
+[ Upstream commit 7f1ff1b38a7c8b872382b796023419d87d78c47e ]
+
+page_pool_dev_alloc_pages() can return NULL. There was a WARN_ON(!page),
+but the code would still proceed to use the NULL pointer and then crash.
+
+This is similar to commit 001ba0902046
+("net: fec: handle page_pool_dev_alloc_pages error").
+
+This is found by our static analysis tool KNighter.
+
+Signed-off-by: Chenyuan Yang <chenyuan0y@gmail.com>
+Fixes: 3c47e8ae113a ("net: libwx: Support to receive packets in NAPI")
+Reviewed-by: Joe Damato <jdamato@fastly.com>
+Link: https://patch.msgid.link/20250407184952.2111299-1-chenyuan0y@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/wangxun/libwx/wx_lib.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+index 2b3d6586f44a5..71c891d14fb62 100644
+--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
++++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+@@ -309,7 +309,8 @@ static bool wx_alloc_mapped_page(struct wx_ring *rx_ring,
+               return true;
+       page = page_pool_dev_alloc_pages(rx_ring->page_pool);
+-      WARN_ON(!page);
++      if (unlikely(!page))
++              return false;
+       dma = page_pool_get_dma_addr(page);
+       bi->page_dma = dma;
+-- 
+2.39.5
+
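
The general pattern applied: treat allocator failure as a recoverable event
and report it to the caller instead of warning and dereferencing NULL. A
condensed sketch; the ring and buffer types are hypothetical stand-ins for
the driver's structures:

    #include <net/page_pool/helpers.h>

    struct rx_buffer {
            dma_addr_t page_dma;
            struct page *page;
    };

    struct rx_ring {
            struct page_pool *page_pool;
    };

    static bool refill_rx_buffer(struct rx_ring *ring, struct rx_buffer *bi)
    {
            struct page *page;

            page = page_pool_dev_alloc_pages(ring->page_pool);
            if (unlikely(!page))
                    return false;   /* caller retries later, no crash */

            bi->page_dma = page_pool_get_dma_addr(page);
            bi->page = page;
            return true;
    }
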
diff --git a/queue-6.12/net-phy-allow-mdio-bus-pm-ops-to-start-stop-state-ma.patch b/queue-6.12/net-phy-allow-mdio-bus-pm-ops-to-start-stop-state-ma.patch
new file mode 100644 (file)
index 0000000..b4f690d
--- /dev/null
@@ -0,0 +1,163 @@
+From a7e1926fe6383cde19231b413580d47b7139a2f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Apr 2025 12:40:42 +0300
+Subject: net: phy: allow MDIO bus PM ops to start/stop state machine for
+ phylink-controlled PHY
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit fc75ea20ffb452652f0d4033f38fe88d7cfdae35 ]
+
+DSA has 2 kinds of drivers:
+
+1. Those who call dsa_switch_suspend() and dsa_switch_resume() from
+   their device PM ops: qca8k-8xxx, bcm_sf2, microchip ksz
+2. Those who don't: all others. The above methods should be optional.
+
+For type 1, dsa_switch_suspend() calls dsa_user_suspend() -> phylink_stop(),
+and dsa_switch_resume() calls dsa_user_resume() -> phylink_start().
+These seem good candidates for setting mac_managed_pm = true because
+that is essentially its definition [1], but that does not seem to be the
+biggest problem for now, and is not what this change focuses on.
+
+Talking strictly about the 2nd category of DSA drivers here (which
+do not have MAC managed PM, meaning that for their attached PHYs,
+mdio_bus_phy_suspend() and mdio_bus_phy_resume() should run in full),
+I have noticed that the following warning from mdio_bus_phy_resume() is
+triggered:
+
+       WARN_ON(phydev->state != PHY_HALTED && phydev->state != PHY_READY &&
+               phydev->state != PHY_UP);
+
+because the PHY state machine is running.
+
+It's running as a result of a previous dsa_user_open() -> ... ->
+phylink_start() -> phy_start() having been initiated by the user.
+
+The previous mdio_bus_phy_suspend() was supposed to have called
+phy_stop_machine(), but it didn't. This is why the PHY is in state
+PHY_NOLINK by the time mdio_bus_phy_resume() runs.
+
+mdio_bus_phy_suspend() did not call phy_stop_machine() because for
+phylink, the phydev->adjust_link function pointer is NULL. This seems a
+technicality introduced by commit fddd91016d16 ("phylib: fix PAL state
+machine restart on resume"). That commit was written before phylink
+existed, and was intended to avoid crashing with consumer drivers which
+don't use the PHY state machine - phylink always does, when using a PHY.
+But phylink itself has historically not been developed with
+suspend/resume in mind, and apparently not tested too much in that
+scenario, allowing this bug to exist unnoticed for so long. Plus, prior
+to the WARN_ON(), it would have likely been invisible.
+
+This issue is not in fact restricted to type 2 DSA drivers (according to
+the above ad-hoc classification), but can be extrapolated to any MAC
+driver with phylink and MDIO-bus-managed PHY PM ops. DSA is just where
+the issue was reported. Assuming mac_managed_pm is set correctly, a
+quick search indicates the following other drivers might be affected:
+
+$ grep -Zlr PHYLINK_NETDEV drivers/ | xargs -0 grep -L mac_managed_pm
+drivers/net/ethernet/atheros/ag71xx.c
+drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
+drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+drivers/net/ethernet/freescale/ucc_geth.c
+drivers/net/ethernet/freescale/enetc/enetc_pf_common.c
+drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+drivers/net/ethernet/marvell/mvneta.c
+drivers/net/ethernet/marvell/prestera/prestera_main.c
+drivers/net/ethernet/mediatek/mtk_eth_soc.c
+drivers/net/ethernet/altera/altera_tse_main.c
+drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
+drivers/net/ethernet/meta/fbnic/fbnic_phylink.c
+drivers/net/ethernet/tehuti/tn40_phy.c
+drivers/net/ethernet/mscc/ocelot_net.c
+
+Make the existing conditions dependent on the PHY device having a
+phydev->phy_link_change() implementation equal to the default
+phy_link_change() provided by phylib. Otherwise, we implicitly know that
+the phydev has the phylink-provided phylink_phy_change() callback, and
+when phylink is used, the PHY state machine always needs to be stopped/
+started on the suspend/resume path. The code is structured as such that
+if phydev->phy_link_change() is absent, it is a matter of time until the
+kernel will crash - no need to further complicate the test.
+
+Thus, for the situation where the PM is not managed by the MAC, we will
+make the MDIO bus PM ops treat identically the phylink-controlled PHYs
+with the phylib-controlled PHYs where an adjust_link() callback is
+supplied. In both cases, the MDIO bus PM ops should stop and restart the
+PHY state machine.
+
+[1] https://lore.kernel.org/netdev/Z-1tiW9zjcoFkhwc@shell.armlinux.org.uk/
+
+Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state")
+Reported-by: Wei Fang <wei.fang@nxp.com>
+Tested-by: Wei Fang <wei.fang@nxp.com>
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Link: https://patch.msgid.link/20250407094042.2155633-1-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/phy_device.c | 31 +++++++++++++++++++++++++++++--
+ 1 file changed, 29 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
+index 44aa67fd544dc..8af44224480f1 100644
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -302,6 +302,33 @@ static void phy_link_change(struct phy_device *phydev, bool up)
+               phydev->mii_ts->link_state(phydev->mii_ts, phydev);
+ }
++/**
++ * phy_uses_state_machine - test whether consumer driver uses PAL state machine
++ * @phydev: the target PHY device structure
++ *
++ * Ultimately, this aims to indirectly determine whether the PHY is attached
++ * to a consumer which uses the state machine by calling phy_start() and
++ * phy_stop().
++ *
++ * When the PHY driver consumer uses phylib, it must have previously called
++ * phy_connect_direct() or one of its derivatives, so that phy_prepare_link()
++ * has set up a hook for monitoring state changes.
++ *
++ * When the PHY driver is used by the MAC driver consumer through phylink (the
++ * only other provider of a phy_link_change() method), using the PHY state
++ * machine is not optional.
++ *
++ * Return: true if consumer calls phy_start() and phy_stop(), false otherwise.
++ */
++static bool phy_uses_state_machine(struct phy_device *phydev)
++{
++      if (phydev->phy_link_change == phy_link_change)
++              return phydev->attached_dev && phydev->adjust_link;
++
++      /* phydev->phy_link_change is implicitly phylink_phy_change() */
++      return true;
++}
++
+ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
+ {
+       struct device_driver *drv = phydev->mdio.dev.driver;
+@@ -368,7 +395,7 @@ static __maybe_unused int mdio_bus_phy_suspend(struct device *dev)
+        * may call phy routines that try to grab the same lock, and that may
+        * lead to a deadlock.
+        */
+-      if (phydev->attached_dev && phydev->adjust_link)
++      if (phy_uses_state_machine(phydev))
+               phy_stop_machine(phydev);
+       if (!mdio_bus_phy_may_suspend(phydev))
+@@ -422,7 +449,7 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev)
+               }
+       }
+-      if (phydev->attached_dev && phydev->adjust_link)
++      if (phy_uses_state_machine(phydev))
+               phy_start_machine(phydev);
+       return 0;
+-- 
+2.39.5
+
diff --git a/queue-6.12/net-phy-move-phy_link_change-prior-to-mdio_bus_phy_m.patch b/queue-6.12/net-phy-move-phy_link_change-prior-to-mdio_bus_phy_m.patch
new file mode 100644 (file)
index 0000000..8016886
--- /dev/null
@@ -0,0 +1,76 @@
+From f270095ff9e571d3e8fa4125c90a5dc9486efe50 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Apr 2025 12:38:59 +0300
+Subject: net: phy: move phy_link_change() prior to mdio_bus_phy_may_suspend()
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit f40a673d6b4a128fe95dd9b8c3ed02da50a6a862 ]
+
+In an upcoming change, mdio_bus_phy_may_suspend() will need to
+distinguish a phylib-based PHY client from a phylink PHY client.
+For that, it will need to compare the phydev->phy_link_change() function
+pointer with the eponymous phy_link_change() provided by phylib.
+
+To avoid forward function declarations, the default PHY link state
+change method should be moved upwards. There is no functional change
+associated with this patch; it exists only to reduce the noise in the
+real bug fix that follows.
+
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Link: https://patch.msgid.link/20250407093900.2155112-1-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: fc75ea20ffb4 ("net: phy: allow MDIO bus PM ops to start/stop state machine for phylink-controlled PHY")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/phy_device.c | 26 +++++++++++++-------------
+ 1 file changed, 13 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
+index 119dfa2d6643a..44aa67fd544dc 100644
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -289,6 +289,19 @@ static bool phy_drv_wol_enabled(struct phy_device *phydev)
+       return wol.wolopts != 0;
+ }
++static void phy_link_change(struct phy_device *phydev, bool up)
++{
++      struct net_device *netdev = phydev->attached_dev;
++
++      if (up)
++              netif_carrier_on(netdev);
++      else
++              netif_carrier_off(netdev);
++      phydev->adjust_link(netdev);
++      if (phydev->mii_ts && phydev->mii_ts->link_state)
++              phydev->mii_ts->link_state(phydev->mii_ts, phydev);
++}
++
+ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
+ {
+       struct device_driver *drv = phydev->mdio.dev.driver;
+@@ -1101,19 +1114,6 @@ struct phy_device *phy_find_first(struct mii_bus *bus)
+ }
+ EXPORT_SYMBOL(phy_find_first);
+-static void phy_link_change(struct phy_device *phydev, bool up)
+-{
+-      struct net_device *netdev = phydev->attached_dev;
+-
+-      if (up)
+-              netif_carrier_on(netdev);
+-      else
+-              netif_carrier_off(netdev);
+-      phydev->adjust_link(netdev);
+-      if (phydev->mii_ts && phydev->mii_ts->link_state)
+-              phydev->mii_ts->link_state(phydev->mii_ts, phydev);
+-}
+-
+ /**
+  * phy_prepare_link - prepares the PHY layer to monitor link status
+  * @phydev: target phy_device struct
+-- 
+2.39.5
+
diff --git a/queue-6.12/net-ppp-add-bound-checking-for-skb-data-on-ppp_sync_.patch b/queue-6.12/net-ppp-add-bound-checking-for-skb-data-on-ppp_sync_.patch
new file mode 100644 (file)
index 0000000..d3393d5
--- /dev/null
@@ -0,0 +1,73 @@
+From a70e76abebc12b71419d34a228784e4bb8151991 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Apr 2025 17:55:08 +0200
+Subject: net: ppp: Add bound checking for skb data on ppp_sync_txmung
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Arnaud Lecomte <contact@arnaud-lcm.com>
+
+[ Upstream commit aabc6596ffb377c4c9c8f335124b92ea282c9821 ]
+
+Ensure we have enough data in the skb's linear buffer before accessing
+its initial bytes. This prevents potential out-of-bounds accesses when
+processing short packets.
+
+When ppp_sync_txmunge receives an incoming packet with an empty
+payload:
+(remote) gef➤  p *(struct pppoe_hdr *) (skb->head + skb->network_header)
+$18 = {
+       type = 0x1,
+       ver = 0x1,
+       code = 0x0,
+       sid = 0x2,
+        length = 0x0,
+       tag = 0xffff8880371cdb96
+}
+
+from the skb struct (trimmed)
+      tail = 0x16,
+      end = 0x140,
+      head = 0xffff88803346f400 "4",
+      data = 0xffff88803346f416 ":\377",
+      truesize = 0x380,
+      len = 0x0,
+      data_len = 0x0,
+      mac_len = 0xe,
+      hdr_len = 0x0,
+
+it is not safe to access data[2].
+
+Reported-by: syzbot+29fc8991b0ecb186cf40@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=29fc8991b0ecb186cf40
+Tested-by: syzbot+29fc8991b0ecb186cf40@syzkaller.appspotmail.com
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Arnaud Lecomte <contact@arnaud-lcm.com>
+Link: https://patch.msgid.link/20250408-bound-checking-ppp_txmung-v2-1-94bb6e1b92d0@arnaud-lcm.com
+[pabeni@redhat.com: fixed subj typo]
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ppp/ppp_synctty.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c
+index 644e99fc3623f..9c4932198931f 100644
+--- a/drivers/net/ppp/ppp_synctty.c
++++ b/drivers/net/ppp/ppp_synctty.c
+@@ -506,6 +506,11 @@ ppp_sync_txmunge(struct syncppp *ap, struct sk_buff *skb)
+       unsigned char *data;
+       int islcp;
++      /* Ensure we can safely access protocol field and LCP code */
++      if (!pskb_may_pull(skb, 3)) {
++              kfree_skb(skb);
++              return NULL;
++      }
+       data  = skb->data;
+       proto = get_unaligned_be16(data);
+-- 
+2.39.5
+
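
The applied pattern, standalone: before reading the first N bytes of
skb->data, guarantee they are present in the linear area with
pskb_may_pull() and drop the packet otherwise. A condensed sketch of such a
parse entry point:

    #include <linux/errno.h>
    #include <linux/skbuff.h>
    #include <linux/unaligned.h>    /* <asm/unaligned.h> on older kernels */

    /* Returns the 16-bit protocol field, or -EINVAL after freeing a
     * too-short skb. */
    static int parse_proto(struct sk_buff *skb)
    {
            /* protocol field (2 bytes) + LCP code (1 byte) */
            if (!pskb_may_pull(skb, 3)) {
                    kfree_skb(skb);         /* too short: drop, never read */
                    return -EINVAL;
            }

            /* skb->data is now guaranteed to hold at least 3 bytes */
            return get_unaligned_be16(skb->data);
    }
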
diff --git a/queue-6.12/net-tls-explicitly-disallow-disconnect.patch b/queue-6.12/net-tls-explicitly-disallow-disconnect.patch
new file mode 100644 (file)
index 0000000..d8d5971
--- /dev/null
@@ -0,0 +1,69 @@
+From ad6c6cd42fdb10155cfa852a97eb6653c6b5891f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Apr 2025 11:03:33 -0700
+Subject: net: tls: explicitly disallow disconnect
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 5071a1e606b30c0c11278d3c6620cd6a24724cf6 ]
+
+syzbot discovered that it can disconnect a TLS socket and then
+run into all sorts of unexpected corner cases. I have a vague
+recollection of Eric pointing this out to us a long time ago.
+Supporting disconnect is really hard; for one thing, if offload
+is enabled we'd need to wait for all packets to be _acked_.
+Disconnect is not commonly used, disallow it.
+
+The immediate problem syzbot run into is the warning in the strp,
+but that's just the easiest bug to trigger:
+
+  WARNING: CPU: 0 PID: 5834 at net/tls/tls_strp.c:486 tls_strp_msg_load+0x72e/0xa80 net/tls/tls_strp.c:486
+  RIP: 0010:tls_strp_msg_load+0x72e/0xa80 net/tls/tls_strp.c:486
+  Call Trace:
+   <TASK>
+   tls_rx_rec_wait+0x280/0xa60 net/tls/tls_sw.c:1363
+   tls_sw_recvmsg+0x85c/0x1c30 net/tls/tls_sw.c:2043
+   inet6_recvmsg+0x2c9/0x730 net/ipv6/af_inet6.c:678
+   sock_recvmsg_nosec net/socket.c:1023 [inline]
+   sock_recvmsg+0x109/0x280 net/socket.c:1045
+   __sys_recvfrom+0x202/0x380 net/socket.c:2237
+
+Fixes: 3c4d7559159b ("tls: kernel TLS support")
+Reported-by: syzbot+b4cd76826045a1eb93c1@syzkaller.appspotmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
+Link: https://patch.msgid.link/20250404180334.3224206-1-kuba@kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_main.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
+index 6b4b9f2749a6f..0acf313deb01f 100644
+--- a/net/tls/tls_main.c
++++ b/net/tls/tls_main.c
+@@ -809,6 +809,11 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
+       return do_tls_setsockopt(sk, optname, optval, optlen);
+ }
++static int tls_disconnect(struct sock *sk, int flags)
++{
++      return -EOPNOTSUPP;
++}
++
+ struct tls_context *tls_ctx_create(struct sock *sk)
+ {
+       struct inet_connection_sock *icsk = inet_csk(sk);
+@@ -904,6 +909,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
+       prot[TLS_BASE][TLS_BASE] = *base;
+       prot[TLS_BASE][TLS_BASE].setsockopt     = tls_setsockopt;
+       prot[TLS_BASE][TLS_BASE].getsockopt     = tls_getsockopt;
++      prot[TLS_BASE][TLS_BASE].disconnect     = tls_disconnect;
+       prot[TLS_BASE][TLS_BASE].close          = tls_sk_proto_close;
+       prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
+-- 
+2.39.5
+
diff --git a/queue-6.12/net_sched-sch_sfq-move-the-limit-validation.patch b/queue-6.12/net_sched-sch_sfq-move-the-limit-validation.patch
new file mode 100644 (file)
index 0000000..cca2193
--- /dev/null
@@ -0,0 +1,85 @@
+From 53379af57776b65e79b2bc87e3d3993ed71da68c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Apr 2025 13:24:08 -0700
+Subject: net_sched: sch_sfq: move the limit validation
+
+From: Octavian Purdila <tavip@google.com>
+
+[ Upstream commit b3bf8f63e6179076b57c9de660c9f80b5abefe70 ]
+
+It is not sufficient to validate the limit directly on the data that
+the user passes, as the limit can be updated based on how the other
+parameters are changed.
+
+Move the check to the end of the configuration update process to also
+catch scenarios where the limit is indirectly updated, for example
+with the following configurations:
+
+tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 depth 1
+tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 divisor 1
+
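+In the first command, for example, ctl->limit is 2 and passes the old
+early check, but the update path then computes
+limit = min_t(u32, ctl->limit, maxdepth * maxflows) = min(2, 1 * 1) = 1,
+so the invalid value only materializes after the derived parameters
+have been applied.
+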
+This fixes the following syzkaller reported crash:
+
+------------[ cut here ]------------
+UBSAN: array-index-out-of-bounds in net/sched/sch_sfq.c:203:6
+index 65535 is out of range for type 'struct sfq_head[128]'
+CPU: 1 UID: 0 PID: 3037 Comm: syz.2.16 Not tainted 6.14.0-rc2-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 12/27/2024
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:94 [inline]
+ dump_stack_lvl+0x201/0x300 lib/dump_stack.c:120
+ ubsan_epilogue lib/ubsan.c:231 [inline]
+ __ubsan_handle_out_of_bounds+0xf5/0x120 lib/ubsan.c:429
+ sfq_link net/sched/sch_sfq.c:203 [inline]
+ sfq_dec+0x53c/0x610 net/sched/sch_sfq.c:231
+ sfq_dequeue+0x34e/0x8c0 net/sched/sch_sfq.c:493
+ sfq_reset+0x17/0x60 net/sched/sch_sfq.c:518
+ qdisc_reset+0x12e/0x600 net/sched/sch_generic.c:1035
+ tbf_reset+0x41/0x110 net/sched/sch_tbf.c:339
+ qdisc_reset+0x12e/0x600 net/sched/sch_generic.c:1035
+ dev_reset_queue+0x100/0x1b0 net/sched/sch_generic.c:1311
+ netdev_for_each_tx_queue include/linux/netdevice.h:2590 [inline]
+ dev_deactivate_many+0x7e5/0xe70 net/sched/sch_generic.c:1375
+
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Fixes: 10685681bafc ("net_sched: sch_sfq: don't allow 1 packet limit")
+Signed-off-by: Octavian Purdila <tavip@google.com>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_sfq.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
+index 7714ae94e0521..58b42dcf8f201 100644
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -661,10 +661,6 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
+               if (!p)
+                       return -ENOMEM;
+       }
+-      if (ctl->limit == 1) {
+-              NL_SET_ERR_MSG_MOD(extack, "invalid limit");
+-              return -EINVAL;
+-      }
+       sch_tree_lock(sch);
+@@ -705,6 +701,12 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
+               limit = min_t(u32, ctl->limit, maxdepth * maxflows);
+               maxflows = min_t(u32, maxflows, limit);
+       }
++      if (limit == 1) {
++              sch_tree_unlock(sch);
++              kfree(p);
++              NL_SET_ERR_MSG_MOD(extack, "invalid limit");
++              return -EINVAL;
++      }
+       /* commit configuration */
+       q->limit = limit;
+-- 
+2.39.5
+
diff --git a/queue-6.12/net_sched-sch_sfq-use-a-temporary-work-area-for-vali.patch b/queue-6.12/net_sched-sch_sfq-use-a-temporary-work-area-for-vali.patch
new file mode 100644 (file)
index 0000000..bbe048a
--- /dev/null
@@ -0,0 +1,124 @@
+From 9c3fd40dd511f0c20dd105609fbc09f935622f4e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Apr 2025 13:24:07 -0700
+Subject: net_sched: sch_sfq: use a temporary work area for validating
+ configuration
+
+From: Octavian Purdila <tavip@google.com>
+
+[ Upstream commit 8c0cea59d40cf6dd13c2950437631dd614fbade6 ]
+
+Many configuration parameters influence others (e.g. divisor
+-> flows -> limit, depth -> limit), so it is difficult to correctly
+do all of the validation before applying the configuration. And if a
+validation error is detected late, it is difficult to roll back a
+partially applied configuration.
+
+To avoid these issues use a temporary work area to update and validate
+the configuration and only then apply the configuration to the
+internal state.
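+
+A hedged, abridged sketch of the resulting shape of sfq_change() (see
+the diff below for the real thing):
+
+  /* snapshot the current configuration into locals */
+  limit = q->limit;
+  maxflows = q->maxflows;
+
+  /* update and validate only the locals */
+  if (ctl->limit) {
+          limit = min_t(u32, ctl->limit, maxdepth * maxflows);
+          maxflows = min_t(u32, maxflows, limit);
+  }
+
+  /* commit only once everything checked out */
+  q->limit = limit;
+  q->maxflows = maxflows;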
+
+Signed-off-by: Octavian Purdila <tavip@google.com>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: b3bf8f63e617 ("net_sched: sch_sfq: move the limit validation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_sfq.c | 56 +++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 44 insertions(+), 12 deletions(-)
+
+diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
+index 65d5b59da5830..7714ae94e0521 100644
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -631,6 +631,15 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
+       struct red_parms *p = NULL;
+       struct sk_buff *to_free = NULL;
+       struct sk_buff *tail = NULL;
++      unsigned int maxflows;
++      unsigned int quantum;
++      unsigned int divisor;
++      int perturb_period;
++      u8 headdrop;
++      u8 maxdepth;
++      int limit;
++      u8 flags;
++
+       if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
+               return -EINVAL;
+@@ -656,36 +665,59 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
+               NL_SET_ERR_MSG_MOD(extack, "invalid limit");
+               return -EINVAL;
+       }
++
+       sch_tree_lock(sch);
++
++      limit = q->limit;
++      divisor = q->divisor;
++      headdrop = q->headdrop;
++      maxdepth = q->maxdepth;
++      maxflows = q->maxflows;
++      perturb_period = q->perturb_period;
++      quantum = q->quantum;
++      flags = q->flags;
++
++      /* update and validate configuration */
+       if (ctl->quantum)
+-              q->quantum = ctl->quantum;
+-      WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
++              quantum = ctl->quantum;
++      perturb_period = ctl->perturb_period * HZ;
+       if (ctl->flows)
+-              q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
++              maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
+       if (ctl->divisor) {
+-              q->divisor = ctl->divisor;
+-              q->maxflows = min_t(u32, q->maxflows, q->divisor);
++              divisor = ctl->divisor;
++              maxflows = min_t(u32, maxflows, divisor);
+       }
+       if (ctl_v1) {
+               if (ctl_v1->depth)
+-                      q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
++                      maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
+               if (p) {
+-                      swap(q->red_parms, p);
+-                      red_set_parms(q->red_parms,
++                      red_set_parms(p,
+                                     ctl_v1->qth_min, ctl_v1->qth_max,
+                                     ctl_v1->Wlog,
+                                     ctl_v1->Plog, ctl_v1->Scell_log,
+                                     NULL,
+                                     ctl_v1->max_P);
+               }
+-              q->flags = ctl_v1->flags;
+-              q->headdrop = ctl_v1->headdrop;
++              flags = ctl_v1->flags;
++              headdrop = ctl_v1->headdrop;
+       }
+       if (ctl->limit) {
+-              q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
+-              q->maxflows = min_t(u32, q->maxflows, q->limit);
++              limit = min_t(u32, ctl->limit, maxdepth * maxflows);
++              maxflows = min_t(u32, maxflows, limit);
+       }
++      /* commit configuration */
++      q->limit = limit;
++      q->divisor = divisor;
++      q->headdrop = headdrop;
++      q->maxdepth = maxdepth;
++      q->maxflows = maxflows;
++      WRITE_ONCE(q->perturb_period, perturb_period);
++      q->quantum = quantum;
++      q->flags = flags;
++      if (p)
++              swap(q->red_parms, p);
++
+       qlen = sch->q.qlen;
+       while (sch->q.qlen > q->limit) {
+               dropped += sfq_drop(sch, &to_free);
+-- 
+2.39.5
+
diff --git a/queue-6.12/nft_set_pipapo-fix-incorrect-avx2-match-of-5th-field.patch b/queue-6.12/nft_set_pipapo-fix-incorrect-avx2-match-of-5th-field.patch
new file mode 100644 (file)
index 0000000..11cb6e9
--- /dev/null
@@ -0,0 +1,53 @@
+From 5bac21ef679581de6ad46ebd929720c6dec12e8f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Apr 2025 19:40:18 +0200
+Subject: nft_set_pipapo: fix incorrect avx2 match of 5th field octet
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit e042ed950d4e176379ba4c0722146cd96fb38aa2 ]
+
+Given a set element like:
+
+       icmpv6 . dead:beef:00ff::1
+
+The value of 'ff' is irrelevant, any address will be matched
+as long as the other octets are the same.
+
+This is because of too-early register clobbering:
+ymm7 is reloaded with new packet data (pkt[9]) but it still holds data
+of an earlier load that wasn't processed yet.
+
+The existing tests in nft_concat_range.sh selftests do exercise this code
+path, but do not trigger incorrect matching due to the network prefix
+limitation.
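+
+A scalar analogue of the hazard (illustrative only; the real code uses
+the NFT_PIPAPO_AVX2_* macros shown in the hunk below):
+
+  r7 = load_bucket(pkt[8]);   /* result still has a pending consumer */
+  r3 = r4 & r7;               /* must consume r7 before reloading it */
+  r7 = load_bucket(pkt[9]);   /* reload is only safe after the AND   */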
+
+Fixes: 7400b063969b ("nft_set_pipapo: Introduce AVX2-based lookup implementation")
+Reported-by: sontu mazumdar <sontu21@gmail.com>
+Closes: https://lore.kernel.org/netfilter/CANgxkqwnMH7fXra+VUfODT-8+qFLgskq3set1cAzqqJaV4iEZg@mail.gmail.com/T/#t
+Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_set_pipapo_avx2.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
+index b8d3c3213efee..c15db28c5ebc4 100644
+--- a/net/netfilter/nft_set_pipapo_avx2.c
++++ b/net/netfilter/nft_set_pipapo_avx2.c
+@@ -994,8 +994,9 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill,
+               NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt,  8,  pkt[8], bsize);
+               NFT_PIPAPO_AVX2_AND(6, 2, 3);
++              NFT_PIPAPO_AVX2_AND(3, 4, 7);
+               NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt,  9,  pkt[9], bsize);
+-              NFT_PIPAPO_AVX2_AND(0, 4, 5);
++              NFT_PIPAPO_AVX2_AND(0, 3, 5);
+               NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 10, pkt[10], bsize);
+               NFT_PIPAPO_AVX2_AND(2, 6, 7);
+               NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 11, pkt[11], bsize);
+-- 
+2.39.5
+
diff --git a/queue-6.12/nvmet-fcloop-swap-list_add_tail-arguments.patch b/queue-6.12/nvmet-fcloop-swap-list_add_tail-arguments.patch
new file mode 100644 (file)
index 0000000..10cd730
--- /dev/null
@@ -0,0 +1,38 @@
+From 5c96383b2bca8bb9b22ff8a36ca14a441e27b4de Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Apr 2025 17:29:03 +0200
+Subject: nvmet-fcloop: swap list_add_tail arguments
+
+From: Daniel Wagner <wagi@kernel.org>
+
+[ Upstream commit 2b5f0c5bc819af2b0759a8fcddc1b39102735c0f ]
+
+The new element to be added to the list is the first argument of
+list_add_tail. This fix was missed by dcfad4ab4d67 ("nvmet-fcloop: swap
+the list_add_tail arguments").
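+
+For reference, the list helper takes the new entry first (simplified
+prototype from include/linux/list.h):
+
+  void list_add_tail(struct list_head *new, struct list_head *head);
+
+so the request's ls_list node must be the first argument and the port's
+list head the second, as the hunk below restores.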
+
+Fixes: 437c0b824dbd ("nvme-fcloop: add target to host LS request support")
+Signed-off-by: Daniel Wagner <wagi@kernel.org>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/target/fcloop.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
+index e1abb27927ff7..da195d61a9664 100644
+--- a/drivers/nvme/target/fcloop.c
++++ b/drivers/nvme/target/fcloop.c
+@@ -478,7 +478,7 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport,
+       if (targetport) {
+               tport = targetport->private;
+               spin_lock(&tport->lock);
+-              list_add_tail(&tport->ls_list, &tls_req->ls_list);
++              list_add_tail(&tls_req->ls_list, &tport->ls_list);
+               spin_unlock(&tport->lock);
+               queue_work(nvmet_wq, &tport->ls_work);
+       }
+-- 
+2.39.5
+
diff --git a/queue-6.12/objtool-fix-insn_context_switch-handling-in-validate.patch b/queue-6.12/objtool-fix-insn_context_switch-handling-in-validate.patch
new file mode 100644 (file)
index 0000000..27f20de
--- /dev/null
@@ -0,0 +1,53 @@
+From 5da3d7e8191dfcc760bf8991c745dda3f89b32eb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Apr 2025 00:02:13 -0700
+Subject: objtool: Fix INSN_CONTEXT_SWITCH handling in validate_unret()
+
+From: Josh Poimboeuf <jpoimboe@kernel.org>
+
+[ Upstream commit a8df7d0ef92eca28c610206c6748daf537ac0586 ]
+
+The !CONFIG_IA32_EMULATION version of xen_entry_SYSCALL_compat() ends
+with a SYSCALL instruction which is classified by objtool as
+INSN_CONTEXT_SWITCH.
+
+Unlike validate_branch(), validate_unret() doesn't consider
+INSN_CONTEXT_SWITCH in a non-function to be a dead end, so it keeps
+going past the end of xen_entry_SYSCALL_compat(), resulting in the
+following warning:
+
+  vmlinux.o: warning: objtool: xen_reschedule_interrupt+0x2a: RET before UNTRAIN
+
+Fix that by adding INSN_CONTEXT_SWITCH handling to validate_unret() to
+match what validate_branch() is already doing.
+
+Fixes: a09a6e2399ba ("objtool: Add entry UNRET validation")
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: https://lore.kernel.org/r/f5eda46fd09f15b1f5cde3d9ae3b92b958342add.1744095216.git.jpoimboe@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/objtool/check.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/tools/objtool/check.c b/tools/objtool/check.c
+index 286a2c0af02aa..127862fa05c61 100644
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -3990,6 +3990,11 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn)
+                       WARN_INSN(insn, "RET before UNTRAIN");
+                       return 1;
++              case INSN_CONTEXT_SWITCH:
++                      if (insn_func(insn))
++                              break;
++                      return 0;
++
+               case INSN_NOP:
+                       if (insn->retpoline_safe)
+                               return 0;
+-- 
+2.39.5
+
diff --git a/queue-6.12/octeontx2-pf-qos-fix-vf-root-node-parent-queue-index.patch b/queue-6.12/octeontx2-pf-qos-fix-vf-root-node-parent-queue-index.patch
new file mode 100644 (file)
index 0000000..945244b
--- /dev/null
@@ -0,0 +1,57 @@
+From 8de7e6196270f78cc45e5e8032b0314ab14388c0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Apr 2025 12:33:41 +0530
+Subject: octeontx2-pf: qos: fix VF root node parent queue index
+
+From: Hariprasad Kelam <hkelam@marvell.com>
+
+[ Upstream commit b7db94734e785e380b0db0f9295e07024f4d42a0 ]
+
+The current code configures the Physical Function (PF) root node at TL1
+and the Virtual Function (VF) root node at TL2.
+
+This ensures that at any given point in time PF traffic gets higher priority.
+
+                    PF root node
+                      TL1
+                     /  \
+                    TL2  TL2 VF root node
+                    /     \
+                   TL3    TL3
+                   /       \
+                  TL4      TL4
+                  /         \
+                 SMQ        SMQ
+
+Due to a bug in the current code, the TL2 parent queue index on the
+VF interface is not being configured, leading to 'SMQ Flush' errors.
+
+Fixes: 5e6808b4c68d ("octeontx2-pf: Add support for HTB offload")
+Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250407070341.2765426-1-hkelam@marvell.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/nic/qos.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
+index 0f844c14485a0..35acc07bd9648 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
+@@ -165,6 +165,11 @@ static void __otx2_qos_txschq_cfg(struct otx2_nic *pfvf,
+               otx2_config_sched_shaping(pfvf, node, cfg, &num_regs);
+       } else if (level == NIX_TXSCH_LVL_TL2) {
++              /* configure parent txschq */
++              cfg->reg[num_regs] = NIX_AF_TL2X_PARENT(node->schq);
++              cfg->regval[num_regs] = (u64)hw->tx_link << 16;
++              num_regs++;
++
+               /* configure link cfg */
+               if (level == pfvf->qos.link_cfg_lvl) {
+                       cfg->reg[num_regs] = NIX_AF_TL3_TL2X_LINKX_CFG(node->schq, hw->tx_link);
+-- 
+2.39.5
+
diff --git a/queue-6.12/perf-core-add-aux_pause-aux_resume-aux_start_paused.patch b/queue-6.12/perf-core-add-aux_pause-aux_resume-aux_start_paused.patch
new file mode 100644 (file)
index 0000000..a3dacac
--- /dev/null
@@ -0,0 +1,336 @@
+From e8cab07dfefee17286ace53dc41dbac24c4ed0ab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Oct 2024 18:59:08 +0300
+Subject: perf/core: Add aux_pause, aux_resume, aux_start_paused
+
+From: Adrian Hunter <adrian.hunter@intel.com>
+
+[ Upstream commit 18d92bb57c39504d9da11c6ef604f58eb1d5a117 ]
+
+Hardware traces, such as instruction traces, can produce a vast amount of
+trace data, so being able to reduce tracing to more specific circumstances
+can be useful.
+
+The ability to pause or resume tracing when another event happens can do
+that.
+
+Add ability for an event to "pause" or "resume" AUX area tracing.
+
+Add aux_pause bit to perf_event_attr to indicate that, if the event
+happens, the associated AUX area tracing should be paused. Ditto
+aux_resume. Do not allow aux_pause and aux_resume to be set together.
+
+Add aux_start_paused bit to perf_event_attr to indicate to an AUX area
+event that it should start in a "paused" state.
+
+Add aux_paused to struct hw_perf_event for AUX area events to keep track of
+the "paused" state. aux_paused is initialized to aux_start_paused.
+
+Add PERF_EF_PAUSE and PERF_EF_RESUME modes for ->stop() and ->start()
+callbacks. Call them as needed during __perf_event_output(). Add
+aux_in_pause_resume to struct perf_buffer to prevent races with the NMI
+handler. A pause/resume in NMI context will be skipped if it coincides
+with another pause/resume.
+
+To use aux_pause or aux_resume, an event must be in a group with the AUX
+area event as the group leader.
+
+Example (requires Intel PT and tools patches also):
+
+ $ perf record --kcore -e intel_pt/aux-action=start-paused/k,syscalls:sys_enter_newuname/aux-action=resume/,syscalls:sys_exit_newuname/aux-action=pause/ uname
+ Linux
+ [ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 0.043 MB perf.data ]
+ $ perf script --call-trace
+ uname   30805 [000] 24001.058782799: name: 0x7ffc9c1865b0
+ uname   30805 [000] 24001.058784424:  psb offs: 0
+ uname   30805 [000] 24001.058784424:  cbr: 39 freq: 3904 MHz (139%)
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])        debug_smp_processor_id
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])        __x64_sys_newuname
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])            down_read
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                __cond_resched
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                preempt_count_add
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                    in_lock_functions
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                preempt_count_sub
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])            up_read
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                preempt_count_add
+ uname   30805 [000] 24001.058784838: ([kernel.kallsyms])                    in_lock_functions
+ uname   30805 [000] 24001.058784838: ([kernel.kallsyms])                preempt_count_sub
+ uname   30805 [000] 24001.058784838: ([kernel.kallsyms])            _copy_to_user
+ uname   30805 [000] 24001.058784838: ([kernel.kallsyms])        syscall_exit_to_user_mode
+ uname   30805 [000] 24001.058784838: ([kernel.kallsyms])            syscall_exit_work
+ uname   30805 [000] 24001.058784838: ([kernel.kallsyms])                perf_syscall_exit
+ uname   30805 [000] 24001.058784838: ([kernel.kallsyms])                    debug_smp_processor_id
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                    perf_trace_buf_alloc
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                        perf_swevent_get_recursion_context
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                            debug_smp_processor_id
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                        debug_smp_processor_id
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                    perf_tp_event
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                        perf_trace_buf_update
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                            tracing_gen_ctx_irq_test
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                        perf_swevent_event
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                            __perf_event_account_interrupt
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                __this_cpu_preempt_check
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                            perf_event_output_forward
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                perf_event_aux_pause
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                    ring_buffer_get
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                        __rcu_read_lock
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                        __rcu_read_unlock
+ uname   30805 [000] 24001.058785254: ([kernel.kallsyms])                                    pt_event_stop
+ uname   30805 [000] 24001.058785254: ([kernel.kallsyms])                                        debug_smp_processor_id
+ uname   30805 [000] 24001.058785254: ([kernel.kallsyms])                                        debug_smp_processor_id
+ uname   30805 [000] 24001.058785254: ([kernel.kallsyms])                                        native_write_msr
+ uname   30805 [000] 24001.058785463: ([kernel.kallsyms])                                        native_write_msr
+ uname   30805 [000] 24001.058785639: 0x0
+
+Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: James Clark <james.clark@arm.com>
+Link: https://lkml.kernel.org/r/20241022155920.17511-3-adrian.hunter@intel.com
+Stable-dep-of: 56799bc03565 ("perf: Fix hang while freeing sigtrap event")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/perf_event.h      | 28 ++++++++++++
+ include/uapi/linux/perf_event.h | 11 ++++-
+ kernel/events/core.c            | 75 +++++++++++++++++++++++++++++++--
+ kernel/events/internal.h        |  1 +
+ 4 files changed, 110 insertions(+), 5 deletions(-)
+
+diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
+index 347901525a46a..19551d664bce2 100644
+--- a/include/linux/perf_event.h
++++ b/include/linux/perf_event.h
+@@ -170,6 +170,12 @@ struct hw_perf_event {
+               };
+               struct { /* aux / Intel-PT */
+                       u64             aux_config;
++                      /*
++                       * For AUX area events, aux_paused cannot be a state
++                       * flag because it can be updated asynchronously to
++                       * state.
++                       */
++                      unsigned int    aux_paused;
+               };
+               struct { /* software */
+                       struct hrtimer  hrtimer;
+@@ -294,6 +300,7 @@ struct perf_event_pmu_context;
+ #define PERF_PMU_CAP_NO_EXCLUDE                       0x0040
+ #define PERF_PMU_CAP_AUX_OUTPUT                       0x0080
+ #define PERF_PMU_CAP_EXTENDED_HW_TYPE         0x0100
++#define PERF_PMU_CAP_AUX_PAUSE                        0x0200
+ /**
+  * pmu::scope
+@@ -384,6 +391,8 @@ struct pmu {
+ #define PERF_EF_START 0x01            /* start the counter when adding    */
+ #define PERF_EF_RELOAD        0x02            /* reload the counter when starting */
+ #define PERF_EF_UPDATE        0x04            /* update the counter when stopping */
++#define PERF_EF_PAUSE 0x08            /* AUX area event, pause tracing */
++#define PERF_EF_RESUME        0x10            /* AUX area event, resume tracing */
+       /*
+        * Adds/Removes a counter to/from the PMU, can be done inside a
+@@ -423,6 +432,18 @@ struct pmu {
+        *
+        * ->start() with PERF_EF_RELOAD will reprogram the counter
+        *  value, must be preceded by a ->stop() with PERF_EF_UPDATE.
++       *
++       * ->stop() with PERF_EF_PAUSE will stop as simply as possible. Will not
++       * overlap another ->stop() with PERF_EF_PAUSE nor ->start() with
++       * PERF_EF_RESUME.
++       *
++       * ->start() with PERF_EF_RESUME will start as simply as possible but
++       * only if the counter is not otherwise stopped. Will not overlap
++       * another ->start() with PERF_EF_RESUME nor ->stop() with
++       * PERF_EF_PAUSE.
++       *
++       * Notably, PERF_EF_PAUSE/PERF_EF_RESUME *can* be concurrent with other
++       * ->stop()/->start() invocations, just not itself.
+        */
+       void (*start)                   (struct perf_event *event, int flags);
+       void (*stop)                    (struct perf_event *event, int flags);
+@@ -1685,6 +1706,13 @@ static inline bool has_aux(struct perf_event *event)
+       return event->pmu->setup_aux;
+ }
++static inline bool has_aux_action(struct perf_event *event)
++{
++      return event->attr.aux_sample_size ||
++             event->attr.aux_pause ||
++             event->attr.aux_resume;
++}
++
+ static inline bool is_write_backward(struct perf_event *event)
+ {
+       return !!event->attr.write_backward;
+diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
+index 4842c36fdf801..0524d541d4e3d 100644
+--- a/include/uapi/linux/perf_event.h
++++ b/include/uapi/linux/perf_event.h
+@@ -511,7 +511,16 @@ struct perf_event_attr {
+       __u16   sample_max_stack;
+       __u16   __reserved_2;
+       __u32   aux_sample_size;
+-      __u32   __reserved_3;
++
++      union {
++              __u32   aux_action;
++              struct {
++                      __u32   aux_start_paused :  1, /* start AUX area tracing paused */
++                              aux_pause        :  1, /* on overflow, pause AUX area tracing */
++                              aux_resume       :  1, /* on overflow, resume AUX area tracing */
++                              __reserved_3     : 29;
++              };
++      };
+       /*
+        * User provided data if sigtrap=1, passed back to user via
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index b5ccf52bb71ba..bee6f88d0556b 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -2146,7 +2146,7 @@ static void perf_put_aux_event(struct perf_event *event)
+ static bool perf_need_aux_event(struct perf_event *event)
+ {
+-      return !!event->attr.aux_output || !!event->attr.aux_sample_size;
++      return event->attr.aux_output || has_aux_action(event);
+ }
+ static int perf_get_aux_event(struct perf_event *event,
+@@ -2171,6 +2171,10 @@ static int perf_get_aux_event(struct perf_event *event,
+           !perf_aux_output_match(event, group_leader))
+               return 0;
++      if ((event->attr.aux_pause || event->attr.aux_resume) &&
++          !(group_leader->pmu->capabilities & PERF_PMU_CAP_AUX_PAUSE))
++              return 0;
++
+       if (event->attr.aux_sample_size && !group_leader->pmu->snapshot_aux)
+               return 0;
+@@ -8029,6 +8033,49 @@ void perf_prepare_header(struct perf_event_header *header,
+       WARN_ON_ONCE(header->size & 7);
+ }
++static void __perf_event_aux_pause(struct perf_event *event, bool pause)
++{
++      if (pause) {
++              if (!event->hw.aux_paused) {
++                      event->hw.aux_paused = 1;
++                      event->pmu->stop(event, PERF_EF_PAUSE);
++              }
++      } else {
++              if (event->hw.aux_paused) {
++                      event->hw.aux_paused = 0;
++                      event->pmu->start(event, PERF_EF_RESUME);
++              }
++      }
++}
++
++static void perf_event_aux_pause(struct perf_event *event, bool pause)
++{
++      struct perf_buffer *rb;
++
++      if (WARN_ON_ONCE(!event))
++              return;
++
++      rb = ring_buffer_get(event);
++      if (!rb)
++              return;
++
++      scoped_guard (irqsave) {
++              /*
++               * Guard against self-recursion here. Another event could trip
++               * this same from NMI context.
++               */
++              if (READ_ONCE(rb->aux_in_pause_resume))
++                      break;
++
++              WRITE_ONCE(rb->aux_in_pause_resume, 1);
++              barrier();
++              __perf_event_aux_pause(event, pause);
++              barrier();
++              WRITE_ONCE(rb->aux_in_pause_resume, 0);
++      }
++      ring_buffer_put(rb);
++}
++
+ static __always_inline int
+ __perf_event_output(struct perf_event *event,
+                   struct perf_sample_data *data,
+@@ -9832,9 +9879,12 @@ static int __perf_event_overflow(struct perf_event *event,
+       ret = __perf_event_account_interrupt(event, throttle);
++      if (event->attr.aux_pause)
++              perf_event_aux_pause(event->aux_event, true);
++
+       if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
+           !bpf_overflow_handler(event, data, regs))
+-              return ret;
++              goto out;
+       /*
+        * XXX event_limit might not quite work as expected on inherited
+@@ -9896,6 +9946,9 @@ static int __perf_event_overflow(struct perf_event *event,
+               event->pending_wakeup = 1;
+               irq_work_queue(&event->pending_irq);
+       }
++out:
++      if (event->attr.aux_resume)
++              perf_event_aux_pause(event->aux_event, false);
+       return ret;
+ }
+@@ -12312,11 +12365,25 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
+       }
+       if (event->attr.aux_output &&
+-          !(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) {
++          (!(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT) ||
++           event->attr.aux_pause || event->attr.aux_resume)) {
+               err = -EOPNOTSUPP;
+               goto err_pmu;
+       }
++      if (event->attr.aux_pause && event->attr.aux_resume) {
++              err = -EINVAL;
++              goto err_pmu;
++      }
++
++      if (event->attr.aux_start_paused) {
++              if (!(pmu->capabilities & PERF_PMU_CAP_AUX_PAUSE)) {
++                      err = -EOPNOTSUPP;
++                      goto err_pmu;
++              }
++              event->hw.aux_paused = 1;
++      }
++
+       if (cgroup_fd != -1) {
+               err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader);
+               if (err)
+@@ -13112,7 +13179,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
+        * Grouping is not supported for kernel events, neither is 'AUX',
+        * make sure the caller's intentions are adjusted.
+        */
+-      if (attr->aux_output)
++      if (attr->aux_output || attr->aux_action)
+               return ERR_PTR(-EINVAL);
+       event = perf_event_alloc(attr, cpu, task, NULL, NULL,
+diff --git a/kernel/events/internal.h b/kernel/events/internal.h
+index e072d995d670f..249288d82b8dc 100644
+--- a/kernel/events/internal.h
++++ b/kernel/events/internal.h
+@@ -52,6 +52,7 @@ struct perf_buffer {
+       void                            (*free_aux)(void *);
+       refcount_t                      aux_refcount;
+       int                             aux_in_sampling;
++      int                             aux_in_pause_resume;
+       void                            **aux_pages;
+       void                            *aux_priv;
+-- 
+2.39.5
+
diff --git a/queue-6.12/perf-core-simplify-the-perf_event_alloc-error-path.patch b/queue-6.12/perf-core-simplify-the-perf_event_alloc-error-path.patch
new file mode 100644 (file)
index 0000000..6ce6439
--- /dev/null
@@ -0,0 +1,328 @@
+From ce9c49bfdf59d19fbfd78cc899cf08c158f93847 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Nov 2024 14:39:13 +0100
+Subject: perf/core: Simplify the perf_event_alloc() error path
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit c70ca298036c58a88686ff388d3d367e9d21acf0 ]
+
+The error cleanup sequence in perf_event_alloc() is a subset of the
+existing _free_event() function (it must of course be).
+
+Split this out into __free_event() and simplify the error path.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Ravi Bangoria <ravi.bangoria@amd.com>
+Link: https://lore.kernel.org/r/20241104135517.967889521@infradead.org
+Stable-dep-of: 56799bc03565 ("perf: Fix hang while freeing sigtrap event")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/perf_event.h |  16 +++--
+ kernel/events/core.c       | 138 ++++++++++++++++++-------------------
+ 2 files changed, 78 insertions(+), 76 deletions(-)
+
+diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
+index 19551d664bce2..db6d281644447 100644
+--- a/include/linux/perf_event.h
++++ b/include/linux/perf_event.h
+@@ -673,13 +673,15 @@ struct swevent_hlist {
+       struct rcu_head                 rcu_head;
+ };
+-#define PERF_ATTACH_CONTEXT   0x01
+-#define PERF_ATTACH_GROUP     0x02
+-#define PERF_ATTACH_TASK      0x04
+-#define PERF_ATTACH_TASK_DATA 0x08
+-#define PERF_ATTACH_ITRACE    0x10
+-#define PERF_ATTACH_SCHED_CB  0x20
+-#define PERF_ATTACH_CHILD     0x40
++#define PERF_ATTACH_CONTEXT   0x0001
++#define PERF_ATTACH_GROUP     0x0002
++#define PERF_ATTACH_TASK      0x0004
++#define PERF_ATTACH_TASK_DATA 0x0008
++#define PERF_ATTACH_ITRACE    0x0010
++#define PERF_ATTACH_SCHED_CB  0x0020
++#define PERF_ATTACH_CHILD     0x0040
++#define PERF_ATTACH_EXCLUSIVE 0x0080
++#define PERF_ATTACH_CALLCHAIN 0x0100
+ struct bpf_prog;
+ struct perf_cgroup;
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index bee6f88d0556b..255bae926f10a 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -5262,6 +5262,8 @@ static int exclusive_event_init(struct perf_event *event)
+                       return -EBUSY;
+       }
++      event->attach_state |= PERF_ATTACH_EXCLUSIVE;
++
+       return 0;
+ }
+@@ -5269,14 +5271,13 @@ static void exclusive_event_destroy(struct perf_event *event)
+ {
+       struct pmu *pmu = event->pmu;
+-      if (!is_exclusive_pmu(pmu))
+-              return;
+-
+       /* see comment in exclusive_event_init() */
+       if (event->attach_state & PERF_ATTACH_TASK)
+               atomic_dec(&pmu->exclusive_cnt);
+       else
+               atomic_inc(&pmu->exclusive_cnt);
++
++      event->attach_state &= ~PERF_ATTACH_EXCLUSIVE;
+ }
+ static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
+@@ -5335,40 +5336,20 @@ static void perf_pending_task_sync(struct perf_event *event)
+       rcuwait_wait_event(&event->pending_work_wait, !event->pending_work, TASK_UNINTERRUPTIBLE);
+ }
+-static void _free_event(struct perf_event *event)
++/* vs perf_event_alloc() error */
++static void __free_event(struct perf_event *event)
+ {
+-      irq_work_sync(&event->pending_irq);
+-      irq_work_sync(&event->pending_disable_irq);
+-      perf_pending_task_sync(event);
++      if (event->attach_state & PERF_ATTACH_CALLCHAIN)
++              put_callchain_buffers();
+-      unaccount_event(event);
++      kfree(event->addr_filter_ranges);
+-      security_perf_event_free(event);
+-
+-      if (event->rb) {
+-              /*
+-               * Can happen when we close an event with re-directed output.
+-               *
+-               * Since we have a 0 refcount, perf_mmap_close() will skip
+-               * over us; possibly making our ring_buffer_put() the last.
+-               */
+-              mutex_lock(&event->mmap_mutex);
+-              ring_buffer_attach(event, NULL);
+-              mutex_unlock(&event->mmap_mutex);
+-      }
++      if (event->attach_state & PERF_ATTACH_EXCLUSIVE)
++              exclusive_event_destroy(event);
+       if (is_cgroup_event(event))
+               perf_detach_cgroup(event);
+-      if (!event->parent) {
+-              if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+-                      put_callchain_buffers();
+-      }
+-
+-      perf_event_free_bpf_prog(event);
+-      perf_addr_filters_splice(event, NULL);
+-      kfree(event->addr_filter_ranges);
+-
+       if (event->destroy)
+               event->destroy(event);
+@@ -5379,22 +5360,58 @@ static void _free_event(struct perf_event *event)
+       if (event->hw.target)
+               put_task_struct(event->hw.target);
+-      if (event->pmu_ctx)
++      if (event->pmu_ctx) {
++              /*
++               * put_pmu_ctx() needs an event->ctx reference, because of
++               * epc->ctx.
++               */
++              WARN_ON_ONCE(!event->ctx);
++              WARN_ON_ONCE(event->pmu_ctx->ctx != event->ctx);
+               put_pmu_ctx(event->pmu_ctx);
++      }
+       /*
+-       * perf_event_free_task() relies on put_ctx() being 'last', in particular
+-       * all task references must be cleaned up.
++       * perf_event_free_task() relies on put_ctx() being 'last', in
++       * particular all task references must be cleaned up.
+        */
+       if (event->ctx)
+               put_ctx(event->ctx);
+-      exclusive_event_destroy(event);
+-      module_put(event->pmu->module);
++      if (event->pmu)
++              module_put(event->pmu->module);
+       call_rcu(&event->rcu_head, free_event_rcu);
+ }
++/* vs perf_event_alloc() success */
++static void _free_event(struct perf_event *event)
++{
++      irq_work_sync(&event->pending_irq);
++      irq_work_sync(&event->pending_disable_irq);
++      perf_pending_task_sync(event);
++
++      unaccount_event(event);
++
++      security_perf_event_free(event);
++
++      if (event->rb) {
++              /*
++               * Can happen when we close an event with re-directed output.
++               *
++               * Since we have a 0 refcount, perf_mmap_close() will skip
++               * over us; possibly making our ring_buffer_put() the last.
++               */
++              mutex_lock(&event->mmap_mutex);
++              ring_buffer_attach(event, NULL);
++              mutex_unlock(&event->mmap_mutex);
++      }
++
++      perf_event_free_bpf_prog(event);
++      perf_addr_filters_splice(event, NULL);
++
++      __free_event(event);
++}
++
+ /*
+  * Used to free events which have a known refcount of 1, such as in error paths
+  * where the event isn't exposed yet and inherited events.
+@@ -12014,8 +12031,10 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
+                       event->destroy(event);
+       }
+-      if (ret)
++      if (ret) {
++              event->pmu = NULL;
+               module_put(pmu->module);
++      }
+       return ret;
+ }
+@@ -12343,7 +12362,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
+        * See perf_output_read().
+        */
+       if (has_inherit_and_sample_read(attr) && !(attr->sample_type & PERF_SAMPLE_TID))
+-              goto err_ns;
++              goto err;
+       if (!has_branch_stack(event))
+               event->attr.branch_sample_type = 0;
+@@ -12351,7 +12370,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
+       pmu = perf_init_event(event);
+       if (IS_ERR(pmu)) {
+               err = PTR_ERR(pmu);
+-              goto err_ns;
++              goto err;
+       }
+       /*
+@@ -12361,25 +12380,25 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
+        */
+       if (pmu->task_ctx_nr == perf_invalid_context && (task || cgroup_fd != -1)) {
+               err = -EINVAL;
+-              goto err_pmu;
++              goto err;
+       }
+       if (event->attr.aux_output &&
+           (!(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT) ||
+            event->attr.aux_pause || event->attr.aux_resume)) {
+               err = -EOPNOTSUPP;
+-              goto err_pmu;
++              goto err;
+       }
+       if (event->attr.aux_pause && event->attr.aux_resume) {
+               err = -EINVAL;
+-              goto err_pmu;
++              goto err;
+       }
+       if (event->attr.aux_start_paused) {
+               if (!(pmu->capabilities & PERF_PMU_CAP_AUX_PAUSE)) {
+                       err = -EOPNOTSUPP;
+-                      goto err_pmu;
++                      goto err;
+               }
+               event->hw.aux_paused = 1;
+       }
+@@ -12387,12 +12406,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
+       if (cgroup_fd != -1) {
+               err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader);
+               if (err)
+-                      goto err_pmu;
++                      goto err;
+       }
+       err = exclusive_event_init(event);
+       if (err)
+-              goto err_pmu;
++              goto err;
+       if (has_addr_filter(event)) {
+               event->addr_filter_ranges = kcalloc(pmu->nr_addr_filters,
+@@ -12400,7 +12419,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
+                                                   GFP_KERNEL);
+               if (!event->addr_filter_ranges) {
+                       err = -ENOMEM;
+-                      goto err_per_task;
++                      goto err;
+               }
+               /*
+@@ -12425,41 +12444,22 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
+               if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
+                       err = get_callchain_buffers(attr->sample_max_stack);
+                       if (err)
+-                              goto err_addr_filters;
++                              goto err;
++                      event->attach_state |= PERF_ATTACH_CALLCHAIN;
+               }
+       }
+       err = security_perf_event_alloc(event);
+       if (err)
+-              goto err_callchain_buffer;
++              goto err;
+       /* symmetric to unaccount_event() in _free_event() */
+       account_event(event);
+       return event;
+-err_callchain_buffer:
+-      if (!event->parent) {
+-              if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+-                      put_callchain_buffers();
+-      }
+-err_addr_filters:
+-      kfree(event->addr_filter_ranges);
+-
+-err_per_task:
+-      exclusive_event_destroy(event);
+-
+-err_pmu:
+-      if (is_cgroup_event(event))
+-              perf_detach_cgroup(event);
+-      if (event->destroy)
+-              event->destroy(event);
+-      module_put(pmu->module);
+-err_ns:
+-      if (event->hw.target)
+-              put_task_struct(event->hw.target);
+-      call_rcu(&event->rcu_head, free_event_rcu);
+-
++err:
++      __free_event(event);
+       return ERR_PTR(err);
+ }
+-- 
+2.39.5
+
diff --git a/queue-6.12/perf-fix-hang-while-freeing-sigtrap-event.patch b/queue-6.12/perf-fix-hang-while-freeing-sigtrap-event.patch
new file mode 100644 (file)
index 0000000..99f9414
--- /dev/null
@@ -0,0 +1,260 @@
+From 4cec7ae4bf7ba81b02ad3cfc0bcab5e8ba1b295f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Mar 2025 14:54:46 +0100
+Subject: perf: Fix hang while freeing sigtrap event
+
+From: Frederic Weisbecker <frederic@kernel.org>
+
+[ Upstream commit 56799bc035658738f362acec3e7647bb84e68933 ]
+
+Perf can hang while freeing a sigtrap event if a related deferred
+signal hadn't managed to be sent before the file got closed:
+
+perf_event_overflow()
+   task_work_add(perf_pending_task)
+
+fput()
+   task_work_add(____fput())
+
+task_work_run()
+    ____fput()
+        perf_release()
+            perf_event_release_kernel()
+                _free_event()
+                    perf_pending_task_sync()
+                        task_work_cancel() -> FAILED
+                        rcuwait_wait_event()
+
+Once task_work_run() is running, the list of pending callbacks is
+removed from the task_struct and from this point on task_work_cancel()
+can't remove any pending and not yet started work items, hence the
+task_work_cancel() failure and the hang on rcuwait_wait_event().
+
+Task work could be changed to remove one work at a time, so a work
+running on the current task can always cancel a pending one; however,
+the wait / wake design is still subject to inverted dependencies when
+remote targets are involved, as pictured by Oleg:
+
+T1                                                      T2
+
+fd = perf_event_open(pid => T2->pid);                  fd = perf_event_open(pid => T1->pid);
+close(fd)                                              close(fd)
+    <IRQ>                                                  <IRQ>
+    perf_event_overflow()                                  perf_event_overflow()
+       task_work_add(perf_pending_task)                        task_work_add(perf_pending_task)
+    </IRQ>                                                 </IRQ>
+    fput()                                                 fput()
+        task_work_add(____fput())                              task_work_add(____fput())
+
+    task_work_run()                                        task_work_run()
+        ____fput()                                             ____fput()
+            perf_release()                                         perf_release()
+                perf_event_release_kernel()                            perf_event_release_kernel()
+                    _free_event()                                          _free_event()
+                        perf_pending_task_sync()                               perf_pending_task_sync()
+                            rcuwait_wait_event()                                   rcuwait_wait_event()
+
+Therefore the only option left is to acquire the event reference count
+upon queueing the perf task work and release it from the task work, just
+like it was done before 3a5465418f5f ("perf: Fix event leak upon exec and file release")
+but without the leaks it fixed.
+
+Some adjustments are necessary to make it work:
+
+* A child event might dereference its parent upon freeing. Care must be
+  taken to release the parent last.
+
+* Some places assuming the event doesn't have any reference held and
+  therefore can be freed right away must instead put the reference and
+  let the reference counting do its job.
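+
+A condensed sketch of the resulting lifetime rule (taken from the hunks
+below):
+
+  /* __perf_event_overflow(): hold a reference while work is queued */
+  WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
+
+  /* perf_pending_task(): drop it once the deferred work has run */
+  put_event(event);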
+
+Reported-by: "Yi Lai" <yi1.lai@linux.intel.com>
+Closes: https://lore.kernel.org/all/Zx9Losv4YcJowaP%2F@ly-workstation/
+Reported-by: syzbot+3c4321e10eea460eb606@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/all/673adf75.050a0220.87769.0024.GAE@google.com/
+Fixes: 3a5465418f5f ("perf: Fix event leak upon exec and file release")
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20250304135446.18905-1-frederic@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/perf_event.h |  1 -
+ kernel/events/core.c       | 64 +++++++++++---------------------------
+ 2 files changed, 18 insertions(+), 47 deletions(-)
+
+diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
+index db6d281644447..0997077bcc52a 100644
+--- a/include/linux/perf_event.h
++++ b/include/linux/perf_event.h
+@@ -833,7 +833,6 @@ struct perf_event {
+       struct irq_work                 pending_disable_irq;
+       struct callback_head            pending_task;
+       unsigned int                    pending_work;
+-      struct rcuwait                  pending_work_wait;
+       atomic_t                        event_limit;
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index 255bae926f10a..97af53c43608e 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -5312,30 +5312,6 @@ static bool exclusive_event_installable(struct perf_event *event,
+ static void perf_addr_filters_splice(struct perf_event *event,
+                                      struct list_head *head);
+-static void perf_pending_task_sync(struct perf_event *event)
+-{
+-      struct callback_head *head = &event->pending_task;
+-
+-      if (!event->pending_work)
+-              return;
+-      /*
+-       * If the task is queued to the current task's queue, we
+-       * obviously can't wait for it to complete. Simply cancel it.
+-       */
+-      if (task_work_cancel(current, head)) {
+-              event->pending_work = 0;
+-              local_dec(&event->ctx->nr_no_switch_fast);
+-              return;
+-      }
+-
+-      /*
+-       * All accesses related to the event are within the same RCU section in
+-       * perf_pending_task(). The RCU grace period before the event is freed
+-       * will make sure all those accesses are complete by then.
+-       */
+-      rcuwait_wait_event(&event->pending_work_wait, !event->pending_work, TASK_UNINTERRUPTIBLE);
+-}
+-
+ /* vs perf_event_alloc() error */
+ static void __free_event(struct perf_event *event)
+ {
+@@ -5388,7 +5364,6 @@ static void _free_event(struct perf_event *event)
+ {
+       irq_work_sync(&event->pending_irq);
+       irq_work_sync(&event->pending_disable_irq);
+-      perf_pending_task_sync(event);
+       unaccount_event(event);
+@@ -5481,10 +5456,17 @@ static void perf_remove_from_owner(struct perf_event *event)
+ static void put_event(struct perf_event *event)
+ {
++      struct perf_event *parent;
++
+       if (!atomic_long_dec_and_test(&event->refcount))
+               return;
++      parent = event->parent;
+       _free_event(event);
++
++      /* Matches the refcount bump in inherit_event() */
++      if (parent)
++              put_event(parent);
+ }
+ /*
+@@ -5568,11 +5550,6 @@ int perf_event_release_kernel(struct perf_event *event)
+               if (tmp == child) {
+                       perf_remove_from_context(child, DETACH_GROUP);
+                       list_move(&child->child_list, &free_list);
+-                      /*
+-                       * This matches the refcount bump in inherit_event();
+-                       * this can't be the last reference.
+-                       */
+-                      put_event(event);
+               } else {
+                       var = &ctx->refcount;
+               }
+@@ -5598,7 +5575,8 @@ int perf_event_release_kernel(struct perf_event *event)
+               void *var = &child->ctx->refcount;
+               list_del(&child->child_list);
+-              free_event(child);
++              /* Last reference unless ->pending_task work is pending */
++              put_event(child);
+               /*
+                * Wake any perf_event_free_task() waiting for this event to be
+@@ -5609,7 +5587,11 @@ int perf_event_release_kernel(struct perf_event *event)
+       }
+ no_ctx:
+-      put_event(event); /* Must be the 'last' reference */
++      /*
++       * Last reference unless ->pending_task work is pending on this event
++       * or any of its children.
++       */
++      put_event(event);
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
+@@ -6994,12 +6976,6 @@ static void perf_pending_task(struct callback_head *head)
+       struct perf_event *event = container_of(head, struct perf_event, pending_task);
+       int rctx;
+-      /*
+-       * All accesses to the event must belong to the same implicit RCU read-side
+-       * critical section as the ->pending_work reset. See comment in
+-       * perf_pending_task_sync().
+-       */
+-      rcu_read_lock();
+       /*
+        * If we 'fail' here, that's OK, it means recursion is already disabled
+        * and we won't recurse 'further'.
+@@ -7010,9 +6986,8 @@ static void perf_pending_task(struct callback_head *head)
+               event->pending_work = 0;
+               perf_sigtrap(event);
+               local_dec(&event->ctx->nr_no_switch_fast);
+-              rcuwait_wake_up(&event->pending_work_wait);
+       }
+-      rcu_read_unlock();
++      put_event(event);
+       if (rctx >= 0)
+               perf_swevent_put_recursion_context(rctx);
+@@ -9935,6 +9910,7 @@ static int __perf_event_overflow(struct perf_event *event,
+                   !task_work_add(current, &event->pending_task, notify_mode)) {
+                       event->pending_work = pending_id;
+                       local_inc(&event->ctx->nr_no_switch_fast);
++                      WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
+                       event->pending_addr = 0;
+                       if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR))
+@@ -12283,7 +12259,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
+       init_irq_work(&event->pending_irq, perf_pending_irq);
+       event->pending_disable_irq = IRQ_WORK_INIT_HARD(perf_pending_disable);
+       init_task_work(&event->pending_task, perf_pending_task);
+-      rcuwait_init(&event->pending_work_wait);
+       mutex_init(&event->mmap_mutex);
+       raw_spin_lock_init(&event->addr_filters.lock);
+@@ -13426,8 +13401,7 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
+                * Kick perf_poll() for is_event_hup();
+                */
+               perf_event_wakeup(parent_event);
+-              free_event(event);
+-              put_event(parent_event);
++              put_event(event);
+               return;
+       }
+@@ -13545,13 +13519,11 @@ static void perf_free_event(struct perf_event *event,
+       list_del_init(&event->child_list);
+       mutex_unlock(&parent->child_mutex);
+-      put_event(parent);
+-
+       raw_spin_lock_irq(&ctx->lock);
+       perf_group_detach(event);
+       list_del_event(event, ctx);
+       raw_spin_unlock_irq(&ctx->lock);
+-      free_event(event);
++      put_event(event);
+ }
+ /*
+-- 
+2.39.5
+
diff --git a/queue-6.12/selftests-futex-futex_waitv-wouldblock-test-should-f.patch b/queue-6.12/selftests-futex-futex_waitv-wouldblock-test-should-f.patch
new file mode 100644 (file)
index 0000000..984af98
--- /dev/null
@@ -0,0 +1,42 @@
+From 0ddbdb68cfc6faece946f50915074a9e390d96d2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Apr 2025 22:12:20 +0000
+Subject: selftests/futex: futex_waitv wouldblock test should fail
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Edward Liaw <edliaw@google.com>
+
+[ Upstream commit 7d50e00fef2832e98d7e06bbfc85c1d66ee110ca ]
+
+The test case should fail if -EWOULDBLOCK is not returned when the
+expected value differs from the actual value of the futex word.
+
+Link: https://lore.kernel.org/r/20250404221225.1596324-1-edliaw@google.com
+Fixes: 9d57f7c79748920636f8293d2f01192d702fe390 ("selftests: futex: Test sys_futex_waitv() wouldblock")
+Signed-off-by: Edward Liaw <edliaw@google.com>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: André Almeida <andrealmeid@igalia.com>
+Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../testing/selftests/futex/functional/futex_wait_wouldblock.c  | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
+index 7d7a6a06cdb75..2d8230da90642 100644
+--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
++++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
+@@ -98,7 +98,7 @@ int main(int argc, char *argv[])
+       info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+       res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
+       if (!res || errno != EWOULDBLOCK) {
+-              ksft_test_result_pass("futex_waitv returned: %d %s\n",
++              ksft_test_result_fail("futex_waitv returned: %d %s\n",
+                                     res ? errno : res,
+                                     res ? strerror(errno) : "");
+               ret = RET_FAIL;
+-- 
+2.39.5
+
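For reference, the contract this test exercises: a futex wait whose
expected value does not match the current value of the futex word must
fail immediately with EWOULDBLOCK (the same errno as EAGAIN on Linux)
rather than block. A self-contained sketch using the classic FUTEX_WAIT
operation, which shares that contract with futex_waitv():

    /* Sketch: FUTEX_WAIT reports EWOULDBLOCK on a value mismatch. */
    #include <errno.h>
    #include <linux/futex.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            uint32_t f = 1;

            /* The expected value (f + 1) differs from the actual value,
             * so the kernel must not block: the call fails at once with
             * errno == EAGAIN (== EWOULDBLOCK on Linux). */
            long res = syscall(SYS_futex, &f, FUTEX_WAIT, f + 1, NULL, NULL, 0);

            if (res == -1 && errno == EWOULDBLOCK)
                    printf("ok: wouldblock reported as expected\n");
            else
                    printf("unexpected: res=%ld errno=%d\n", res, errno);
            return 0;
    }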
diff --git a/queue-6.12/series b/queue-6.12/series
index 5d6c73e1ad90b057c5dba21c5dea20b9cff79395..f1e56f9a6776a6942282dff9aa37e1a32cc97ee3 100644
--- a/queue-6.12/series
+++ b/queue-6.12/series
@@ -1 +1,44 @@
 asoc-intel-adl-add-2xrt1316-audio-configuration.patch
+cgroup-cpuset-fix-incorrect-isolated_cpus-update-in-.patch
+cgroup-cpuset-fix-error-handling-in-remote_partition.patch
+cgroup-cpuset-revert-allow-suppression-of-sched-doma.patch
+cgroup-cpuset-enforce-at-most-one-rebuild_sched_doma.patch
+cgroup-cpuset-further-optimize-code-if-config_cpuset.patch
+cgroup-cpuset-fix-race-between-newly-created-partiti.patch
+gpiolib-of-fix-the-choice-for-ingenic-nand-quirk.patch
+selftests-futex-futex_waitv-wouldblock-test-should-f.patch
+ublk-refactor-recovery-configuration-flag-helpers.patch
+ublk-fix-handling-recovery-reissue-in-ublk_abort_que.patch
+drm-i915-disable-rpg-during-live-selftest.patch
+x86-acpi-don-t-limit-cpus-to-1-for-xen-pv-guests-due.patch
+drm-xe-hw_engine-define-sysfs_ops-on-all-directories.patch
+ata-pata_pxa-fix-potential-null-pointer-dereference-.patch
+objtool-fix-insn_context_switch-handling-in-validate.patch
+tipc-fix-memory-leak-in-tipc_link_xmit.patch
+codel-remove-sch-q.qlen-check-before-qdisc_tree_redu.patch
+net-tls-explicitly-disallow-disconnect.patch
+octeontx2-pf-qos-fix-vf-root-node-parent-queue-index.patch
+tc-ensure-we-have-enough-buffer-space-when-sending-f.patch
+net-ethtool-don-t-call-.cleanup_data-when-prepare_da.patch
+drm-tests-modeset-fix-drm_display_mode-memory-leak.patch
+drm-tests-helpers-create-kunit-helper-to-destroy-a-d.patch
+drm-tests-cmdline-fix-drm_display_mode-memory-leak.patch
+drm-tests-modes-fix-drm_display_mode-memory-leak.patch
+drm-tests-probe-helper-fix-drm_display_mode-memory-l.patch
+net-libwx-handle-page_pool_dev_alloc_pages-error.patch
+ata-sata_sx4-add-error-handling-in-pdc20621_i2c_read.patch
+drm-i915-huc-fix-fence-not-released-on-early-probe-e.patch
+nvmet-fcloop-swap-list_add_tail-arguments.patch
+net_sched-sch_sfq-use-a-temporary-work-area-for-vali.patch
+net_sched-sch_sfq-move-the-limit-validation.patch
+smb-client-fix-uaf-in-decryption-with-multichannel.patch
+net-phy-move-phy_link_change-prior-to-mdio_bus_phy_m.patch
+net-phy-allow-mdio-bus-pm-ops-to-start-stop-state-ma.patch
+ipv6-align-behavior-across-nexthops-during-path-sele.patch
+net-ppp-add-bound-checking-for-skb-data-on-ppp_sync_.patch
+nft_set_pipapo-fix-incorrect-avx2-match-of-5th-field.patch
+iommu-exynos-fix-suspend-resume-with-identity-domain.patch
+iommu-mediatek-fix-null-pointer-deference-in-mtk_iom.patch
+perf-core-add-aux_pause-aux_resume-aux_start_paused.patch
+perf-core-simplify-the-perf_event_alloc-error-path.patch
+perf-fix-hang-while-freeing-sigtrap-event.patch
diff --git a/queue-6.12/smb-client-fix-uaf-in-decryption-with-multichannel.patch b/queue-6.12/smb-client-fix-uaf-in-decryption-with-multichannel.patch
new file mode 100644
index 0000000..7d8a04b
--- /dev/null
@@ -0,0 +1,164 @@
+From 66dc2f6bcc583b14cb6c928dbd13f7ef08ddd693 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Apr 2025 11:14:21 -0300
+Subject: smb: client: fix UAF in decryption with multichannel
+
+From: Paulo Alcantara <pc@manguebit.com>
+
+[ Upstream commit 9502dd5c7029902f4a425bf959917a5a9e7c0e50 ]
+
+After commit f7025d861694 ("smb: client: allocate crypto only for
+primary server") and commit b0abcd65ec54 ("smb: client: fix UAF in
+async decryption"), the channels started reusing the AEAD TFM from the
+primary channel to perform synchronous decryption, but that can't be
+done, as there could be multiple cifsd threads (one per channel)
+simultaneously accessing it to perform decryption.
+
+This fixes the following KASAN splat when running fstest generic/249
+with 'vers=3.1.1,multichannel,max_channels=4,seal' against Windows
+Server 2022:
+
+BUG: KASAN: slab-use-after-free in gf128mul_4k_lle+0xba/0x110
+Read of size 8 at addr ffff8881046c18a0 by task cifsd/986
+CPU: 3 UID: 0 PID: 986 Comm: cifsd Not tainted 6.15.0-rc1 #1
+PREEMPT(voluntary)
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-3.fc41
+04/01/2014
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x5d/0x80
+ print_report+0x156/0x528
+ ? gf128mul_4k_lle+0xba/0x110
+ ? __virt_addr_valid+0x145/0x300
+ ? __phys_addr+0x46/0x90
+ ? gf128mul_4k_lle+0xba/0x110
+ kasan_report+0xdf/0x1a0
+ ? gf128mul_4k_lle+0xba/0x110
+ gf128mul_4k_lle+0xba/0x110
+ ghash_update+0x189/0x210
+ shash_ahash_update+0x295/0x370
+ ? __pfx_shash_ahash_update+0x10/0x10
+ ? __pfx_shash_ahash_update+0x10/0x10
+ ? __pfx_extract_iter_to_sg+0x10/0x10
+ ? ___kmalloc_large_node+0x10e/0x180
+ ? __asan_memset+0x23/0x50
+ crypto_ahash_update+0x3c/0xc0
+ gcm_hash_assoc_remain_continue+0x93/0xc0
+ crypt_message+0xe09/0xec0 [cifs]
+ ? __pfx_crypt_message+0x10/0x10 [cifs]
+ ? _raw_spin_unlock+0x23/0x40
+ ? __pfx_cifs_readv_from_socket+0x10/0x10 [cifs]
+ decrypt_raw_data+0x229/0x380 [cifs]
+ ? __pfx_decrypt_raw_data+0x10/0x10 [cifs]
+ ? __pfx_cifs_read_iter_from_socket+0x10/0x10 [cifs]
+ smb3_receive_transform+0x837/0xc80 [cifs]
+ ? __pfx_smb3_receive_transform+0x10/0x10 [cifs]
+ ? __pfx___might_resched+0x10/0x10
+ ? __pfx_smb3_is_transform_hdr+0x10/0x10 [cifs]
+ cifs_demultiplex_thread+0x692/0x1570 [cifs]
+ ? __pfx_cifs_demultiplex_thread+0x10/0x10 [cifs]
+ ? rcu_is_watching+0x20/0x50
+ ? rcu_lockdep_current_cpu_online+0x62/0xb0
+ ? find_held_lock+0x32/0x90
+ ? kvm_sched_clock_read+0x11/0x20
+ ? local_clock_noinstr+0xd/0xd0
+ ? trace_irq_enable.constprop.0+0xa8/0xe0
+ ? __pfx_cifs_demultiplex_thread+0x10/0x10 [cifs]
+ kthread+0x1fe/0x380
+ ? kthread+0x10f/0x380
+ ? __pfx_kthread+0x10/0x10
+ ? local_clock_noinstr+0xd/0xd0
+ ? ret_from_fork+0x1b/0x60
+ ? local_clock+0x15/0x30
+ ? lock_release+0x29b/0x390
+ ? rcu_is_watching+0x20/0x50
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork+0x31/0x60
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork_asm+0x1a/0x30
+ </TASK>
+
+Tested-by: David Howells <dhowells@redhat.com>
+Reported-by: Steve French <stfrench@microsoft.com>
+Closes: https://lore.kernel.org/r/CAH2r5mu6Yc0-RJXM3kFyBYUB09XmXBrNodOiCVR4EDrmxq5Szg@mail.gmail.com
+Fixes: f7025d861694 ("smb: client: allocate crypto only for primary server")
+Fixes: b0abcd65ec54 ("smb: client: fix UAF in async decryption")
+Signed-off-by: Paulo Alcantara (Red Hat) <pc@manguebit.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/smb/client/cifsencrypt.c | 16 +++++-----------
+ fs/smb/client/smb2ops.c     |  6 +++---
+ fs/smb/client/smb2pdu.c     | 11 ++---------
+ 3 files changed, 10 insertions(+), 23 deletions(-)
+
+diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c
+index 7a43daacc8159..7c61c1e944c7a 100644
+--- a/fs/smb/client/cifsencrypt.c
++++ b/fs/smb/client/cifsencrypt.c
+@@ -702,18 +702,12 @@ cifs_crypto_secmech_release(struct TCP_Server_Info *server)
+       cifs_free_hash(&server->secmech.md5);
+       cifs_free_hash(&server->secmech.sha512);
+-      if (!SERVER_IS_CHAN(server)) {
+-              if (server->secmech.enc) {
+-                      crypto_free_aead(server->secmech.enc);
+-                      server->secmech.enc = NULL;
+-              }
+-
+-              if (server->secmech.dec) {
+-                      crypto_free_aead(server->secmech.dec);
+-                      server->secmech.dec = NULL;
+-              }
+-      } else {
++      if (server->secmech.enc) {
++              crypto_free_aead(server->secmech.enc);
+               server->secmech.enc = NULL;
++      }
++      if (server->secmech.dec) {
++              crypto_free_aead(server->secmech.dec);
+               server->secmech.dec = NULL;
+       }
+ }
+diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
+index 516be8c0b2a9b..590b70d71694b 100644
+--- a/fs/smb/client/smb2ops.c
++++ b/fs/smb/client/smb2ops.c
+@@ -4576,9 +4576,9 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
+                       return rc;
+               }
+       } else {
+-              if (unlikely(!server->secmech.dec))
+-                      return -EIO;
+-
++              rc = smb3_crypto_aead_allocate(server);
++              if (unlikely(rc))
++                      return rc;
+               tfm = server->secmech.dec;
+       }
+diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
+index 75b13175a2e78..1a7b82664255a 100644
+--- a/fs/smb/client/smb2pdu.c
++++ b/fs/smb/client/smb2pdu.c
+@@ -1269,15 +1269,8 @@ SMB2_negotiate(const unsigned int xid,
+                       cifs_server_dbg(VFS, "Missing expected negotiate contexts\n");
+       }
+-      if (server->cipher_type && !rc) {
+-              if (!SERVER_IS_CHAN(server)) {
+-                      rc = smb3_crypto_aead_allocate(server);
+-              } else {
+-                      /* For channels, just reuse the primary server crypto secmech. */
+-                      server->secmech.enc = server->primary_server->secmech.enc;
+-                      server->secmech.dec = server->primary_server->secmech.dec;
+-              }
+-      }
++      if (server->cipher_type && !rc)
++              rc = smb3_crypto_aead_allocate(server);
+ neg_exit:
+       free_rsp_buf(resp_buftype, rsp);
+       return rc;
+-- 
+2.39.5
+
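The rule the fix restores: an AEAD transform that carries per-request
state must not be shared by threads decrypting concurrently, so each
channel (served by its own cifsd thread) now allocates its own enc/dec
TFMs on demand via smb3_crypto_aead_allocate() instead of aliasing the
primary server's. A rough userspace sketch of that lazy per-channel
allocation shape, with invented names:

    /* Sketch: give every channel its own context instead of aliasing
     * the primary channel's (illustrative names only). */
    #include <pthread.h>
    #include <stdlib.h>

    struct crypto_ctx { unsigned char state[64]; };

    struct channel {
            pthread_mutex_t lock;
            struct crypto_ctx *dec; /* owned by this channel, never shared */
    };

    /* Allocate the channel's own decryption context on first use. */
    static int channel_get_dec_ctx(struct channel *ch, struct crypto_ctx **out)
    {
            int rc = 0;

            pthread_mutex_lock(&ch->lock);
            if (!ch->dec) {
                    ch->dec = calloc(1, sizeof(*ch->dec));
                    if (!ch->dec)
                            rc = -1;        /* -ENOMEM in the kernel version */
            }
            *out = ch->dec;
            pthread_mutex_unlock(&ch->lock);
            return rc;
    }

    int main(void)
    {
            struct channel a = { PTHREAD_MUTEX_INITIALIZER, NULL };
            struct channel b = { PTHREAD_MUTEX_INITIALIZER, NULL };
            struct crypto_ctx *ca = NULL, *cb = NULL;

            channel_get_dec_ctx(&a, &ca);
            channel_get_dec_ctx(&b, &cb);
            /* ca != cb: decryption on a and b can't race on shared state. */
            return ca == cb;
    }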
diff --git a/queue-6.12/tc-ensure-we-have-enough-buffer-space-when-sending-f.patch b/queue-6.12/tc-ensure-we-have-enough-buffer-space-when-sending-f.patch
new file mode 100644 (file)
index 0000000..df20ae0
--- /dev/null
@@ -0,0 +1,162 @@
+From fd9378d998f2601c51d6855ccf4497ac7f9dbe4e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Apr 2025 12:55:34 +0200
+Subject: tc: Ensure we have enough buffer space when sending filter netlink
+ notifications
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Toke Høiland-Jørgensen <toke@redhat.com>
+
+[ Upstream commit 369609fc6272c2f6ad666ba4fd913f3baf32908f ]
+
+The tfilter_notify() and tfilter_del_notify() functions assume that
+NLMSG_GOODSIZE is always enough to dump the filter chain. This is not
+always the case, which can lead to silent notify failures (because the
+return code of tfilter_notify() is not always checked). In particular,
+this can lead to NLM_F_ECHO not being honoured even though an action
+succeeds, which forces userspace to create workarounds[0].
+
+Fix this by increasing the message size if dumping the filter chain into
+the allocated skb fails. Use the size of the incoming skb as a size hint
+if set, so we can start at a larger value when appropriate.
+
+To trigger this, run the following commands:
+
+ # ip link add type veth
+ # tc qdisc replace dev veth0 root handle 1: fq_codel
+ # tc -echo filter add dev veth0 parent 1: u32 match u32 0 0 $(for i in $(seq 32); do echo action pedit munge ip dport set 22; done)
+
+Before this fix, tc just returns:
+
+Not a filter(cmd 2)
+
+After the fix, we get the correct echo:
+
+added filter dev veth0 parent 1: protocol all pref 49152 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 terminal flowid not_in_hw
+  match 00000000/00000000 at 0
+       action order 1:  pedit action pass keys 1
+       index 1 ref 1 bind 1
+       key #0  at 20: val 00000016 mask ffff0000
+[repeated 32 times]
+
+[0] https://github.com/openvswitch/ovs/commit/106ef21860c935e5e0017a88bf42b94025c4e511
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: Frode Nordahl <frode.nordahl@canonical.com>
+Closes: https://bugs.launchpad.net/ubuntu/+source/openvswitch/+bug/2018500
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Link: https://patch.msgid.link/20250407105542.16601-1-toke@redhat.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_api.c | 66 ++++++++++++++++++++++++++++++---------------
+ 1 file changed, 45 insertions(+), 21 deletions(-)
+
+diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
+index 998ea3b5badfc..a3bab5e27e71b 100644
+--- a/net/sched/cls_api.c
++++ b/net/sched/cls_api.c
+@@ -2051,6 +2051,7 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
+       struct tcmsg *tcm;
+       struct nlmsghdr  *nlh;
+       unsigned char *b = skb_tail_pointer(skb);
++      int ret = -EMSGSIZE;
+       nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
+       if (!nlh)
+@@ -2095,11 +2096,45 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
+       return skb->len;
++cls_op_not_supp:
++      ret = -EOPNOTSUPP;
+ out_nlmsg_trim:
+ nla_put_failure:
+-cls_op_not_supp:
+       nlmsg_trim(skb, b);
+-      return -1;
++      return ret;
++}
++
++static struct sk_buff *tfilter_notify_prep(struct net *net,
++                                         struct sk_buff *oskb,
++                                         struct nlmsghdr *n,
++                                         struct tcf_proto *tp,
++                                         struct tcf_block *block,
++                                         struct Qdisc *q, u32 parent,
++                                         void *fh, int event,
++                                         u32 portid, bool rtnl_held,
++                                         struct netlink_ext_ack *extack)
++{
++      unsigned int size = oskb ? max(NLMSG_GOODSIZE, oskb->len) : NLMSG_GOODSIZE;
++      struct sk_buff *skb;
++      int ret;
++
++retry:
++      skb = alloc_skb(size, GFP_KERNEL);
++      if (!skb)
++              return ERR_PTR(-ENOBUFS);
++
++      ret = tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
++                          n->nlmsg_seq, n->nlmsg_flags, event, false,
++                          rtnl_held, extack);
++      if (ret <= 0) {
++              kfree_skb(skb);
++              if (ret == -EMSGSIZE) {
++                      size += NLMSG_GOODSIZE;
++                      goto retry;
++              }
++              return ERR_PTR(-EINVAL);
++      }
++      return skb;
+ }
+ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
+@@ -2115,16 +2150,10 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
+       if (!unicast && !rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
+               return 0;
+-      skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+-      if (!skb)
+-              return -ENOBUFS;
+-
+-      if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
+-                        n->nlmsg_seq, n->nlmsg_flags, event,
+-                        false, rtnl_held, extack) <= 0) {
+-              kfree_skb(skb);
+-              return -EINVAL;
+-      }
++      skb = tfilter_notify_prep(net, oskb, n, tp, block, q, parent, fh, event,
++                                portid, rtnl_held, extack);
++      if (IS_ERR(skb))
++              return PTR_ERR(skb);
+       if (unicast)
+               err = rtnl_unicast(skb, net, portid);
+@@ -2147,16 +2176,11 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
+       if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
+               return tp->ops->delete(tp, fh, last, rtnl_held, extack);
+-      skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+-      if (!skb)
+-              return -ENOBUFS;
+-
+-      if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
+-                        n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
+-                        false, rtnl_held, extack) <= 0) {
++      skb = tfilter_notify_prep(net, oskb, n, tp, block, q, parent, fh,
++                                RTM_DELTFILTER, portid, rtnl_held, extack);
++      if (IS_ERR(skb)) {
+               NL_SET_ERR_MSG(extack, "Failed to build del event notification");
+-              kfree_skb(skb);
+-              return -EINVAL;
++              return PTR_ERR(skb);
+       }
+       err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
+-- 
+2.39.5
+
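At its core the fix is a grow-and-retry serialization loop: allocate a
buffer from a size hint, attempt the dump, and on -EMSGSIZE free the
buffer, grow it by one NLMSG_GOODSIZE step, and start over. A small
userspace sketch of the same loop, with an assumed fill() callback in
place of tcf_fill_node():

    /* Sketch: grow-and-retry serialization on EMSGSIZE (assumed names). */
    #include <errno.h>
    #include <stdlib.h>
    #include <string.h>

    #define STEP 4096       /* stand-in for NLMSG_GOODSIZE */

    /* Returns bytes written, or -EMSGSIZE if buf is too small. */
    static int fill(char *buf, size_t size, const char *msg)
    {
            size_t need = strlen(msg) + 1;

            if (need > size)
                    return -EMSGSIZE;
            memcpy(buf, msg, need);
            return (int)need;
    }

    static char *serialize(const char *msg, size_t hint, int *err)
    {
            size_t size = hint > STEP ? hint : STEP;

            for (;;) {
                    char *buf = malloc(size);
                    int ret;

                    if (!buf) {
                            *err = -ENOBUFS;
                            return NULL;
                    }
                    ret = fill(buf, size, msg);
                    if (ret > 0)
                            return buf;
                    free(buf);
                    if (ret != -EMSGSIZE) {
                            *err = -EINVAL;
                            return NULL;
                    }
                    size += STEP;   /* too small: enlarge and retry */
            }
    }

    int main(void)
    {
            int err = 0;
            char *buf = serialize("hello", 0, &err);

            free(buf);
            return err ? 1 : 0;
    }

tfilter_notify_prep() follows the same shape and seeds the size from
max(NLMSG_GOODSIZE, oskb->len), so a replay of a large request starts
big enough on the first attempt.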
diff --git a/queue-6.12/tipc-fix-memory-leak-in-tipc_link_xmit.patch b/queue-6.12/tipc-fix-memory-leak-in-tipc_link_xmit.patch
new file mode 100644 (file)
index 0000000..3b8a18b
--- /dev/null
@@ -0,0 +1,40 @@
+From fb80ff152a6b29870c5fdf14efa9ebae2579894d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 3 Apr 2025 09:24:31 +0000
+Subject: tipc: fix memory leak in tipc_link_xmit
+
+From: Tung Nguyen <tung.quang.nguyen@est.tech>
+
+[ Upstream commit 69ae94725f4fc9e75219d2d69022029c5b24bc9a ]
+
+In case the backlog transmit queue for system-importance messages is
+overloaded, tipc_link_xmit() returns -ENOBUFS but the skb list is not
+purged. This leads to a memory leak and to subsequent skb allocation
+failures.
+
+This commit fixes this issue by purging the skb list before tipc_link_xmit()
+returns.
+
+Fixes: 365ad353c256 ("tipc: reduce risk of user starvation during link congestion")
+Signed-off-by: Tung Nguyen <tung.quang.nguyen@est.tech>
+Link: https://patch.msgid.link/20250403092431.514063-1-tung.quang.nguyen@est.tech
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tipc/link.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/tipc/link.c b/net/tipc/link.c
+index 5c2088a469cea..5689e1f485479 100644
+--- a/net/tipc/link.c
++++ b/net/tipc/link.c
+@@ -1046,6 +1046,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
+       if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) {
+               if (imp == TIPC_SYSTEM_IMPORTANCE) {
+                       pr_warn("%s<%s>, link overflow", link_rst_msg, l->name);
++                      __skb_queue_purge(list);
+                       return -ENOBUFS;
+               }
+               rc = link_schedule_user(l, hdr);
+-- 
+2.39.5
+
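The one-line fix enforces an ownership convention: tipc_link_xmit()
consumes the skb list on every path, so the error return must purge the
list rather than leave the buffers behind for callers that assume
ownership was transferred. A toy sketch of that convention with a plain
linked list (illustrative, not TIPC code):

    /* Sketch: a function owning a list must consume it on error paths too. */
    #include <errno.h>
    #include <stdlib.h>

    struct node { struct node *next; };

    static void purge(struct node **head)
    {
            while (*head) {
                    struct node *n = *head;

                    *head = n->next;
                    free(n);
            }
    }

    /* Takes ownership of *head: without the purge on the overload path,
     * every rejected list would be leaked. */
    static int xmit(struct node **head, int overloaded)
    {
            if (overloaded) {
                    purge(head);    /* the one-line fix in tipc_link_xmit() */
                    return -ENOBUFS;
            }
            purge(head);            /* "transmit", then release */
            return 0;
    }

    int main(void)
    {
            struct node *list = calloc(1, sizeof(*list));

            return xmit(&list, 1) == -ENOBUFS ? 0 : 1;
    }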
diff --git a/queue-6.12/ublk-fix-handling-recovery-reissue-in-ublk_abort_que.patch b/queue-6.12/ublk-fix-handling-recovery-reissue-in-ublk_abort_que.patch
new file mode 100644 (file)
index 0000000..b67fc77
--- /dev/null
@@ -0,0 +1,91 @@
+From 6250772c615286c89bd110e3fe5b0f58244d23ff Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Apr 2025 09:14:41 +0800
+Subject: ublk: fix handling recovery & reissue in ublk_abort_queue()
+
+From: Ming Lei <ming.lei@redhat.com>
+
+[ Upstream commit 6ee6bd5d4fce502a5b5a2ea805e9ff16e6aa890f ]
+
+Commit 8284066946e6 ("ublk: grab request reference when the request is handled
+by userspace") doesn't grab a request reference in case of recovery
+reissue. The request can then be requeued, re-dispatched, and failed
+while the uring command is being canceled.
+
+If it is a zero-copy (zc) request, the request can be freed before
+io_uring returns the zc buffer, which causes a kernel panic:
+
+[  126.773061] BUG: kernel NULL pointer dereference, address: 00000000000000c8
+[  126.773657] #PF: supervisor read access in kernel mode
+[  126.774052] #PF: error_code(0x0000) - not-present page
+[  126.774455] PGD 0 P4D 0
+[  126.774698] Oops: Oops: 0000 [#1] SMP NOPTI
+[  126.775034] CPU: 13 UID: 0 PID: 1612 Comm: kworker/u64:55 Not tainted 6.14.0_blk+ #182 PREEMPT(full)
+[  126.775676] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-1.fc39 04/01/2014
+[  126.776275] Workqueue: iou_exit io_ring_exit_work
+[  126.776651] RIP: 0010:ublk_io_release+0x14/0x130 [ublk_drv]
+
+Fix this by always grabbing a request reference when aborting the request.
+
+Reported-by: Caleb Sander Mateos <csander@purestorage.com>
+Closes: https://lore.kernel.org/linux-block/CADUfDZodKfOGUeWrnAxcZiLT+puaZX8jDHoj_sfHZCOZwhzz6A@mail.gmail.com/
+Fixes: 8284066946e6 ("ublk: grab request reference when the request is handled by userspace")
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Link: https://lore.kernel.org/r/20250409011444.2142010-2-ming.lei@redhat.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/ublk_drv.c | 30 ++++++++++++++++++++++++++----
+ 1 file changed, 26 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
+index dd328d40c7de5..38b9e485e520d 100644
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -1081,6 +1081,25 @@ static void ublk_complete_rq(struct kref *ref)
+       __ublk_complete_rq(req);
+ }
++static void ublk_do_fail_rq(struct request *req)
++{
++      struct ublk_queue *ubq = req->mq_hctx->driver_data;
++
++      if (ublk_nosrv_should_reissue_outstanding(ubq->dev))
++              blk_mq_requeue_request(req, false);
++      else
++              __ublk_complete_rq(req);
++}
++
++static void ublk_fail_rq_fn(struct kref *ref)
++{
++      struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data,
++                      ref);
++      struct request *req = blk_mq_rq_from_pdu(data);
++
++      ublk_do_fail_rq(req);
++}
++
+ /*
+  * Since __ublk_rq_task_work always fails requests immediately during
+  * exiting, __ublk_fail_req() is only called from abort context during
+@@ -1094,10 +1113,13 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
+ {
+       WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);
+-      if (ublk_nosrv_should_reissue_outstanding(ubq->dev))
+-              blk_mq_requeue_request(req, false);
+-      else
+-              ublk_put_req_ref(ubq, req);
++      if (ublk_need_req_ref(ubq)) {
++              struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
++
++              kref_put(&data->ref, ublk_fail_rq_fn);
++      } else {
++              ublk_do_fail_rq(req);
++      }
+ }
+ static void ubq_complete_io_cmd(struct ublk_io *io, int res,
+-- 
+2.39.5
+
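The fix folds the failure path into the request's existing reference
count: instead of requeuing or completing directly, __ublk_fail_req()
drops its reference and lets the release callback make the
requeue-or-complete decision only once the last holder (such as a
zero-copy buffer still owned by io_uring) is gone. A minimal userspace
model of that kref_put()-with-release pattern, using hypothetical names:

    /* Sketch: the release callback runs only on the final reference drop. */
    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct req {
            atomic_int ref;
            int reissue;    /* stand-in for the recovery-reissue policy */
    };

    static void fail_release(struct req *r)
    {
            /* Runs when the last reference is gone, so no other owner
             * (e.g. a zc buffer holder) can still touch r. */
            if (r->reissue)
                    printf("requeue request\n");
            else
                    printf("complete request with error\n");
            free(r);
    }

    static void req_put(struct req *r, void (*release)(struct req *))
    {
            if (atomic_fetch_sub(&r->ref, 1) == 1)
                    release(r);
    }

    int main(void)
    {
            struct req *r = calloc(1, sizeof(*r));

            atomic_init(&r->ref, 2);        /* abort path + userspace hold refs */
            r->reissue = 1;
            req_put(r, fail_release);       /* abort: userspace still owns r */
            req_put(r, fail_release);       /* last put runs fail_release() */
            return 0;
    }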
diff --git a/queue-6.12/ublk-refactor-recovery-configuration-flag-helpers.patch b/queue-6.12/ublk-refactor-recovery-configuration-flag-helpers.patch
new file mode 100644 (file)
index 0000000..76b0bdc
--- /dev/null
@@ -0,0 +1,189 @@
+From 6b548abc33fd25278d5394f234f6573d8152f44a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Oct 2024 12:24:15 -0600
+Subject: ublk: refactor recovery configuration flag helpers
+
+From: Uday Shankar <ushankar@purestorage.com>
+
+[ Upstream commit 3b939b8f715e014adcc48f7827fe9417252f0833 ]
+
+ublk currently supports the following behaviors on ublk server exit:
+
+A: outstanding I/Os get errors, subsequently issued I/Os get errors
+B: outstanding I/Os get errors, subsequently issued I/Os queue
+C: outstanding I/Os get reissued, subsequently issued I/Os queue
+
+and the following behaviors for recovery of preexisting block devices by
+a future incarnation of the ublk server:
+
+1: ublk devices stopped on ublk server exit (no recovery possible)
+2: ublk devices are recoverable using start/end_recovery commands
+
+The userspace interface allows selection of combinations of these
+behaviors using flags specified at device creation time, namely:
+
+default behavior: A + 1
+UBLK_F_USER_RECOVERY: B + 2
+UBLK_F_USER_RECOVERY|UBLK_F_USER_RECOVERY_REISSUE: C + 2
+
+We can't easily change the userspace interface to allow independent
+selection of one of {A, B, C} and one of {1, 2}, but we can refactor the
+internal helpers which test for the flags. Replace the existing helpers
+with the following set:
+
+ublk_nosrv_should_reissue_outstanding: tests for behavior C
+ublk_nosrv_[dev_]should_queue_io: tests for behavior B
+ublk_nosrv_should_stop_dev: tests for behavior 1
+
+Signed-off-by: Uday Shankar <ushankar@purestorage.com>
+Reviewed-by: Ming Lei <ming.lei@redhat.com>
+Link: https://lore.kernel.org/r/20241007182419.3263186-3-ushankar@purestorage.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: 6ee6bd5d4fce ("ublk: fix handling recovery & reissue in ublk_abort_queue()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/ublk_drv.c | 62 +++++++++++++++++++++++++++-------------
+ 1 file changed, 42 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
+index 79b7bd8bfd458..dd328d40c7de5 100644
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -681,22 +681,44 @@ static int ublk_max_cmd_buf_size(void)
+       return __ublk_queue_cmd_buf_size(UBLK_MAX_QUEUE_DEPTH);
+ }
+-static inline bool ublk_queue_can_use_recovery_reissue(
+-              struct ublk_queue *ubq)
++/*
++ * Should I/O outstanding to the ublk server when it exits be reissued?
++ * If not, outstanding I/O will get errors.
++ */
++static inline bool ublk_nosrv_should_reissue_outstanding(struct ublk_device *ub)
+ {
+-      return (ubq->flags & UBLK_F_USER_RECOVERY) &&
+-                      (ubq->flags & UBLK_F_USER_RECOVERY_REISSUE);
++      return (ub->dev_info.flags & UBLK_F_USER_RECOVERY) &&
++             (ub->dev_info.flags & UBLK_F_USER_RECOVERY_REISSUE);
+ }
+-static inline bool ublk_queue_can_use_recovery(
+-              struct ublk_queue *ubq)
++/*
++ * Should I/O issued while there is no ublk server be queued? If not,
++ * I/O issued while there is no ublk server will get errors.
++ */
++static inline bool ublk_nosrv_dev_should_queue_io(struct ublk_device *ub)
++{
++      return ub->dev_info.flags & UBLK_F_USER_RECOVERY;
++}
++
++/*
++ * Same as ublk_nosrv_dev_should_queue_io, but uses a queue-local copy
++ * of the device flags for smaller cache footprint - better for fast
++ * paths.
++ */
++static inline bool ublk_nosrv_should_queue_io(struct ublk_queue *ubq)
+ {
+       return ubq->flags & UBLK_F_USER_RECOVERY;
+ }
+-static inline bool ublk_can_use_recovery(struct ublk_device *ub)
++/*
++ * Should ublk devices be stopped (i.e. no recovery possible) when the
++ * ublk server exits? If not, devices can be used again by a future
++ * incarnation of a ublk server via the start_recovery/end_recovery
++ * commands.
++ */
++static inline bool ublk_nosrv_should_stop_dev(struct ublk_device *ub)
+ {
+-      return ub->dev_info.flags & UBLK_F_USER_RECOVERY;
++      return !(ub->dev_info.flags & UBLK_F_USER_RECOVERY);
+ }
+ static void ublk_free_disk(struct gendisk *disk)
+@@ -1072,7 +1094,7 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
+ {
+       WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);
+-      if (ublk_queue_can_use_recovery_reissue(ubq))
++      if (ublk_nosrv_should_reissue_outstanding(ubq->dev))
+               blk_mq_requeue_request(req, false);
+       else
+               ublk_put_req_ref(ubq, req);
+@@ -1100,7 +1122,7 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
+               struct request *rq)
+ {
+       /* We cannot process this rq so just requeue it. */
+-      if (ublk_queue_can_use_recovery(ubq))
++      if (ublk_nosrv_dev_should_queue_io(ubq->dev))
+               blk_mq_requeue_request(rq, false);
+       else
+               blk_mq_end_request(rq, BLK_STS_IOERR);
+@@ -1245,10 +1267,10 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq)
+               struct ublk_device *ub = ubq->dev;
+               if (ublk_abort_requests(ub, ubq)) {
+-                      if (ublk_can_use_recovery(ub))
+-                              schedule_work(&ub->quiesce_work);
+-                      else
++                      if (ublk_nosrv_should_stop_dev(ub))
+                               schedule_work(&ub->stop_work);
++                      else
++                              schedule_work(&ub->quiesce_work);
+               }
+               return BLK_EH_DONE;
+       }
+@@ -1277,7 +1299,7 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
+        * Note: force_abort is guaranteed to be seen because it is set
+        * before request queue is unqiuesced.
+        */
+-      if (ublk_queue_can_use_recovery(ubq) && unlikely(ubq->force_abort))
++      if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort))
+               return BLK_STS_IOERR;
+       if (unlikely(ubq->canceling)) {
+@@ -1517,10 +1539,10 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
+       ublk_cancel_cmd(ubq, io, issue_flags);
+       if (need_schedule) {
+-              if (ublk_can_use_recovery(ub))
+-                      schedule_work(&ub->quiesce_work);
+-              else
++              if (ublk_nosrv_should_stop_dev(ub))
+                       schedule_work(&ub->stop_work);
++              else
++                      schedule_work(&ub->quiesce_work);
+       }
+ }
+@@ -1640,7 +1662,7 @@ static void ublk_stop_dev(struct ublk_device *ub)
+       mutex_lock(&ub->mutex);
+       if (ub->dev_info.state == UBLK_S_DEV_DEAD)
+               goto unlock;
+-      if (ublk_can_use_recovery(ub)) {
++      if (ublk_nosrv_dev_should_queue_io(ub)) {
+               if (ub->dev_info.state == UBLK_S_DEV_LIVE)
+                       __ublk_quiesce_dev(ub);
+               ublk_unquiesce_dev(ub);
+@@ -2738,7 +2760,7 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
+       int i;
+       mutex_lock(&ub->mutex);
+-      if (!ublk_can_use_recovery(ub))
++      if (ublk_nosrv_should_stop_dev(ub))
+               goto out_unlock;
+       if (!ub->nr_queues_ready)
+               goto out_unlock;
+@@ -2791,7 +2813,7 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
+                       __func__, ub->dev_info.nr_hw_queues, header->dev_id);
+       mutex_lock(&ub->mutex);
+-      if (!ublk_can_use_recovery(ub))
++      if (ublk_nosrv_should_stop_dev(ub))
+               goto out_unlock;
+       if (ub->dev_info.state != UBLK_S_DEV_QUIESCED) {
+-- 
+2.39.5
+
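Since the two flags encode the small behavior matrix spelled out in the
changelog, evaluating the three new predicates over every flag
combination is an easy way to check the mapping. A throwaway sketch with
stand-in flag values:

    /* Sketch: decode the recovery flags into the three new predicates. */
    #include <stdio.h>

    #define F_RECOVERY (1u << 0)    /* stand-in for UBLK_F_USER_RECOVERY */
    #define F_REISSUE  (1u << 1)    /* stand-in for ..._RECOVERY_REISSUE */

    int main(void)
    {
            const unsigned combos[] = { 0, F_RECOVERY, F_RECOVERY | F_REISSUE };

            for (int i = 0; i < 3; i++) {
                    unsigned f = combos[i];
                    int reissue = (f & F_RECOVERY) && (f & F_REISSUE); /* C */
                    int queue   = !!(f & F_RECOVERY);                  /* B or C */
                    int stop    = !(f & F_RECOVERY);                   /* 1 */

                    printf("flags=%u reissue=%d queue=%d stop=%d\n",
                           f, reissue, queue, stop);
            }
            return 0;
    }

The default (no flags) yields stop=1 with errors for all I/O (A + 1),
UBLK_F_USER_RECOVERY yields queueing with recovery (B + 2), and adding
UBLK_F_USER_RECOVERY_REISSUE turns on reissue of outstanding I/O (C + 2).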
diff --git a/queue-6.12/x86-acpi-don-t-limit-cpus-to-1-for-xen-pv-guests-due.patch b/queue-6.12/x86-acpi-don-t-limit-cpus-to-1-for-xen-pv-guests-due.patch
new file mode 100644 (file)
index 0000000..2d0e387
--- /dev/null
@@ -0,0 +1,67 @@
+From 1b5c7f79a08f8e345a7f4b56daa396991f28d3d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Apr 2025 15:24:27 +0200
+Subject: x86/acpi: Don't limit CPUs to 1 for Xen PV guests due to disabled
+ ACPI
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Petr Vaněk <arkamar@atlas.cz>
+
+[ Upstream commit 8b37357a78d7fa13d88ea822b35b40137da1c85e ]
+
+Xen disables ACPI for PV guests in DomU, which causes acpi_mps_check() to
+return 1 when CONFIG_X86_MPPARSE is not set. As a result, the local APIC is
+disabled and the guest is later limited to a single vCPU, despite being
+configured with more.
+
+This regression was introduced in version 6.9 in commit 7c0edad3643f
+("x86/cpu/topology: Rework possible CPU management"), which added an
+early check that limits CPUs to 1 if apic_is_disabled.
+
+Update the acpi_mps_check() logic to return 0 early when running as a Xen
+PV guest in DomU, preventing APIC from being disabled in this specific case
+and restoring correct multi-vCPU behaviour.
+
+Fixes: 7c0edad3643f ("x86/cpu/topology: Rework possible CPU management")
+Signed-off-by: Petr Vaněk <arkamar@atlas.cz>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/all/20250407132445.6732-2-arkamar@atlas.cz
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/acpi/boot.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
+index c70b86f1f2954..63adda8a143f9 100644
+--- a/arch/x86/kernel/acpi/boot.c
++++ b/arch/x86/kernel/acpi/boot.c
+@@ -23,6 +23,8 @@
+ #include <linux/serial_core.h>
+ #include <linux/pgtable.h>
++#include <xen/xen.h>
++
+ #include <asm/e820/api.h>
+ #include <asm/irqdomain.h>
+ #include <asm/pci_x86.h>
+@@ -1730,6 +1732,15 @@ int __init acpi_mps_check(void)
+ {
+ #if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_X86_MPPARSE)
+ /* mptable code is not built-in*/
++
++      /*
++       * Xen disables ACPI in PV DomU guests but it still emulates APIC and
++       * supports SMP. Returning early here ensures that APIC is not disabled
++       * unnecessarily and the guest is not limited to a single vCPU.
++       */
++      if (xen_pv_domain() && !xen_initial_domain())
++              return 0;
++
+       if (acpi_disabled || acpi_noirq) {
+               pr_warn("MPS support code is not built-in, using acpi=off or acpi=noirq or pci=noacpi may have problem\n");
+               return 1;
+-- 
+2.39.5
+
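Note that the placement of the new check carries the fix: it must run
before the acpi_disabled/acpi_noirq test, so a Xen PV DomU (where ACPI
is force-disabled) returns 0 and keeps the local APIC usable instead of
falling through to the "return 1" path that caps the guest at one vCPU.
A simplified schematic of that guard ordering, not the kernel function:

    /* Sketch: the special case must short-circuit the generic one. */
    #include <stdbool.h>
    #include <stdio.h>

    static int mps_check(bool xen_pv_domu, bool acpi_disabled)
    {
            if (xen_pv_domu)        /* APIC is emulated and SMP works */
                    return 0;
            if (acpi_disabled)      /* no ACPI, no mptable: disable APIC */
                    return 1;
            return 0;
    }

    int main(void)
    {
            /* Xen PV DomU disables ACPI yet must keep all its vCPUs. */
            printf("xen pv domu        -> %d (0 keeps the APIC on)\n",
                   mps_check(true, true));
            printf("bare metal, noacpi -> %d\n", mps_check(false, true));
            return 0;
    }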