git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.1-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 16 Sep 2023 11:41:54 +0000 (13:41 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 16 Sep 2023 11:41:54 +0000 (13:41 +0200)
added patches:
multi-gen-lru-avoid-race-in-inc_min_seq.patch
net-mlx5-free-irq-rmap-and-notifier-on-kernel-shutdown.patch

queue-6.1/multi-gen-lru-avoid-race-in-inc_min_seq.patch [new file with mode: 0644]
queue-6.1/net-mlx5-free-irq-rmap-and-notifier-on-kernel-shutdown.patch [new file with mode: 0644]
queue-6.1/series

diff --git a/queue-6.1/multi-gen-lru-avoid-race-in-inc_min_seq.patch b/queue-6.1/multi-gen-lru-avoid-race-in-inc_min_seq.patch
new file mode 100644 (file)
index 0000000..ea74ac1
--- /dev/null
@@ -0,0 +1,89 @@
+From bb5e7f234eacf34b65be67ebb3613e3b8cf11b87 Mon Sep 17 00:00:00 2001
+From: Kalesh Singh <kaleshsingh@google.com>
+Date: Tue, 1 Aug 2023 19:56:03 -0700
+Subject: Multi-gen LRU: avoid race in inc_min_seq()
+
+From: Kalesh Singh <kaleshsingh@google.com>
+
+commit bb5e7f234eacf34b65be67ebb3613e3b8cf11b87 upstream.
+
+inc_max_seq() will try to inc_min_seq() if nr_gens == MAX_NR_GENS. This
+is because the generations are reused (the last oldest now empty
+generation will become the next youngest generation).
+
+inc_min_seq() is retried until successful, dropping the lru_lock
+and yielding the CPU on each failure, and retaking the lock before
+trying again:
+
+        while (!inc_min_seq(lruvec, type, can_swap)) {
+                spin_unlock_irq(&lruvec->lru_lock);
+                cond_resched();
+                spin_lock_irq(&lruvec->lru_lock);
+        }
+
+However, the initial condition that required incrementing the min_seq
+(nr_gens == MAX_NR_GENS) is not retested. This can change by another
+call to inc_max_seq() from run_aging() with force_scan=true from the
+debugfs interface.
+
+Since the eviction stalls when the nr_gens == MIN_NR_GENS, avoid
+unnecessarily incrementing the min_seq by rechecking the number of
+generations before each attempt.
+
+This issue was uncovered in previous discussion on the list by Yu Zhao
+and Aneesh Kumar [1].
+
+[1] https://lore.kernel.org/linux-mm/CAOUHufbO7CaVm=xjEb1avDhHVvnC8pJmGyKcFf2iY_dpf+zR3w@mail.gmail.com/
+
+Link: https://lkml.kernel.org/r/20230802025606.346758-2-kaleshsingh@google.com
+Fixes: d6c3af7d8a2b ("mm: multi-gen LRU: debugfs interface")
+Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
+Tested-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com> [mediatek]
+Tested-by: Charan Teja Kalla <quic_charante@quicinc.com>
+Cc: Yu Zhao <yuzhao@google.com>
+Cc: Aneesh Kumar K V <aneesh.kumar@linux.ibm.com>
+Cc: Barry Song <baohua@kernel.org>
+Cc: Brian Geffon <bgeffon@google.com>
+Cc: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
+Cc: Lecopzer Chen <lecopzer.chen@mediatek.com>
+Cc: Matthias Brugger <matthias.bgg@gmail.com>
+Cc: Oleksandr Natalenko <oleksandr@natalenko.name>
+Cc: Qi Zheng <zhengqi.arch@bytedance.com>
+Cc: Steven Barrett <steven@liquorix.net>
+Cc: Suleiman Souhlal <suleiman@google.com>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/vmscan.c |   12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -4331,6 +4331,7 @@ static void inc_max_seq(struct lruvec *l
+       int type, zone;
+       struct lru_gen_struct *lrugen = &lruvec->lrugen;
++restart:
+       spin_lock_irq(&lruvec->lru_lock);
+       VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
+@@ -4341,11 +4342,12 @@ static void inc_max_seq(struct lruvec *l
+               VM_WARN_ON_ONCE(!force_scan && (type == LRU_GEN_FILE || can_swap));
+-              while (!inc_min_seq(lruvec, type, can_swap)) {
+-                      spin_unlock_irq(&lruvec->lru_lock);
+-                      cond_resched();
+-                      spin_lock_irq(&lruvec->lru_lock);
+-              }
++              if (inc_min_seq(lruvec, type, can_swap))
++                      continue;
++
++              spin_unlock_irq(&lruvec->lru_lock);
++              cond_resched();
++              goto restart;
+       }
+       /*
diff --git a/queue-6.1/net-mlx5-free-irq-rmap-and-notifier-on-kernel-shutdown.patch b/queue-6.1/net-mlx5-free-irq-rmap-and-notifier-on-kernel-shutdown.patch
new file mode 100644 (file)
index 0000000..0454b6b
--- /dev/null
@@ -0,0 +1,120 @@
+From 314ded538e5f22e7610b1bf621402024a180ec80 Mon Sep 17 00:00:00 2001
+From: Saeed Mahameed <saeedm@nvidia.com>
+Date: Thu, 8 Jun 2023 12:00:54 -0700
+Subject: net/mlx5: Free IRQ rmap and notifier on kernel shutdown
+
+From: Saeed Mahameed <saeedm@nvidia.com>
+
+commit 314ded538e5f22e7610b1bf621402024a180ec80 upstream.
+
+The kernel IRQ system needs the irq affinity notifier to be clear
+before attempting to free the irq, see WARN_ON log below.
+
+On a normal driver unload we don't have this issue since we do the
+complete cleanup of the irq resources.
+
+To fix this, put the important resources cleanup in a helper function
+and use it in both normal driver unload and shutdown flows.
+
+[ 4497.498434] ------------[ cut here ]------------
+[ 4497.498726] WARNING: CPU: 0 PID: 9 at kernel/irq/manage.c:2034 free_irq+0x295/0x340
+[ 4497.499193] Modules linked in:
+[ 4497.499386] CPU: 0 PID: 9 Comm: kworker/0:1 Tainted: G        W          6.4.0-rc4+ #10
+[ 4497.499876] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014
+[ 4497.500518] Workqueue: events do_poweroff
+[ 4497.500849] RIP: 0010:free_irq+0x295/0x340
+[ 4497.501132] Code: 85 c0 0f 84 1d ff ff ff 48 89 ef ff d0 0f 1f 00 e9 10 ff ff ff 0f 0b e9 72 ff ff ff 49 8d 7f 28 ff d0 0f 1f 00 e9 df fd ff ff <0f> 0b 48 c7 80 c0 008
+[ 4497.502269] RSP: 0018:ffffc90000053da0 EFLAGS: 00010282
+[ 4497.502589] RAX: ffff888100949600 RBX: ffff88810330b948 RCX: 0000000000000000
+[ 4497.503035] RDX: ffff888100949600 RSI: ffff888100400490 RDI: 0000000000000023
+[ 4497.503472] RBP: ffff88810330c7e0 R08: ffff8881004005d0 R09: ffffffff8273a260
+[ 4497.503923] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8881009ae000
+[ 4497.504359] R13: ffff8881009ae148 R14: 0000000000000000 R15: ffff888100949600
+[ 4497.504804] FS:  0000000000000000(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000
+[ 4497.505302] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 4497.505671] CR2: 00007fce98806298 CR3: 000000000262e005 CR4: 0000000000370ef0
+[ 4497.506104] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[ 4497.506540] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[ 4497.507002] Call Trace:
+[ 4497.507158]  <TASK>
+[ 4497.507299]  ? free_irq+0x295/0x340
+[ 4497.507522]  ? __warn+0x7c/0x130
+[ 4497.507740]  ? free_irq+0x295/0x340
+[ 4497.507963]  ? report_bug+0x171/0x1a0
+[ 4497.508197]  ? handle_bug+0x3c/0x70
+[ 4497.508417]  ? exc_invalid_op+0x17/0x70
+[ 4497.508662]  ? asm_exc_invalid_op+0x1a/0x20
+[ 4497.508926]  ? free_irq+0x295/0x340
+[ 4497.509146]  mlx5_irq_pool_free_irqs+0x48/0x90
+[ 4497.509421]  mlx5_irq_table_free_irqs+0x38/0x50
+[ 4497.509714]  mlx5_core_eq_free_irqs+0x27/0x40
+[ 4497.509984]  shutdown+0x7b/0x100
+[ 4497.510184]  pci_device_shutdown+0x30/0x60
+[ 4497.510440]  device_shutdown+0x14d/0x240
+[ 4497.510698]  kernel_power_off+0x30/0x70
+[ 4497.510938]  process_one_work+0x1e6/0x3e0
+[ 4497.511183]  worker_thread+0x49/0x3b0
+[ 4497.511407]  ? __pfx_worker_thread+0x10/0x10
+[ 4497.511679]  kthread+0xe0/0x110
+[ 4497.511879]  ? __pfx_kthread+0x10/0x10
+[ 4497.512114]  ret_from_fork+0x29/0x50
+[ 4497.512342]  </TASK>
+
+Fixes: 9c2d08010963 ("net/mlx5: Free irqs only on shutdown callback")
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Reviewed-by: Shay Drory <shayd@nvidia.com>
+Signed-off-by: Mathieu Tortuyaux <mtortuyaux@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c |   26 ++++++++++++++++------
+ 1 file changed, 20 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+@@ -123,18 +123,32 @@ out:
+       return ret;
+ }
+-static void irq_release(struct mlx5_irq *irq)
++/* mlx5_system_free_irq - Free an IRQ
++ * @irq: IRQ to free
++ *
++ * Free the IRQ and other resources such as rmap from the system.
++ * BUT doesn't free or remove reference from mlx5.
++ * This function is very important for the shutdown flow, where we need to
+ * cleanup system resources but keep mlx5 objects alive,
++ * see mlx5_irq_table_free_irqs().
++ */
++static void mlx5_system_free_irq(struct mlx5_irq *irq)
+ {
+-      struct mlx5_irq_pool *pool = irq->pool;
+-
+-      xa_erase(&pool->irqs, irq->index);
+       /* free_irq requires that affinity_hint and rmap will be cleared
+        * before calling it. This is why there is asymmetry with set_rmap
+        * which should be called after alloc_irq but before request_irq.
+        */
+       irq_update_affinity_hint(irq->irqn, NULL);
+-      free_cpumask_var(irq->mask);
+       free_irq(irq->irqn, &irq->nh);
++}
++
++static void irq_release(struct mlx5_irq *irq)
++{
++      struct mlx5_irq_pool *pool = irq->pool;
++
++      xa_erase(&pool->irqs, irq->index);
++      mlx5_system_free_irq(irq);
++      free_cpumask_var(irq->mask);
+       kfree(irq);
+ }
+@@ -597,7 +611,7 @@ static void mlx5_irq_pool_free_irqs(stru
+       unsigned long index;
+       xa_for_each(&pool->irqs, index, irq)
+-              free_irq(irq->irqn, &irq->nh);
++              mlx5_system_free_irq(irq);
+ }
+ static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
index 62aca297ab02ba35de0afff78863bd84233d5b0d..95769c11723b56e300381f1da9fbd97bf5fec4cd 100644 (file)
@@ -133,3 +133,5 @@ net-hns3-fix-invalid-mutex-between-tc-qdisc-and-dcb-.patch
 net-hns3-fix-the-port-information-display-when-sfp-i.patch
 net-hns3-remove-gso-partial-feature-bit.patch
 sh-boards-fix-ceu-buffer-size-passed-to-dma_declare_.patch
+multi-gen-lru-avoid-race-in-inc_min_seq.patch
+net-mlx5-free-irq-rmap-and-notifier-on-kernel-shutdown.patch