From: Greg Kroah-Hartman Date: Sat, 16 Sep 2023 11:41:54 +0000 (+0200) Subject: 6.1-stable patches X-Git-Tag: v5.10.195~51 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4e9d0441378f8d4d9d5a75cfb5650351d58ffb4a;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: multi-gen-lru-avoid-race-in-inc_min_seq.patch net-mlx5-free-irq-rmap-and-notifier-on-kernel-shutdown.patch --- diff --git a/queue-6.1/multi-gen-lru-avoid-race-in-inc_min_seq.patch b/queue-6.1/multi-gen-lru-avoid-race-in-inc_min_seq.patch new file mode 100644 index 00000000000..ea74ac131d9 --- /dev/null +++ b/queue-6.1/multi-gen-lru-avoid-race-in-inc_min_seq.patch @@ -0,0 +1,89 @@ +From bb5e7f234eacf34b65be67ebb3613e3b8cf11b87 Mon Sep 17 00:00:00 2001 +From: Kalesh Singh +Date: Tue, 1 Aug 2023 19:56:03 -0700 +Subject: Multi-gen LRU: avoid race in inc_min_seq() + +From: Kalesh Singh + +commit bb5e7f234eacf34b65be67ebb3613e3b8cf11b87 upstream. + +inc_max_seq() will try to inc_min_seq() if nr_gens == MAX_NR_GENS. This +is because the generations are reused (the last oldest now empty +generation will become the next youngest generation). + +inc_min_seq() is retried until successful, dropping the lru_lock +and yielding the CPU on each failure, and retaking the lock before +trying again: + + while (!inc_min_seq(lruvec, type, can_swap)) { + spin_unlock_irq(&lruvec->lru_lock); + cond_resched(); + spin_lock_irq(&lruvec->lru_lock); + } + +However, the initial condition that required incrementing the min_seq +(nr_gens == MAX_NR_GENS) is not retested. This can change by another +call to inc_max_seq() from run_aging() with force_scan=true from the +debugfs interface. + +Since the eviction stalls when the nr_gens == MIN_NR_GENS, avoid +unnecessarily incrementing the min_seq by rechecking the number of +generations before each attempt. + +This issue was uncovered in previous discussion on the list by Yu Zhao +and Aneesh Kumar [1]. 
+ +[1] https://lore.kernel.org/linux-mm/CAOUHufbO7CaVm=xjEb1avDhHVvnC8pJmGyKcFf2iY_dpf+zR3w@mail.gmail.com/ + +Link: https://lkml.kernel.org/r/20230802025606.346758-2-kaleshsingh@google.com +Fixes: d6c3af7d8a2b ("mm: multi-gen LRU: debugfs interface") +Signed-off-by: Kalesh Singh +Tested-by: AngeloGioacchino Del Regno [mediatek] +Tested-by: Charan Teja Kalla +Cc: Yu Zhao +Cc: Aneesh Kumar K V +Cc: Barry Song +Cc: Brian Geffon +Cc: Jan Alexander Steffens (heftig) +Cc: Lecopzer Chen +Cc: Matthias Brugger +Cc: Oleksandr Natalenko +Cc: Qi Zheng +Cc: Steven Barrett +Cc: Suleiman Souhlal +Cc: Suren Baghdasaryan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmscan.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -4331,6 +4331,7 @@ static void inc_max_seq(struct lruvec *l + int type, zone; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + ++restart: + spin_lock_irq(&lruvec->lru_lock); + + VM_WARN_ON_ONCE(!seq_is_valid(lruvec)); +@@ -4341,11 +4342,12 @@ static void inc_max_seq(struct lruvec *l + + VM_WARN_ON_ONCE(!force_scan && (type == LRU_GEN_FILE || can_swap)); + +- while (!inc_min_seq(lruvec, type, can_swap)) { +- spin_unlock_irq(&lruvec->lru_lock); +- cond_resched(); +- spin_lock_irq(&lruvec->lru_lock); +- } ++ if (inc_min_seq(lruvec, type, can_swap)) ++ continue; ++ ++ spin_unlock_irq(&lruvec->lru_lock); ++ cond_resched(); ++ goto restart; + } + + /* diff --git a/queue-6.1/net-mlx5-free-irq-rmap-and-notifier-on-kernel-shutdown.patch b/queue-6.1/net-mlx5-free-irq-rmap-and-notifier-on-kernel-shutdown.patch new file mode 100644 index 00000000000..0454b6b6e39 --- /dev/null +++ b/queue-6.1/net-mlx5-free-irq-rmap-and-notifier-on-kernel-shutdown.patch @@ -0,0 +1,120 @@ +From 314ded538e5f22e7610b1bf621402024a180ec80 Mon Sep 17 00:00:00 2001 +From: Saeed Mahameed +Date: Thu, 8 Jun 2023 12:00:54 -0700 +Subject: net/mlx5: Free IRQ rmap and notifier on kernel shutdown + +From: 
Saeed Mahameed + +commit 314ded538e5f22e7610b1bf621402024a180ec80 upstream. + +The kernel IRQ system needs the irq affinity notifier to be clear +before attempting to free the irq, see WARN_ON log below. + +On a normal driver unload we don't have this issue since we do the +complete cleanup of the irq resources. + +To fix this, put the important resources cleanup in a helper function +and use it in both normal driver unload and shutdown flows. + +[ 4497.498434] ------------[ cut here ]------------ +[ 4497.498726] WARNING: CPU: 0 PID: 9 at kernel/irq/manage.c:2034 free_irq+0x295/0x340 +[ 4497.499193] Modules linked in: +[ 4497.499386] CPU: 0 PID: 9 Comm: kworker/0:1 Tainted: G W 6.4.0-rc4+ #10 +[ 4497.499876] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014 +[ 4497.500518] Workqueue: events do_poweroff +[ 4497.500849] RIP: 0010:free_irq+0x295/0x340 +[ 4497.501132] Code: 85 c0 0f 84 1d ff ff ff 48 89 ef ff d0 0f 1f 00 e9 10 ff ff ff 0f 0b e9 72 ff ff ff 49 8d 7f 28 ff d0 0f 1f 00 e9 df fd ff ff <0f> 0b 48 c7 80 c0 008 +[ 4497.502269] RSP: 0018:ffffc90000053da0 EFLAGS: 00010282 +[ 4497.502589] RAX: ffff888100949600 RBX: ffff88810330b948 RCX: 0000000000000000 +[ 4497.503035] RDX: ffff888100949600 RSI: ffff888100400490 RDI: 0000000000000023 +[ 4497.503472] RBP: ffff88810330c7e0 R08: ffff8881004005d0 R09: ffffffff8273a260 +[ 4497.503923] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8881009ae000 +[ 4497.504359] R13: ffff8881009ae148 R14: 0000000000000000 R15: ffff888100949600 +[ 4497.504804] FS: 0000000000000000(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000 +[ 4497.505302] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 4497.505671] CR2: 00007fce98806298 CR3: 000000000262e005 CR4: 0000000000370ef0 +[ 4497.506104] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 4497.506540] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 4497.507002] Call Trace: +[ 4497.507158] +[ 
4497.507299] ? free_irq+0x295/0x340 +[ 4497.507522] ? __warn+0x7c/0x130 +[ 4497.507740] ? free_irq+0x295/0x340 +[ 4497.507963] ? report_bug+0x171/0x1a0 +[ 4497.508197] ? handle_bug+0x3c/0x70 +[ 4497.508417] ? exc_invalid_op+0x17/0x70 +[ 4497.508662] ? asm_exc_invalid_op+0x1a/0x20 +[ 4497.508926] ? free_irq+0x295/0x340 +[ 4497.509146] mlx5_irq_pool_free_irqs+0x48/0x90 +[ 4497.509421] mlx5_irq_table_free_irqs+0x38/0x50 +[ 4497.509714] mlx5_core_eq_free_irqs+0x27/0x40 +[ 4497.509984] shutdown+0x7b/0x100 +[ 4497.510184] pci_device_shutdown+0x30/0x60 +[ 4497.510440] device_shutdown+0x14d/0x240 +[ 4497.510698] kernel_power_off+0x30/0x70 +[ 4497.510938] process_one_work+0x1e6/0x3e0 +[ 4497.511183] worker_thread+0x49/0x3b0 +[ 4497.511407] ? __pfx_worker_thread+0x10/0x10 +[ 4497.511679] kthread+0xe0/0x110 +[ 4497.511879] ? __pfx_kthread+0x10/0x10 +[ 4497.512114] ret_from_fork+0x29/0x50 +[ 4497.512342] + +Fixes: 9c2d08010963 ("net/mlx5: Free irqs only on shutdown callback") +Signed-off-by: Saeed Mahameed +Reviewed-by: Shay Drory +Signed-off-by: Mathieu Tortuyaux +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 26 ++++++++++++++++------ + 1 file changed, 20 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +@@ -123,18 +123,32 @@ out: + return ret; + } + +-static void irq_release(struct mlx5_irq *irq) ++/* mlx5_system_free_irq - Free an IRQ ++ * @irq: IRQ to free ++ * ++ * Free the IRQ and other resources such as rmap from the system. ++ * BUT doesn't free or remove reference from mlx5. ++ * This function is very important for the shutdown flow, where we need to ++ * cleanup system resources but keep mlx5 objects alive, ++ * see mlx5_irq_table_free_irqs(). 
++ */ ++static void mlx5_system_free_irq(struct mlx5_irq *irq) + { +- struct mlx5_irq_pool *pool = irq->pool; +- +- xa_erase(&pool->irqs, irq->index); + /* free_irq requires that affinity_hint and rmap will be cleared + * before calling it. This is why there is asymmetry with set_rmap + * which should be called after alloc_irq but before request_irq. + */ + irq_update_affinity_hint(irq->irqn, NULL); +- free_cpumask_var(irq->mask); + free_irq(irq->irqn, &irq->nh); ++} ++ ++static void irq_release(struct mlx5_irq *irq) ++{ ++ struct mlx5_irq_pool *pool = irq->pool; ++ ++ xa_erase(&pool->irqs, irq->index); ++ mlx5_system_free_irq(irq); ++ free_cpumask_var(irq->mask); + kfree(irq); + } + +@@ -597,7 +611,7 @@ static void mlx5_irq_pool_free_irqs(stru + unsigned long index; + + xa_for_each(&pool->irqs, index, irq) +- free_irq(irq->irqn, &irq->nh); ++ mlx5_system_free_irq(irq); + } + + static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table) diff --git a/queue-6.1/series b/queue-6.1/series index 62aca297ab0..95769c11723 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -133,3 +133,5 @@ net-hns3-fix-invalid-mutex-between-tc-qdisc-and-dcb-.patch net-hns3-fix-the-port-information-display-when-sfp-i.patch net-hns3-remove-gso-partial-feature-bit.patch sh-boards-fix-ceu-buffer-size-passed-to-dma_declare_.patch +multi-gen-lru-avoid-race-in-inc_min_seq.patch +net-mlx5-free-irq-rmap-and-notifier-on-kernel-shutdown.patch