From 2eee3af03df39a4f1903d2f83c5b3e9b2c1f4ba0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 11 Sep 2023 08:46:49 +0200 Subject: [PATCH] 6.1-stable patches added patches: md-free-resources-in-__md_stop.patch nfsv4.2-fix-a-potential-double-free-with-read_plus.patch nfsv4.2-rework-scratch-handling-for-read_plus-again.patch revert-drm-amd-display-do-not-set-drr-on-pipe-commit.patch tracing-zero-the-pipe-cpumask-on-alloc-to-avoid-spurious-ebusy.patch --- .../md-free-resources-in-__md_stop.patch | 84 ++++++++++++ ...potential-double-free-with-read_plus.patch | 42 ++++++ ...scratch-handling-for-read_plus-again.patch | 120 ++++++++++++++++++ ...isplay-do-not-set-drr-on-pipe-commit.patch | 105 +++++++++++++++ queue-6.1/series | 5 + ...ask-on-alloc-to-avoid-spurious-ebusy.patch | 53 ++++++++ 6 files changed, 409 insertions(+) create mode 100644 queue-6.1/md-free-resources-in-__md_stop.patch create mode 100644 queue-6.1/nfsv4.2-fix-a-potential-double-free-with-read_plus.patch create mode 100644 queue-6.1/nfsv4.2-rework-scratch-handling-for-read_plus-again.patch create mode 100644 queue-6.1/revert-drm-amd-display-do-not-set-drr-on-pipe-commit.patch create mode 100644 queue-6.1/tracing-zero-the-pipe-cpumask-on-alloc-to-avoid-spurious-ebusy.patch diff --git a/queue-6.1/md-free-resources-in-__md_stop.patch b/queue-6.1/md-free-resources-in-__md_stop.patch new file mode 100644 index 00000000000..be12a38e2e1 --- /dev/null +++ b/queue-6.1/md-free-resources-in-__md_stop.patch @@ -0,0 +1,84 @@ +From 3e453522593d74a87cf68a38e14aa36ebca1dbcd Mon Sep 17 00:00:00 2001 +From: Xiao Ni +Date: Wed, 22 Feb 2023 11:59:16 +0800 +Subject: md: Free resources in __md_stop + +From: Xiao Ni + +commit 3e453522593d74a87cf68a38e14aa36ebca1dbcd upstream. + +If md_run() fails after ->active_io is initialized, then percpu_ref_exit +is called in error path. However, later md_free_disk will call +percpu_ref_exit again which leads to a panic because of null pointer +dereference. It can also trigger this bug when resources are initialized +but are freed in error path, then will be freed again in md_free_disk. + +BUG: kernel NULL pointer dereference, address: 0000000000000038 +Oops: 0000 [#1] PREEMPT SMP +Workqueue: md_misc mddev_delayed_delete +RIP: 0010:free_percpu+0x110/0x630 +Call Trace: + + __percpu_ref_exit+0x44/0x70 + percpu_ref_exit+0x16/0x90 + md_free_disk+0x2f/0x80 + disk_release+0x101/0x180 + device_release+0x84/0x110 + kobject_put+0x12a/0x380 + kobject_put+0x160/0x380 + mddev_delayed_delete+0x19/0x30 + process_one_work+0x269/0x680 + worker_thread+0x266/0x640 + kthread+0x151/0x1b0 + ret_from_fork+0x1f/0x30 + +For creating raid device, md raid calls do_md_run->md_run, dm raid calls +md_run. We alloc those memory in md_run. For stopping raid device, md raid +calls do_md_stop->__md_stop, dm raid calls md_stop->__md_stop. So we can +free those memory resources in __md_stop. + +Fixes: 72adae23a72c ("md: Change active_io to percpu") +Reported-and-tested-by: Yu Kuai +Signed-off-by: Xiao Ni +Signed-off-by: Song Liu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/md.c | 13 +++++-------- + 1 file changed, 5 insertions(+), 8 deletions(-) + +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -6277,6 +6277,11 @@ static void __md_stop(struct mddev *mdde + mddev->to_remove = &md_redundancy_group; + module_put(pers->owner); + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); ++ ++ percpu_ref_exit(&mddev->writes_pending); ++ percpu_ref_exit(&mddev->active_io); ++ bioset_exit(&mddev->bio_set); ++ bioset_exit(&mddev->sync_set); + } + + void md_stop(struct mddev *mddev) +@@ -6288,9 +6293,6 @@ void md_stop(struct mddev *mddev) + */ + __md_stop_writes(mddev); + __md_stop(mddev); +- percpu_ref_exit(&mddev->active_io); +- bioset_exit(&mddev->bio_set); +- bioset_exit(&mddev->sync_set); + } + + EXPORT_SYMBOL_GPL(md_stop); +@@ -7857,11 +7859,6 @@ static void md_free_disk(struct gendisk + { + struct mddev *mddev = disk->private_data; + +- percpu_ref_exit(&mddev->writes_pending); +- percpu_ref_exit(&mddev->active_io); +- bioset_exit(&mddev->bio_set); +- bioset_exit(&mddev->sync_set); +- + mddev_free(mddev); + } + diff --git a/queue-6.1/nfsv4.2-fix-a-potential-double-free-with-read_plus.patch b/queue-6.1/nfsv4.2-fix-a-potential-double-free-with-read_plus.patch new file mode 100644 index 00000000000..e1c762ace9f --- /dev/null +++ b/queue-6.1/nfsv4.2-fix-a-potential-double-free-with-read_plus.patch @@ -0,0 +1,42 @@ +From 43439d858bbae244a510de47f9a55f667ca4ed52 Mon Sep 17 00:00:00 2001 +From: Anna Schumaker +Date: Tue, 16 May 2023 11:19:25 -0400 +Subject: NFSv4.2: Fix a potential double free with READ_PLUS + +From: Anna Schumaker + +commit 43439d858bbae244a510de47f9a55f667ca4ed52 upstream. + +kfree()-ing the scratch page isn't enough, we also need to set the pointer +back to NULL to avoid a double-free in the case of a resend. + +Fixes: fbd2a05f29a9 (NFSv4.2: Rework scratch handling for READ_PLUS) +Signed-off-by: Anna Schumaker +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfs/nfs4proc.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -5444,10 +5444,18 @@ static bool nfs4_read_plus_not_supported + return false; + } + +-static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) ++static inline void nfs4_read_plus_scratch_free(struct nfs_pgio_header *hdr) + { +- if (hdr->res.scratch) ++ if (hdr->res.scratch) { + kfree(hdr->res.scratch); ++ hdr->res.scratch = NULL; ++ } ++} ++ ++static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) ++{ ++ nfs4_read_plus_scratch_free(hdr); ++ + if (!nfs4_sequence_done(task, &hdr->res.seq_res)) + return -EAGAIN; + if (nfs4_read_stateid_changed(task, &hdr->args)) diff --git a/queue-6.1/nfsv4.2-rework-scratch-handling-for-read_plus-again.patch b/queue-6.1/nfsv4.2-rework-scratch-handling-for-read_plus-again.patch new file mode 100644 index 00000000000..8f32ab26a69 --- /dev/null +++ b/queue-6.1/nfsv4.2-rework-scratch-handling-for-read_plus-again.patch @@ -0,0 +1,120 @@ +From 303a78052091c81e9003915c521fdca1c7e117af Mon Sep 17 00:00:00 2001 +From: Anna Schumaker +Date: Fri, 9 Jun 2023 15:26:25 -0400 +Subject: NFSv4.2: Rework scratch handling for READ_PLUS (again) + +From: Anna Schumaker + +commit 303a78052091c81e9003915c521fdca1c7e117af upstream. + +I found that the read code might send multiple requests using the same +nfs_pgio_header, but nfs4_proc_read_setup() is only called once. This is +how we ended up occasionally double-freeing the scratch buffer, but also +means we set a NULL pointer but non-zero length to the xdr scratch +buffer. This results in an oops the first time decoding needs to copy +something to scratch, which frequently happens when decoding READ_PLUS +hole segments. + +I fix this by moving scratch handling into the pageio read code. I +provide a function to allocate scratch space for decoding read replies, +and free the scratch buffer when the nfs_pgio_header is freed. + +Fixes: fbd2a05f29a9 (NFSv4.2: Rework scratch handling for READ_PLUS) +Signed-off-by: Anna Schumaker +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfs/internal.h | 1 + + fs/nfs/nfs42.h | 1 + + fs/nfs/nfs42xdr.c | 2 +- + fs/nfs/nfs4proc.c | 13 +------------ + fs/nfs/read.c | 10 ++++++++++ + 5 files changed, 14 insertions(+), 13 deletions(-) + +--- a/fs/nfs/internal.h ++++ b/fs/nfs/internal.h +@@ -484,6 +484,7 @@ struct nfs_pgio_completion_ops; + extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, + struct inode *inode, bool force_mds, + const struct nfs_pgio_completion_ops *compl_ops); ++extern bool nfs_read_alloc_scratch(struct nfs_pgio_header *hdr, size_t size); + extern void nfs_read_prepare(struct rpc_task *task, void *calldata); + extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); + +--- a/fs/nfs/nfs42.h ++++ b/fs/nfs/nfs42.h +@@ -13,6 +13,7 @@ + * more? Need to consider not to pre-alloc too much for a compound. + */ + #define PNFS_LAYOUTSTATS_MAXDEV (4) ++#define READ_PLUS_SCRATCH_SIZE (16) + + /* nfs4.2proc.c */ + #ifdef CONFIG_NFS_V4_2 +--- a/fs/nfs/nfs42xdr.c ++++ b/fs/nfs/nfs42xdr.c +@@ -1351,7 +1351,7 @@ static int nfs4_xdr_dec_read_plus(struct + struct compound_hdr hdr; + int status; + +- xdr_set_scratch_buffer(xdr, res->scratch, sizeof(res->scratch)); ++ xdr_set_scratch_buffer(xdr, res->scratch, READ_PLUS_SCRATCH_SIZE); + + status = decode_compound_hdr(xdr, &hdr); + if (status) +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -5444,18 +5444,8 @@ static bool nfs4_read_plus_not_supported + return false; + } + +-static inline void nfs4_read_plus_scratch_free(struct nfs_pgio_header *hdr) +-{ +- if (hdr->res.scratch) { +- kfree(hdr->res.scratch); +- hdr->res.scratch = NULL; +- } +-} +- + static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) + { +- nfs4_read_plus_scratch_free(hdr); +- + if (!nfs4_sequence_done(task, &hdr->res.seq_res)) + return -EAGAIN; + if (nfs4_read_stateid_changed(task, &hdr->args)) +@@ -5475,8 +5465,7 @@ static bool nfs42_read_plus_support(stru + /* Note: We don't use READ_PLUS with pNFS yet */ + if (nfs_server_capable(hdr->inode, NFS_CAP_READ_PLUS) && !hdr->ds_clp) { + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS]; +- hdr->res.scratch = kmalloc(32, GFP_KERNEL); +- return hdr->res.scratch != NULL; ++ return nfs_read_alloc_scratch(hdr, READ_PLUS_SCRATCH_SIZE); + } + return false; + } +--- a/fs/nfs/read.c ++++ b/fs/nfs/read.c +@@ -47,6 +47,8 @@ static struct nfs_pgio_header *nfs_readh + + static void nfs_readhdr_free(struct nfs_pgio_header *rhdr) + { ++ if (rhdr->res.scratch != NULL) ++ kfree(rhdr->res.scratch); + kmem_cache_free(nfs_rdata_cachep, rhdr); + } + +@@ -109,6 +111,14 @@ void nfs_pageio_reset_read_mds(struct nf + } + EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); + ++bool nfs_read_alloc_scratch(struct nfs_pgio_header *hdr, size_t size) ++{ ++ WARN_ON(hdr->res.scratch != NULL); ++ hdr->res.scratch = kmalloc(size, GFP_KERNEL); ++ return hdr->res.scratch != NULL; ++} ++EXPORT_SYMBOL_GPL(nfs_read_alloc_scratch); ++ + static void nfs_readpage_release(struct nfs_page *req, int error) + { + struct inode *inode = d_inode(nfs_req_openctx(req)->dentry); diff --git a/queue-6.1/revert-drm-amd-display-do-not-set-drr-on-pipe-commit.patch b/queue-6.1/revert-drm-amd-display-do-not-set-drr-on-pipe-commit.patch new file mode 100644 index 00000000000..2035df45283 --- /dev/null +++ b/queue-6.1/revert-drm-amd-display-do-not-set-drr-on-pipe-commit.patch @@ -0,0 +1,105 @@ +From 360930985ec9f394c82ba0b235403b4a366d1560 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Michel=20D=C3=A4nzer?= +Date: Mon, 22 May 2023 15:08:23 +0200 +Subject: Revert "drm/amd/display: Do not set drr on pipe commit" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Michel Dänzer + +commit 360930985ec9f394c82ba0b235403b4a366d1560 upstream. + +This reverts commit e101bf95ea87ccc03ac2f48dfc0757c6364ff3c7. + +Caused a regression: + +Samsung Odyssey Neo G9, running at 5120x1440@240/VRR, connected to Navi +21 via DisplayPort, blanks and the GPU hangs while starting the Steam +game Assetto Corsa Competizione (via Proton 7.0). + +Example dmesg excerpt: + + amdgpu 0000:0c:00.0: [drm] ERROR [CRTC:82:crtc-0] flip_done timed out + NMI watchdog: Watchdog detected hard LOCKUP on cpu 6 + [...] + RIP: 0010:amdgpu_device_rreg.part.0+0x2f/0xf0 [amdgpu] + Code: 41 54 44 8d 24 b5 00 00 00 00 55 89 f5 53 48 89 fb 4c 3b a7 60 0b 00 00 73 6a 83 e2 02 74 29 4c 03 a3 68 0b 00 00 45 8b 24 24 <48> 8b 43 08 0f b7 70 3e 66 90 44 89 e0 5b 5d 41 5c 31 d2 31 c9 31 + RSP: 0000:ffffb39a119dfb88 EFLAGS: 00000086 + RAX: ffffffffc0eb96a0 RBX: ffff9e7963dc0000 RCX: 0000000000007fff + RDX: 0000000000000000 RSI: 0000000000004ff6 RDI: ffff9e7963dc0000 + RBP: 0000000000004ff6 R08: ffffb39a119dfc40 R09: 0000000000000010 + R10: ffffb39a119dfc40 R11: ffffb39a119dfc44 R12: 00000000000e05ae + R13: 0000000000000000 R14: ffff9e7963dc0010 R15: 0000000000000000 + FS: 000000001012f6c0(0000) GS:ffff9e805eb80000(0000) knlGS:000000007fd40000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00000000461ca000 CR3: 00000002a8a20000 CR4: 0000000000350ee0 + Call Trace: + + dm_read_reg_func+0x37/0xc0 [amdgpu] + generic_reg_get2+0x22/0x60 [amdgpu] + optc1_get_crtc_scanoutpos+0x6a/0xc0 [amdgpu] + dc_stream_get_scanoutpos+0x74/0x90 [amdgpu] + dm_crtc_get_scanoutpos+0x82/0xf0 [amdgpu] + amdgpu_display_get_crtc_scanoutpos+0x91/0x190 [amdgpu] + ? dm_read_reg_func+0x37/0xc0 [amdgpu] + amdgpu_get_vblank_counter_kms+0xb4/0x1a0 [amdgpu] + dm_pflip_high_irq+0x213/0x2f0 [amdgpu] + amdgpu_dm_irq_handler+0x8a/0x200 [amdgpu] + amdgpu_irq_dispatch+0xd4/0x220 [amdgpu] + amdgpu_ih_process+0x7f/0x110 [amdgpu] + amdgpu_irq_handler+0x1f/0x70 [amdgpu] + __handle_irq_event_percpu+0x46/0x1b0 + handle_irq_event+0x34/0x80 + handle_edge_irq+0x9f/0x240 + __common_interrupt+0x66/0x110 + common_interrupt+0x5c/0xd0 + asm_common_interrupt+0x22/0x40 + +Reviewed-by: Aurabindo Pillai +Acked-by: Alex Deucher +Signed-off-by: Michel Dänzer +Signed-off-by: Hamza Mahfooz +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 6 ------ + drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 7 ------- + 2 files changed, 13 deletions(-) + +--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +@@ -2032,12 +2032,6 @@ void dcn20_optimize_bandwidth( + if (hubbub->funcs->program_compbuf_size) + hubbub->funcs->program_compbuf_size(hubbub, context->bw_ctx.bw.dcn.compbuf_size_kb, true); + +- if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { +- dc_dmub_srv_p_state_delegate(dc, +- true, context); +- context->bw_ctx.bw.dcn.clk.p_state_change_support = true; +- } +- + dc->clk_mgr->funcs->update_clocks( + dc->clk_mgr, + context, +--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +@@ -987,18 +987,11 @@ void dcn30_set_disp_pattern_generator(co + void dcn30_prepare_bandwidth(struct dc *dc, + struct dc_state *context) + { +- if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { +- dc->optimized_required = true; +- context->bw_ctx.bw.dcn.clk.p_state_change_support = false; +- } +- + if (dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr->clks.dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 && + context->bw_ctx.bw.dcn.clk.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000) + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz); + + dcn20_prepare_bandwidth(dc, context); +- +- dc_dmub_srv_p_state_delegate(dc, false, context); + } + diff --git a/queue-6.1/series b/queue-6.1/series index 5ba0260d69d..84185385146 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -589,3 +589,8 @@ perf-x86-uncore-correct-the-number-of-chas-on-emr.patch serial-sc16is7xx-remove-obsolete-out_thread-label.patch serial-sc16is7xx-fix-regression-with-gpio-configurat.patch net-remove-duplicate-indirect_callable_declare-of-udp_ehashfn.patch +tracing-zero-the-pipe-cpumask-on-alloc-to-avoid-spurious-ebusy.patch +revert-drm-amd-display-do-not-set-drr-on-pipe-commit.patch +md-free-resources-in-__md_stop.patch +nfsv4.2-fix-a-potential-double-free-with-read_plus.patch +nfsv4.2-rework-scratch-handling-for-read_plus-again.patch diff --git a/queue-6.1/tracing-zero-the-pipe-cpumask-on-alloc-to-avoid-spurious-ebusy.patch b/queue-6.1/tracing-zero-the-pipe-cpumask-on-alloc-to-avoid-spurious-ebusy.patch new file mode 100644 index 00000000000..78e04392667 --- /dev/null +++ b/queue-6.1/tracing-zero-the-pipe-cpumask-on-alloc-to-avoid-spurious-ebusy.patch @@ -0,0 +1,53 @@ +From 3d07fa1dd19035eb0b13ae6697efd5caa9033e74 Mon Sep 17 00:00:00 2001 +From: Brian Foster +Date: Thu, 31 Aug 2023 08:55:00 -0400 +Subject: tracing: Zero the pipe cpumask on alloc to avoid spurious -EBUSY + +From: Brian Foster + +commit 3d07fa1dd19035eb0b13ae6697efd5caa9033e74 upstream. + +The pipe cpumask used to serialize opens between the main and percpu +trace pipes is not zeroed or initialized. This can result in +spurious -EBUSY returns if underlying memory is not fully zeroed. +This has been observed by immediate failure to read the main +trace_pipe file on an otherwise newly booted and idle system: + + # cat /sys/kernel/debug/tracing/trace_pipe + cat: /sys/kernel/debug/tracing/trace_pipe: Device or resource busy + +Zero the allocation of pipe_cpumask to avoid the problem. + +Link: https://lore.kernel.org/linux-trace-kernel/20230831125500.986862-1-bfoster@redhat.com + +Cc: stable@vger.kernel.org +Fixes: c2489bb7e6be ("tracing: Introduce pipe_cpumask to avoid race on trace_pipes") +Reviewed-by: Zheng Yejian +Reviewed-by: Masami Hiramatsu (Google) +Signed-off-by: Brian Foster +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -9395,7 +9395,7 @@ static struct trace_array *trace_array_c + if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) + goto out_free_tr; + +- if (!alloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) ++ if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) + goto out_free_tr; + + tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; +@@ -10267,7 +10267,7 @@ __init static int tracer_alloc_buffers(v + if (trace_create_savedcmd() < 0) + goto out_free_temp_buffer; + +- if (!alloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) ++ if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) + goto out_free_savedcmd; + + /* TODO: make the number of buffers hot pluggable with CPUS */ -- 2.47.3