From: Greg Kroah-Hartman Date: Tue, 11 Apr 2023 14:01:45 +0000 (+0200) Subject: 5.15-stable patches X-Git-Tag: v5.15.107~20 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a7d335185bbc70b3e53f295804a00bd09adea406;p=thirdparty%2Fkernel%2Fstable-queue.git 5.15-stable patches added patches: drm-nouveau-disp-support-more-modes-by-checking-with-lower-bpc.patch drm-panfrost-fix-the-panfrost_mmu_map_fault_addr-error-path.patch mm-swap-fix-swap_info_struct-race-between-swapoff-and-get_swap_pages.patch mm-vmalloc-avoid-warn_alloc-noise-caused-by-fatal-signal.patch ring-buffer-fix-race-while-reader-and-writer-are-on-the-same-page.patch --- diff --git a/queue-5.15/drm-nouveau-disp-support-more-modes-by-checking-with-lower-bpc.patch b/queue-5.15/drm-nouveau-disp-support-more-modes-by-checking-with-lower-bpc.patch new file mode 100644 index 00000000000..0fc73d4313d --- /dev/null +++ b/queue-5.15/drm-nouveau-disp-support-more-modes-by-checking-with-lower-bpc.patch @@ -0,0 +1,96 @@ +From 7f67aa097e875c87fba024e850cf405342300059 Mon Sep 17 00:00:00 2001 +From: Karol Herbst +Date: Fri, 31 Mar 2023 00:39:38 +0200 +Subject: drm/nouveau/disp: Support more modes by checking with lower bpc + +From: Karol Herbst + +commit 7f67aa097e875c87fba024e850cf405342300059 upstream. + +This allows us to advertise more modes especially on HDR displays. + +Fixes using 4K@60 modes on my TV and main display both using a HDMI to DP +adapter. Also fixes similar issues for users running into this. 
+ +Cc: stable@vger.kernel.org # 5.10+ +Signed-off-by: Karol Herbst +Reviewed-by: Lyude Paul +Link: https://patchwork.freedesktop.org/patch/msgid/20230330223938.4025569-1-kherbst@redhat.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/nouveau/dispnv50/disp.c | 32 ++++++++++++++++++++++++++++++++ + drivers/gpu/drm/nouveau/nouveau_dp.c | 8 +++++--- + 2 files changed, 37 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c ++++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c +@@ -411,6 +411,35 @@ nv50_outp_atomic_check_view(struct drm_e + return 0; + } + ++static void ++nv50_outp_atomic_fix_depth(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state) ++{ ++ struct nv50_head_atom *asyh = nv50_head_atom(crtc_state); ++ struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); ++ struct drm_display_mode *mode = &asyh->state.adjusted_mode; ++ unsigned int max_rate, mode_rate; ++ ++ switch (nv_encoder->dcb->type) { ++ case DCB_OUTPUT_DP: ++ max_rate = nv_encoder->dp.link_nr * nv_encoder->dp.link_bw; ++ ++ /* we don't support more than 10 anyway */ ++ asyh->or.bpc = min_t(u8, asyh->or.bpc, 10); ++ ++ /* reduce the bpc until it works out */ ++ while (asyh->or.bpc > 6) { ++ mode_rate = DIV_ROUND_UP(mode->clock * asyh->or.bpc * 3, 8); ++ if (mode_rate <= max_rate) ++ break; ++ ++ asyh->or.bpc -= 2; ++ } ++ break; ++ default: ++ break; ++ } ++} ++ + static int + nv50_outp_atomic_check(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, +@@ -429,6 +458,9 @@ nv50_outp_atomic_check(struct drm_encode + if (crtc_state->mode_changed || crtc_state->connectors_changed) + asyh->or.bpc = connector->display_info.bpc; + ++ /* We might have to reduce the bpc */ ++ nv50_outp_atomic_fix_depth(encoder, crtc_state); ++ + return 0; + } + +--- a/drivers/gpu/drm/nouveau/nouveau_dp.c ++++ b/drivers/gpu/drm/nouveau/nouveau_dp.c +@@ -220,8 +220,6 @@ void nouveau_dp_irq(struct nouveau_drm * + } + + /* TODO: +- * - Use the minimum possible BPC 
here, once we add support for the max bpc +- * property. + * - Validate against the DP caps advertised by the GPU (we don't check these + * yet) + */ +@@ -233,7 +231,11 @@ nv50_dp_mode_valid(struct drm_connector + { + const unsigned int min_clock = 25000; + unsigned int max_rate, mode_rate, ds_max_dotclock, clock = mode->clock; +- const u8 bpp = connector->display_info.bpc * 3; ++ /* Check with the minimum bpc always, so we can advertise better modes. ++ * In particular not doing this causes modes to be dropped on HDR ++ * displays as we might check with a bpc of 16 even. ++ */ ++ const u8 bpp = 6 * 3; + + if (mode->flags & DRM_MODE_FLAG_INTERLACE && !outp->caps.dp_interlace) + return MODE_NO_INTERLACE; diff --git a/queue-5.15/drm-panfrost-fix-the-panfrost_mmu_map_fault_addr-error-path.patch b/queue-5.15/drm-panfrost-fix-the-panfrost_mmu_map_fault_addr-error-path.patch new file mode 100644 index 00000000000..aec350e3f72 --- /dev/null +++ b/queue-5.15/drm-panfrost-fix-the-panfrost_mmu_map_fault_addr-error-path.patch @@ -0,0 +1,33 @@ +From 764a2ab9eb56e1200083e771aab16186836edf1d Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Fri, 21 May 2021 11:38:11 +0200 +Subject: drm/panfrost: Fix the panfrost_mmu_map_fault_addr() error path + +From: Boris Brezillon + +commit 764a2ab9eb56e1200083e771aab16186836edf1d upstream. + +Make sure all bo->base.pages entries are either NULL or pointing to a +valid page before calling drm_gem_shmem_put_pages(). 
+ +Reported-by: Tomeu Vizoso +Cc: +Fixes: 187d2929206e ("drm/panfrost: Add support for GPU heap allocations") +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Link: https://patchwork.freedesktop.org/patch/msgid/20210521093811.1018992-1-boris.brezillon@collabora.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/panfrost/panfrost_mmu.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c ++++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c +@@ -469,6 +469,7 @@ static int panfrost_mmu_map_fault_addr(s + if (IS_ERR(pages[i])) { + mutex_unlock(&bo->base.pages_lock); + ret = PTR_ERR(pages[i]); ++ pages[i] = NULL; + goto err_pages; + } + } diff --git a/queue-5.15/mm-swap-fix-swap_info_struct-race-between-swapoff-and-get_swap_pages.patch b/queue-5.15/mm-swap-fix-swap_info_struct-race-between-swapoff-and-get_swap_pages.patch new file mode 100644 index 00000000000..c11fbb1761a --- /dev/null +++ b/queue-5.15/mm-swap-fix-swap_info_struct-race-between-swapoff-and-get_swap_pages.patch @@ -0,0 +1,119 @@ +From 6fe7d6b992113719e96744d974212df3fcddc76c Mon Sep 17 00:00:00 2001 +From: Rongwei Wang +Date: Tue, 4 Apr 2023 23:47:16 +0800 +Subject: mm/swap: fix swap_info_struct race between swapoff and get_swap_pages() + +From: Rongwei Wang + +commit 6fe7d6b992113719e96744d974212df3fcddc76c upstream. + +The si->lock must be held when deleting the si from the available list. +Otherwise, another thread can re-add the si to the available list, which +can lead to memory corruption. The only place we have found where this +happens is in the swapoff path. This case can be described as below: + +core 0 core 1 +swapoff + +del_from_avail_list(si) waiting + +try lock si->lock acquire swap_avail_lock + and re-add si into + swap_avail_head + +acquire si->lock but missing si already being added again, and continuing +to clear SWP_WRITEOK, etc. 
+ +It can be easily found that a massive warning messages can be triggered +inside get_swap_pages() by some special cases, for example, we call +madvise(MADV_PAGEOUT) on blocks of touched memory concurrently, meanwhile, +run much swapon-swapoff operations (e.g. stress-ng-swap). + +However, in the worst case, panic can be caused by the above scene. In +swapoff(), the memory used by si could be kept in swap_info[] after +turning off a swap. This means memory corruption will not be caused +immediately until allocated and reset for a new swap in the swapon path. +A panic message caused: (with CONFIG_PLIST_DEBUG enabled) + +------------[ cut here ]------------ +top: 00000000e58a3003, n: 0000000013e75cda, p: 000000008cd4451a +prev: 0000000035b1e58a, n: 000000008cd4451a, p: 000000002150ee8d +next: 000000008cd4451a, n: 000000008cd4451a, p: 000000008cd4451a +WARNING: CPU: 21 PID: 1843 at lib/plist.c:60 plist_check_prev_next_node+0x50/0x70 +Modules linked in: rfkill(E) crct10dif_ce(E)... +CPU: 21 PID: 1843 Comm: stress-ng Kdump: ... 
5.10.134+ +Hardware name: Alibaba Cloud ECS, BIOS 0.0.0 02/06/2015 +pstate: 60400005 (nZCv daif +PAN -UAO -TCO BTYPE=--) +pc : plist_check_prev_next_node+0x50/0x70 +lr : plist_check_prev_next_node+0x50/0x70 +sp : ffff0018009d3c30 +x29: ffff0018009d3c40 x28: ffff800011b32a98 +x27: 0000000000000000 x26: ffff001803908000 +x25: ffff8000128ea088 x24: ffff800011b32a48 +x23: 0000000000000028 x22: ffff001800875c00 +x21: ffff800010f9e520 x20: ffff001800875c00 +x19: ffff001800fdc6e0 x18: 0000000000000030 +x17: 0000000000000000 x16: 0000000000000000 +x15: 0736076307640766 x14: 0730073007380731 +x13: 0736076307640766 x12: 0730073007380731 +x11: 000000000004058d x10: 0000000085a85b76 +x9 : ffff8000101436e4 x8 : ffff800011c8ce08 +x7 : 0000000000000000 x6 : 0000000000000001 +x5 : ffff0017df9ed338 x4 : 0000000000000001 +x3 : ffff8017ce62a000 x2 : ffff0017df9ed340 +x1 : 0000000000000000 x0 : 0000000000000000 +Call trace: + plist_check_prev_next_node+0x50/0x70 + plist_check_head+0x80/0xf0 + plist_add+0x28/0x140 + add_to_avail_list+0x9c/0xf0 + _enable_swap_info+0x78/0xb4 + __do_sys_swapon+0x918/0xa10 + __arm64_sys_swapon+0x20/0x30 + el0_svc_common+0x8c/0x220 + do_el0_svc+0x2c/0x90 + el0_svc+0x1c/0x30 + el0_sync_handler+0xa8/0xb0 + el0_sync+0x148/0x180 +irq event stamp: 2082270 + +Now, si->lock locked before calling 'del_from_avail_list()' to make sure +other thread see the si had been deleted and SWP_WRITEOK cleared together, +will not reinsert again. + +This problem exists in versions after stable 5.10.y. 
+ +Link: https://lkml.kernel.org/r/20230404154716.23058-1-rongwei.wang@linux.alibaba.com +Fixes: a2468cc9bfdff ("swap: choose swap device according to numa node") +Tested-by: Yongchen Yin +Signed-off-by: Rongwei Wang +Cc: Bagas Sanjaya +Cc: Matthew Wilcox (Oracle) +Cc: Aaron Lu +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/swapfile.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/mm/swapfile.c ++++ b/mm/swapfile.c +@@ -673,6 +673,7 @@ static void __del_from_avail_list(struct + { + int nid; + ++ assert_spin_locked(&p->lock); + for_each_node(nid) + plist_del(&p->avail_lists[nid], &swap_avail_heads[nid]); + } +@@ -2565,8 +2566,8 @@ SYSCALL_DEFINE1(swapoff, const char __us + spin_unlock(&swap_lock); + goto out_dput; + } +- del_from_avail_list(p); + spin_lock(&p->lock); ++ del_from_avail_list(p); + if (p->prio < 0) { + struct swap_info_struct *si = p; + int nid; diff --git a/queue-5.15/mm-vmalloc-avoid-warn_alloc-noise-caused-by-fatal-signal.patch b/queue-5.15/mm-vmalloc-avoid-warn_alloc-noise-caused-by-fatal-signal.patch new file mode 100644 index 00000000000..a87f9b9ed8f --- /dev/null +++ b/queue-5.15/mm-vmalloc-avoid-warn_alloc-noise-caused-by-fatal-signal.patch @@ -0,0 +1,122 @@ +From f349b15e183d6956f1b63d6ff57849ff10c7edd5 Mon Sep 17 00:00:00 2001 +From: Yafang Shao +Date: Thu, 30 Mar 2023 16:26:25 +0000 +Subject: mm: vmalloc: avoid warn_alloc noise caused by fatal signal + +From: Yafang Shao + +commit f349b15e183d6956f1b63d6ff57849ff10c7edd5 upstream. 
+ +There're some suspicious warn_alloc on my test server, for example, + +[13366.518837] warn_alloc: 81 callbacks suppressed +[13366.518841] test_verifier: vmalloc error: size 4096, page order 0, failed to allocate pages, mode:0x500dc2(GFP_HIGHUSER|__GFP_ZERO|__GFP_ACCOUNT), nodemask=(null),cpuset=/,mems_allowed=0-1 +[13366.522240] CPU: 30 PID: 722463 Comm: test_verifier Kdump: loaded Tainted: G W O 6.2.0+ #638 +[13366.524216] Call Trace: +[13366.524702] +[13366.525148] dump_stack_lvl+0x6c/0x80 +[13366.525712] dump_stack+0x10/0x20 +[13366.526239] warn_alloc+0x119/0x190 +[13366.526783] ? alloc_pages_bulk_array_mempolicy+0x9e/0x2a0 +[13366.527470] __vmalloc_area_node+0x546/0x5b0 +[13366.528066] __vmalloc_node_range+0xc2/0x210 +[13366.528660] __vmalloc_node+0x42/0x50 +[13366.529186] ? bpf_prog_realloc+0x53/0xc0 +[13366.529743] __vmalloc+0x1e/0x30 +[13366.530235] bpf_prog_realloc+0x53/0xc0 +[13366.530771] bpf_patch_insn_single+0x80/0x1b0 +[13366.531351] bpf_jit_blind_constants+0xe9/0x1c0 +[13366.531932] ? __free_pages+0xee/0x100 +[13366.532457] ? free_large_kmalloc+0x58/0xb0 +[13366.533002] bpf_int_jit_compile+0x8c/0x5e0 +[13366.533546] bpf_prog_select_runtime+0xb4/0x100 +[13366.534108] bpf_prog_load+0x6b1/0xa50 +[13366.534610] ? perf_event_task_tick+0x96/0xb0 +[13366.535151] ? security_capable+0x3a/0x60 +[13366.535663] __sys_bpf+0xb38/0x2190 +[13366.536120] ? 
kvm_clock_get_cycles+0x9/0x10 +[13366.536643] __x64_sys_bpf+0x1c/0x30 +[13366.537094] do_syscall_64+0x38/0x90 +[13366.537554] entry_SYSCALL_64_after_hwframe+0x72/0xdc +[13366.538107] RIP: 0033:0x7f78310f8e29 +[13366.538561] Code: 01 00 48 81 c4 80 00 00 00 e9 f1 fe ff ff 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 17 e0 2c 00 f7 d8 64 89 01 48 +[13366.540286] RSP: 002b:00007ffe2a61fff8 EFLAGS: 00000206 ORIG_RAX: 0000000000000141 +[13366.541031] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f78310f8e29 +[13366.541749] RDX: 0000000000000080 RSI: 00007ffe2a6200b0 RDI: 0000000000000005 +[13366.542470] RBP: 00007ffe2a620010 R08: 00007ffe2a6202a0 R09: 00007ffe2a6200b0 +[13366.543183] R10: 00000000000f423e R11: 0000000000000206 R12: 0000000000407800 +[13366.543900] R13: 00007ffe2a620540 R14: 0000000000000000 R15: 0000000000000000 +[13366.544623] +[13366.545260] Mem-Info: +[13366.546121] active_anon:81319 inactive_anon:20733 isolated_anon:0 + active_file:69450 inactive_file:5624 isolated_file:0 + unevictable:0 dirty:10 writeback:0 + slab_reclaimable:69649 slab_unreclaimable:48930 + mapped:27400 shmem:12868 pagetables:4929 + sec_pagetables:0 bounce:0 + kernel_misc_reclaimable:0 + free:15870308 free_pcp:142935 free_cma:0 +[13366.551886] Node 0 active_anon:224836kB inactive_anon:33528kB active_file:175692kB inactive_file:13752kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:59248kB dirty:32kB writeback:0kB shmem:18252kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 0kB writeback_tmp:0kB kernel_stack:4616kB pagetables:10664kB sec_pagetables:0kB all_unreclaimable? 
no +[13366.555184] Node 1 active_anon:100440kB inactive_anon:49404kB active_file:102108kB inactive_file:8744kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:50352kB dirty:8kB writeback:0kB shmem:33220kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 0kB writeback_tmp:0kB kernel_stack:3896kB pagetables:9052kB sec_pagetables:0kB all_unreclaimable? no +[13366.558262] Node 0 DMA free:15360kB boost:0kB min:304kB low:380kB high:456kB reserved_highatomic:0KB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:15992kB managed:15360kB mlocked:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB +[13366.560821] lowmem_reserve[]: 0 2735 31873 31873 31873 +[13366.561981] Node 0 DMA32 free:2790904kB boost:0kB min:56028kB low:70032kB high:84036kB reserved_highatomic:0KB active_anon:1936kB inactive_anon:20kB active_file:396kB inactive_file:344kB unevictable:0kB writepending:0kB present:3129200kB managed:2801520kB mlocked:0kB bounce:0kB free_pcp:5188kB local_pcp:0kB free_cma:0kB +[13366.565148] lowmem_reserve[]: 0 0 29137 29137 29137 +[13366.566168] Node 0 Normal free:28533824kB boost:0kB min:596740kB low:745924kB high:895108kB reserved_highatomic:28672KB active_anon:222900kB inactive_anon:33508kB active_file:175296kB inactive_file:13408kB unevictable:0kB writepending:32kB present:30408704kB managed:29837172kB mlocked:0kB bounce:0kB free_pcp:295724kB local_pcp:0kB free_cma:0kB +[13366.569485] lowmem_reserve[]: 0 0 0 0 0 +[13366.570416] Node 1 Normal free:32141144kB boost:0kB min:660504kB low:825628kB high:990752kB reserved_highatomic:69632KB active_anon:100440kB inactive_anon:49404kB active_file:102108kB inactive_file:8744kB unevictable:0kB writepending:8kB present:33554432kB managed:33025372kB mlocked:0kB bounce:0kB free_pcp:270880kB local_pcp:46860kB free_cma:0kB +[13366.573403] lowmem_reserve[]: 0 0 0 0 0 +[13366.574015] Node 0 DMA: 0*4kB 0*8kB 0*16kB 0*32kB 0*64kB 0*128kB 0*256kB 0*512kB 1*1024kB (U) 
1*2048kB (M) 3*4096kB (M) = 15360kB +[13366.575474] Node 0 DMA32: 782*4kB (UME) 756*8kB (UME) 736*16kB (UME) 745*32kB (UME) 694*64kB (UME) 653*128kB (UME) 595*256kB (UME) 552*512kB (UME) 454*1024kB (UME) 347*2048kB (UME) 246*4096kB (UME) = 2790904kB +[13366.577442] Node 0 Normal: 33856*4kB (UMEH) 51815*8kB (UMEH) 42418*16kB (UMEH) 36272*32kB (UMEH) 22195*64kB (UMEH) 10296*128kB (UMEH) 7238*256kB (UMEH) 5638*512kB (UEH) 5337*1024kB (UMEH) 3506*2048kB (UMEH) 1470*4096kB (UME) = 28533784kB +[13366.580460] Node 1 Normal: 15776*4kB (UMEH) 37485*8kB (UMEH) 29509*16kB (UMEH) 21420*32kB (UMEH) 14818*64kB (UMEH) 13051*128kB (UMEH) 9918*256kB (UMEH) 7374*512kB (UMEH) 5397*1024kB (UMEH) 3887*2048kB (UMEH) 2002*4096kB (UME) = 32141240kB +[13366.583027] Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=1048576kB +[13366.584380] Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB +[13366.585702] Node 1 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=1048576kB +[13366.587042] Node 1 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB +[13366.588372] 87386 total pagecache pages +[13366.589266] 0 pages in swap cache +[13366.590327] Free swap = 0kB +[13366.591227] Total swap = 0kB +[13366.592142] 16777082 pages RAM +[13366.593057] 0 pages HighMem/MovableOnly +[13366.594037] 357226 pages reserved +[13366.594979] 0 pages hwpoisoned + +This failure really confuse me as there're still lots of available pages. +Finally I figured out it was caused by a fatal signal. When a process is +allocating memory via vm_area_alloc_pages(), it will break directly even +if it hasn't allocated the requested pages when it receives a fatal +signal. In that case, we shouldn't show this warn_alloc, as it is +useless. We only need to show this warning when there're really no enough +pages. 
+ +Link: https://lkml.kernel.org/r/20230330162625.13604-1-laoar.shao@gmail.com +Signed-off-by: Yafang Shao +Reviewed-by: Lorenzo Stoakes +Cc: Christoph Hellwig +Cc: Uladzislau Rezki (Sony) +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmalloc.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -2927,9 +2927,11 @@ static void *__vmalloc_area_node(struct + * allocation request, free them via __vfree() if any. + */ + if (area->nr_pages != nr_small_pages) { +- warn_alloc(gfp_mask, NULL, +- "vmalloc error: size %lu, page order %u, failed to allocate pages", +- area->nr_pages * PAGE_SIZE, page_order); ++ /* vm_area_alloc_pages() can also fail due to a fatal signal */ ++ if (!fatal_signal_pending(current)) ++ warn_alloc(gfp_mask, NULL, ++ "vmalloc error: size %lu, page order %u, failed to allocate pages", ++ area->nr_pages * PAGE_SIZE, page_order); + goto fail; + } + diff --git a/queue-5.15/ring-buffer-fix-race-while-reader-and-writer-are-on-the-same-page.patch b/queue-5.15/ring-buffer-fix-race-while-reader-and-writer-are-on-the-same-page.patch new file mode 100644 index 00000000000..7eef08c8526 --- /dev/null +++ b/queue-5.15/ring-buffer-fix-race-while-reader-and-writer-are-on-the-same-page.patch @@ -0,0 +1,103 @@ +From 6455b6163d8c680366663cdb8c679514d55fc30c Mon Sep 17 00:00:00 2001 +From: Zheng Yejian +Date: Sat, 25 Mar 2023 10:12:47 +0800 +Subject: ring-buffer: Fix race while reader and writer are on the same page + +From: Zheng Yejian + +commit 6455b6163d8c680366663cdb8c679514d55fc30c upstream. + +When user reads file 'trace_pipe', kernel keeps printing following logs +that warn at "cpu_buffer->reader_page->read > rb_page_size(reader)" in +rb_get_reader_page(). It just looks like there's an infinite loop in +tracing_read_pipe(). This problem occurs several times on arm64 platform +when testing v5.10 and below. 
+ + Call trace: + rb_get_reader_page+0x248/0x1300 + rb_buffer_peek+0x34/0x160 + ring_buffer_peek+0xbc/0x224 + peek_next_entry+0x98/0xbc + __find_next_entry+0xc4/0x1c0 + trace_find_next_entry_inc+0x30/0x94 + tracing_read_pipe+0x198/0x304 + vfs_read+0xb4/0x1e0 + ksys_read+0x74/0x100 + __arm64_sys_read+0x24/0x30 + el0_svc_common.constprop.0+0x7c/0x1bc + do_el0_svc+0x2c/0x94 + el0_svc+0x20/0x30 + el0_sync_handler+0xb0/0xb4 + el0_sync+0x160/0x180 + +Then I dumped the vmcore and looked into the problematic per_cpu ring_buffer, +and found that tail_page/commit_page/reader_page are on the same page while +reader_page->read is obviously abnormal: + tail_page == commit_page == reader_page == { + .write = 0x100d20, + .read = 0x8f9f4805, // Far greater than 0xd20, obviously abnormal!!! + .entries = 0x10004c, + .real_end = 0x0, + .page = { + .time_stamp = 0x857257416af0, + .commit = 0xd20, // This page hasn't been fully filled. + // .data[0...0xd20] seems normal. + } + } + +The root cause is most likely the race that reader and writer are on the +same page while reader saw an event that was not fully committed by the writer. + +To fix this, add memory barriers to make sure the reader can see the +content of what is committed. Since commit a0fcaaed0c46 ("ring-buffer: Fix +race between reset page and reading page") has added the read barrier in +rb_get_reader_page(), here we just need to add the write barrier. 
+ +Link: https://lore.kernel.org/linux-trace-kernel/20230325021247.2923907-1-zhengyejian1@huawei.com + +Cc: stable@vger.kernel.org +Fixes: 77ae365eca89 ("ring-buffer: make lockless") +Suggested-by: Steven Rostedt (Google) +Signed-off-by: Zheng Yejian +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/ring_buffer.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -3041,6 +3041,10 @@ rb_set_commit_to_write(struct ring_buffe + if (RB_WARN_ON(cpu_buffer, + rb_is_reader_page(cpu_buffer->tail_page))) + return; ++ /* ++ * No need for a memory barrier here, as the update ++ * of the tail_page did it for this page. ++ */ + local_set(&cpu_buffer->commit_page->page->commit, + rb_page_write(cpu_buffer->commit_page)); + rb_inc_page(&cpu_buffer->commit_page); +@@ -3050,6 +3054,8 @@ rb_set_commit_to_write(struct ring_buffe + while (rb_commit_index(cpu_buffer) != + rb_page_write(cpu_buffer->commit_page)) { + ++ /* Make sure the readers see the content of what is committed. */ ++ smp_wmb(); + local_set(&cpu_buffer->commit_page->page->commit, + rb_page_write(cpu_buffer->commit_page)); + RB_WARN_ON(cpu_buffer, +@@ -4632,7 +4638,12 @@ rb_get_reader_page(struct ring_buffer_pe + + /* + * Make sure we see any padding after the write update +- * (see rb_reset_tail()) ++ * (see rb_reset_tail()). ++ * ++ * In addition, a writer may be writing on the reader page ++ * if the page has not been fully filled, so the read barrier ++ * is also needed to make sure we see the content of what is ++ * committed by the writer (see rb_set_commit_to_write()). 
+ */ + smp_rmb(); + diff --git a/queue-5.15/series b/queue-5.15/series index ffab84e7499..bae8f9202bb 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -85,3 +85,8 @@ can-j1939-j1939_tp_tx_dat_new-fix-out-of-bounds-memory-access.patch can-isotp-isotp_ops-fix-poll-to-not-report-false-epollout-events.patch tracing-free-error-logs-of-tracing-instances.patch asoc-hdac_hdmi-use-set_stream-instead-of-set_tdm_slots.patch +mm-vmalloc-avoid-warn_alloc-noise-caused-by-fatal-signal.patch +drm-panfrost-fix-the-panfrost_mmu_map_fault_addr-error-path.patch +drm-nouveau-disp-support-more-modes-by-checking-with-lower-bpc.patch +ring-buffer-fix-race-while-reader-and-writer-are-on-the-same-page.patch +mm-swap-fix-swap_info_struct-race-between-swapoff-and-get_swap_pages.patch