From: Greg Kroah-Hartman Date: Sun, 8 Sep 2024 12:23:59 +0000 (+0200) Subject: 6.6-stable patches X-Git-Tag: v4.19.322~81 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=59a765586741729d331437be7b76b257588db194;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: eventfs-use-list_del_rcu-for-srcu-protected-list-variable.patch ila-call-nf_unregister_net_hooks-sooner.patch net-mana-fix-error-handling-in-mana_create_txq-rxq-s-napi-cleanup.patch net-mctp-serial-fix-missing-escapes-on-transmit.patch nilfs2-fix-missing-cleanup-on-rollforward-recovery-error.patch nilfs2-fix-state-management-in-error-path-of-log-writing-function.patch nilfs2-protect-references-to-superblock-parameters-exposed-in-sysfs.patch revert-drm-amdgpu-align-pp_power_profile_mode-with-kernel-docs.patch sched-sch_cake-fix-bulk-flow-accounting-logic-for-host-fairness.patch tcp_bpf-fix-return-value-of-tcp_bpf_sendmsg.patch x86-apic-make-x2apic_disable-work-correctly.patch x86-fpu-avoid-writing-lbr-bit-to-ia32_xss-unless-supported.patch --- diff --git a/queue-6.6/eventfs-use-list_del_rcu-for-srcu-protected-list-variable.patch b/queue-6.6/eventfs-use-list_del_rcu-for-srcu-protected-list-variable.patch new file mode 100644 index 00000000000..c618d4d3454 --- /dev/null +++ b/queue-6.6/eventfs-use-list_del_rcu-for-srcu-protected-list-variable.patch @@ -0,0 +1,112 @@ +From d2603279c7d645bf0d11fa253b23f1ab48fc8d3c Mon Sep 17 00:00:00 2001 +From: Steven Rostedt +Date: Wed, 4 Sep 2024 13:16:05 -0400 +Subject: eventfs: Use list_del_rcu() for SRCU protected list variable + +From: Steven Rostedt + +commit d2603279c7d645bf0d11fa253b23f1ab48fc8d3c upstream. + +Chi Zhiling reported: + + We found a null pointer accessing in tracefs[1], the reason is that the + variable 'ei_child' is set to LIST_POISON1, that means the list was + removed in eventfs_remove_rec. so when access the ei_child->is_freed, the + panic triggered. 
+ + by the way, the following script can reproduce this panic + + loop1 (){ + while true + do + echo "p:kp submit_bio" > /sys/kernel/debug/tracing/kprobe_events + echo "" > /sys/kernel/debug/tracing/kprobe_events + done + } + loop2 (){ + while true + do + tree /sys/kernel/debug/tracing/events/kprobes/ + done + } + loop1 & + loop2 + + [1]: + [ 1147.959632][T17331] Unable to handle kernel paging request at virtual address dead000000000150 + [ 1147.968239][T17331] Mem abort info: + [ 1147.971739][T17331] ESR = 0x0000000096000004 + [ 1147.976172][T17331] EC = 0x25: DABT (current EL), IL = 32 bits + [ 1147.982171][T17331] SET = 0, FnV = 0 + [ 1147.985906][T17331] EA = 0, S1PTW = 0 + [ 1147.989734][T17331] FSC = 0x04: level 0 translation fault + [ 1147.995292][T17331] Data abort info: + [ 1147.998858][T17331] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 + [ 1148.005023][T17331] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 + [ 1148.010759][T17331] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 + [ 1148.016752][T17331] [dead000000000150] address between user and kernel address ranges + [ 1148.024571][T17331] Internal error: Oops: 0000000096000004 [#1] SMP + [ 1148.030825][T17331] Modules linked in: team_mode_loadbalance team nlmon act_gact cls_flower sch_ingress bonding tls macvlan dummy ib_core bridge stp llc veth amdgpu amdxcp mfd_core gpu_sched drm_exec drm_buddy radeon crct10dif_ce video drm_suballoc_helper ghash_ce drm_ttm_helper sha2_ce ttm sha256_arm64 i2c_algo_bit sha1_ce sbsa_gwdt cp210x drm_display_helper cec sr_mod cdrom drm_kms_helper binfmt_misc sg loop fuse drm dm_mod nfnetlink ip_tables autofs4 [last unloaded: tls] + [ 1148.072808][T17331] CPU: 3 PID: 17331 Comm: ls Tainted: G W ------- ---- 6.6.43 #2 + [ 1148.081751][T17331] Source Version: 21b3b386e948bedd29369af66f3e98ab01b1c650 + [ 1148.088783][T17331] Hardware name: Greatwall GW-001M1A-FTF/GW-001M1A-FTF, BIOS KunLun BIOS V4.0 07/16/2020 + [ 1148.098419][T17331] pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT 
-SSBS BTYPE=--) + [ 1148.106060][T17331] pc : eventfs_iterate+0x2c0/0x398 + [ 1148.111017][T17331] lr : eventfs_iterate+0x2fc/0x398 + [ 1148.115969][T17331] sp : ffff80008d56bbd0 + [ 1148.119964][T17331] x29: ffff80008d56bbf0 x28: ffff001ff5be2600 x27: 0000000000000000 + [ 1148.127781][T17331] x26: ffff001ff52ca4e0 x25: 0000000000009977 x24: dead000000000100 + [ 1148.135598][T17331] x23: 0000000000000000 x22: 000000000000000b x21: ffff800082645f10 + [ 1148.143415][T17331] x20: ffff001fddf87c70 x19: ffff80008d56bc90 x18: 0000000000000000 + [ 1148.151231][T17331] x17: 0000000000000000 x16: 0000000000000000 x15: ffff001ff52ca4e0 + [ 1148.159048][T17331] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 + [ 1148.166864][T17331] x11: 0000000000000000 x10: 0000000000000000 x9 : ffff8000804391d0 + [ 1148.174680][T17331] x8 : 0000000180000000 x7 : 0000000000000018 x6 : 0000aaab04b92862 + [ 1148.182498][T17331] x5 : 0000aaab04b92862 x4 : 0000000080000000 x3 : 0000000000000068 + [ 1148.190314][T17331] x2 : 000000000000000f x1 : 0000000000007ea8 x0 : 0000000000000001 + [ 1148.198131][T17331] Call trace: + [ 1148.201259][T17331] eventfs_iterate+0x2c0/0x398 + [ 1148.205864][T17331] iterate_dir+0x98/0x188 + [ 1148.210036][T17331] __arm64_sys_getdents64+0x78/0x160 + [ 1148.215161][T17331] invoke_syscall+0x78/0x108 + [ 1148.219593][T17331] el0_svc_common.constprop.0+0x48/0xf0 + [ 1148.224977][T17331] do_el0_svc+0x24/0x38 + [ 1148.228974][T17331] el0_svc+0x40/0x168 + [ 1148.232798][T17331] el0t_64_sync_handler+0x120/0x130 + [ 1148.237836][T17331] el0t_64_sync+0x1a4/0x1a8 + [ 1148.242182][T17331] Code: 54ffff6c f9400676 910006d6 f9000676 (b9405300) + [ 1148.248955][T17331] ---[ end trace 0000000000000000 ]--- + +The issue is that list_del() is used on an SRCU protected list variable +before the synchronization occurs. This can poison the list pointers while +there is a reader iterating the list. 
+ +This is simply fixed by using list_del_rcu() that is specifically made for +this purpose. + +Link: https://lore.kernel.org/linux-trace-kernel/20240829085025.3600021-1-chizhiling@163.com/ + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mathieu Desnoyers +Link: https://lore.kernel.org/20240904131605.640d42b1@gandalf.local.home +Fixes: 43aa6f97c2d03 ("eventfs: Get rid of dentry pointers without refcounts") +Reported-by: Chi Zhiling +Tested-by: Chi Zhiling +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/event_inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/tracefs/event_inode.c ++++ b/fs/tracefs/event_inode.c +@@ -935,7 +935,7 @@ static void eventfs_remove_rec(struct ev + list_for_each_entry(ei_child, &ei->children, list) + eventfs_remove_rec(ei_child, level + 1); + +- list_del(&ei->list); ++ list_del_rcu(&ei->list); + free_ei(ei); + } + diff --git a/queue-6.6/ila-call-nf_unregister_net_hooks-sooner.patch b/queue-6.6/ila-call-nf_unregister_net_hooks-sooner.patch new file mode 100644 index 00000000000..c34dc3b55a3 --- /dev/null +++ b/queue-6.6/ila-call-nf_unregister_net_hooks-sooner.patch @@ -0,0 +1,194 @@ +From 031ae72825cef43e4650140b800ad58bf7a6a466 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 4 Sep 2024 14:44:18 +0000 +Subject: ila: call nf_unregister_net_hooks() sooner + +From: Eric Dumazet + +commit 031ae72825cef43e4650140b800ad58bf7a6a466 upstream. + +syzbot found an use-after-free Read in ila_nf_input [1] + +Issue here is that ila_xlat_exit_net() frees the rhashtable, +then call nf_unregister_net_hooks(). + +It should be done in the reverse way, with a synchronize_rcu(). + +This is a good match for a pre_exit() method. 
+ +[1] + BUG: KASAN: use-after-free in rht_key_hashfn include/linux/rhashtable.h:159 [inline] + BUG: KASAN: use-after-free in __rhashtable_lookup include/linux/rhashtable.h:604 [inline] + BUG: KASAN: use-after-free in rhashtable_lookup include/linux/rhashtable.h:646 [inline] + BUG: KASAN: use-after-free in rhashtable_lookup_fast+0x77a/0x9b0 include/linux/rhashtable.h:672 +Read of size 4 at addr ffff888064620008 by task ksoftirqd/0/16 + +CPU: 0 UID: 0 PID: 16 Comm: ksoftirqd/0 Not tainted 6.11.0-rc4-syzkaller-00238-g2ad6d23f465a #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 +Call Trace: + + __dump_stack lib/dump_stack.c:93 [inline] + dump_stack_lvl+0x241/0x360 lib/dump_stack.c:119 + print_address_description mm/kasan/report.c:377 [inline] + print_report+0x169/0x550 mm/kasan/report.c:488 + kasan_report+0x143/0x180 mm/kasan/report.c:601 + rht_key_hashfn include/linux/rhashtable.h:159 [inline] + __rhashtable_lookup include/linux/rhashtable.h:604 [inline] + rhashtable_lookup include/linux/rhashtable.h:646 [inline] + rhashtable_lookup_fast+0x77a/0x9b0 include/linux/rhashtable.h:672 + ila_lookup_wildcards net/ipv6/ila/ila_xlat.c:132 [inline] + ila_xlat_addr net/ipv6/ila/ila_xlat.c:652 [inline] + ila_nf_input+0x1fe/0x3c0 net/ipv6/ila/ila_xlat.c:190 + nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] + nf_hook_slow+0xc3/0x220 net/netfilter/core.c:626 + nf_hook include/linux/netfilter.h:269 [inline] + NF_HOOK+0x29e/0x450 include/linux/netfilter.h:312 + __netif_receive_skb_one_core net/core/dev.c:5661 [inline] + __netif_receive_skb+0x1ea/0x650 net/core/dev.c:5775 + process_backlog+0x662/0x15b0 net/core/dev.c:6108 + __napi_poll+0xcb/0x490 net/core/dev.c:6772 + napi_poll net/core/dev.c:6841 [inline] + net_rx_action+0x89b/0x1240 net/core/dev.c:6963 + handle_softirqs+0x2c4/0x970 kernel/softirq.c:554 + run_ksoftirqd+0xca/0x130 kernel/softirq.c:928 + smpboot_thread_fn+0x544/0xa30 kernel/smpboot.c:164 + kthread+0x2f0/0x390 
kernel/kthread.c:389 + ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 + ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 + + +The buggy address belongs to the physical page: +page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x64620 +flags: 0xfff00000000000(node=0|zone=1|lastcpupid=0x7ff) +page_type: 0xbfffffff(buddy) +raw: 00fff00000000000 ffffea0000959608 ffffea00019d9408 0000000000000000 +raw: 0000000000000000 0000000000000003 00000000bfffffff 0000000000000000 +page dumped because: kasan: bad access detected +page_owner tracks the page as freed +page last allocated via order 3, migratetype Unmovable, gfp_mask 0x52dc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_ZERO), pid 5242, tgid 5242 (syz-executor), ts 73611328570, free_ts 618981657187 + set_page_owner include/linux/page_owner.h:32 [inline] + post_alloc_hook+0x1f3/0x230 mm/page_alloc.c:1493 + prep_new_page mm/page_alloc.c:1501 [inline] + get_page_from_freelist+0x2e4c/0x2f10 mm/page_alloc.c:3439 + __alloc_pages_noprof+0x256/0x6c0 mm/page_alloc.c:4695 + __alloc_pages_node_noprof include/linux/gfp.h:269 [inline] + alloc_pages_node_noprof include/linux/gfp.h:296 [inline] + ___kmalloc_large_node+0x8b/0x1d0 mm/slub.c:4103 + __kmalloc_large_node_noprof+0x1a/0x80 mm/slub.c:4130 + __do_kmalloc_node mm/slub.c:4146 [inline] + __kmalloc_node_noprof+0x2d2/0x440 mm/slub.c:4164 + __kvmalloc_node_noprof+0x72/0x190 mm/util.c:650 + bucket_table_alloc lib/rhashtable.c:186 [inline] + rhashtable_init_noprof+0x534/0xa60 lib/rhashtable.c:1071 + ila_xlat_init_net+0xa0/0x110 net/ipv6/ila/ila_xlat.c:613 + ops_init+0x359/0x610 net/core/net_namespace.c:139 + setup_net+0x515/0xca0 net/core/net_namespace.c:343 + copy_net_ns+0x4e2/0x7b0 net/core/net_namespace.c:508 + create_new_namespaces+0x425/0x7b0 kernel/nsproxy.c:110 + unshare_nsproxy_namespaces+0x124/0x180 kernel/nsproxy.c:228 + ksys_unshare+0x619/0xc10 kernel/fork.c:3328 + __do_sys_unshare kernel/fork.c:3399 [inline] + __se_sys_unshare 
kernel/fork.c:3397 [inline] + __x64_sys_unshare+0x38/0x40 kernel/fork.c:3397 +page last free pid 11846 tgid 11846 stack trace: + reset_page_owner include/linux/page_owner.h:25 [inline] + free_pages_prepare mm/page_alloc.c:1094 [inline] + free_unref_page+0xd22/0xea0 mm/page_alloc.c:2612 + __folio_put+0x2c8/0x440 mm/swap.c:128 + folio_put include/linux/mm.h:1486 [inline] + free_large_kmalloc+0x105/0x1c0 mm/slub.c:4565 + kfree+0x1c4/0x360 mm/slub.c:4588 + rhashtable_free_and_destroy+0x7c6/0x920 lib/rhashtable.c:1169 + ila_xlat_exit_net+0x55/0x110 net/ipv6/ila/ila_xlat.c:626 + ops_exit_list net/core/net_namespace.c:173 [inline] + cleanup_net+0x802/0xcc0 net/core/net_namespace.c:640 + process_one_work kernel/workqueue.c:3231 [inline] + process_scheduled_works+0xa2c/0x1830 kernel/workqueue.c:3312 + worker_thread+0x86d/0xd40 kernel/workqueue.c:3390 + kthread+0x2f0/0x390 kernel/kthread.c:389 + ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 + ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 + +Memory state around the buggy address: + ffff88806461ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff88806461ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +>ffff888064620000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ^ + ffff888064620080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ffff888064620100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + +Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Cc: Tom Herbert +Reviewed-by: Florian Westphal +Link: https://patch.msgid.link/20240904144418.1162839-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ila/ila.h | 1 + + net/ipv6/ila/ila_main.c | 6 ++++++ + net/ipv6/ila/ila_xlat.c | 13 +++++++++---- + 3 files changed, 16 insertions(+), 4 deletions(-) + +--- a/net/ipv6/ila/ila.h ++++ b/net/ipv6/ila/ila.h +@@ -108,6 +108,7 @@ int ila_lwt_init(void); + void 
ila_lwt_fini(void); + + int ila_xlat_init_net(struct net *net); ++void ila_xlat_pre_exit_net(struct net *net); + void ila_xlat_exit_net(struct net *net); + + int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info); +--- a/net/ipv6/ila/ila_main.c ++++ b/net/ipv6/ila/ila_main.c +@@ -71,6 +71,11 @@ ila_xlat_init_fail: + return err; + } + ++static __net_exit void ila_pre_exit_net(struct net *net) ++{ ++ ila_xlat_pre_exit_net(net); ++} ++ + static __net_exit void ila_exit_net(struct net *net) + { + ila_xlat_exit_net(net); +@@ -78,6 +83,7 @@ static __net_exit void ila_exit_net(stru + + static struct pernet_operations ila_net_ops = { + .init = ila_init_net, ++ .pre_exit = ila_pre_exit_net, + .exit = ila_exit_net, + .id = &ila_net_id, + .size = sizeof(struct ila_net), +--- a/net/ipv6/ila/ila_xlat.c ++++ b/net/ipv6/ila/ila_xlat.c +@@ -619,6 +619,15 @@ int ila_xlat_init_net(struct net *net) + return 0; + } + ++void ila_xlat_pre_exit_net(struct net *net) ++{ ++ struct ila_net *ilan = net_generic(net, ila_net_id); ++ ++ if (ilan->xlat.hooks_registered) ++ nf_unregister_net_hooks(net, ila_nf_hook_ops, ++ ARRAY_SIZE(ila_nf_hook_ops)); ++} ++ + void ila_xlat_exit_net(struct net *net) + { + struct ila_net *ilan = net_generic(net, ila_net_id); +@@ -626,10 +635,6 @@ void ila_xlat_exit_net(struct net *net) + rhashtable_free_and_destroy(&ilan->xlat.rhash_table, ila_free_cb, NULL); + + free_bucket_spinlocks(ilan->xlat.locks); +- +- if (ilan->xlat.hooks_registered) +- nf_unregister_net_hooks(net, ila_nf_hook_ops, +- ARRAY_SIZE(ila_nf_hook_ops)); + } + + static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila) diff --git a/queue-6.6/net-mana-fix-error-handling-in-mana_create_txq-rxq-s-napi-cleanup.patch b/queue-6.6/net-mana-fix-error-handling-in-mana_create_txq-rxq-s-napi-cleanup.patch new file mode 100644 index 00000000000..efd301eddeb --- /dev/null +++ b/queue-6.6/net-mana-fix-error-handling-in-mana_create_txq-rxq-s-napi-cleanup.patch @@ -0,0 +1,118 @@ +From 
b6ecc662037694488bfff7c9fd21c405df8411f2 Mon Sep 17 00:00:00 2001 +From: Souradeep Chakrabarti +Date: Mon, 2 Sep 2024 05:43:47 -0700 +Subject: net: mana: Fix error handling in mana_create_txq/rxq's NAPI cleanup + +From: Souradeep Chakrabarti + +commit b6ecc662037694488bfff7c9fd21c405df8411f2 upstream. + +Currently napi_disable() gets called during rxq and txq cleanup, +even before napi is enabled and hrtimer is initialized. It causes +kernel panic. + +? page_fault_oops+0x136/0x2b0 + ? page_counter_cancel+0x2e/0x80 + ? do_user_addr_fault+0x2f2/0x640 + ? refill_obj_stock+0xc4/0x110 + ? exc_page_fault+0x71/0x160 + ? asm_exc_page_fault+0x27/0x30 + ? __mmdrop+0x10/0x180 + ? __mmdrop+0xec/0x180 + ? hrtimer_active+0xd/0x50 + hrtimer_try_to_cancel+0x2c/0xf0 + hrtimer_cancel+0x15/0x30 + napi_disable+0x65/0x90 + mana_destroy_rxq+0x4c/0x2f0 + mana_create_rxq.isra.0+0x56c/0x6d0 + ? mana_uncfg_vport+0x50/0x50 + mana_alloc_queues+0x21b/0x320 + ? skb_dequeue+0x5f/0x80 + +Cc: stable@vger.kernel.org +Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ") +Signed-off-by: Souradeep Chakrabarti +Reviewed-by: Haiyang Zhang +Reviewed-by: Shradha Gupta +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 22 +++++++++++++--------- + include/net/mana/mana.h | 2 ++ + 2 files changed, 15 insertions(+), 9 deletions(-) + +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -1858,10 +1858,12 @@ static void mana_destroy_txq(struct mana + + for (i = 0; i < apc->num_queues; i++) { + napi = &apc->tx_qp[i].tx_cq.napi; +- napi_synchronize(napi); +- napi_disable(napi); +- netif_napi_del(napi); +- ++ if (apc->tx_qp[i].txq.napi_initialized) { ++ napi_synchronize(napi); ++ napi_disable(napi); ++ netif_napi_del(napi); ++ apc->tx_qp[i].txq.napi_initialized = false; ++ } + mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object); + + mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq); +@@ -1917,6 +1919,7 @@ static int mana_create_txq(struct mana_p + txq->ndev = net; + txq->net_txq = netdev_get_tx_queue(net, i); + txq->vp_offset = apc->tx_vp_offset; ++ txq->napi_initialized = false; + skb_queue_head_init(&txq->pending_skbs); + + memset(&spec, 0, sizeof(spec)); +@@ -1983,6 +1986,7 @@ static int mana_create_txq(struct mana_p + + netif_napi_add_tx(net, &cq->napi, mana_poll); + napi_enable(&cq->napi); ++ txq->napi_initialized = true; + + mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); + } +@@ -1994,7 +1998,7 @@ out: + } + + static void mana_destroy_rxq(struct mana_port_context *apc, +- struct mana_rxq *rxq, bool validate_state) ++ struct mana_rxq *rxq, bool napi_initialized) + + { + struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; +@@ -2009,15 +2013,15 @@ static void mana_destroy_rxq(struct mana + + napi = &rxq->rx_cq.napi; + +- if (validate_state) ++ if (napi_initialized) { + napi_synchronize(napi); + +- napi_disable(napi); ++ napi_disable(napi); + ++ netif_napi_del(napi); ++ } + xdp_rxq_info_unreg(&rxq->xdp_rxq); + +- netif_napi_del(napi); +- + mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); + + mana_deinit_cq(apc, &rxq->rx_cq); +--- 
a/include/net/mana/mana.h ++++ b/include/net/mana/mana.h +@@ -97,6 +97,8 @@ struct mana_txq { + + atomic_t pending_sends; + ++ bool napi_initialized; ++ + struct mana_stats_tx stats; + }; + diff --git a/queue-6.6/net-mctp-serial-fix-missing-escapes-on-transmit.patch b/queue-6.6/net-mctp-serial-fix-missing-escapes-on-transmit.patch new file mode 100644 index 00000000000..b548932b62a --- /dev/null +++ b/queue-6.6/net-mctp-serial-fix-missing-escapes-on-transmit.patch @@ -0,0 +1,55 @@ +From f962e8361adfa84e8252d3fc3e5e6bb879f029b1 Mon Sep 17 00:00:00 2001 +From: Matt Johnston +Date: Thu, 29 Aug 2024 15:43:46 +0800 +Subject: net: mctp-serial: Fix missing escapes on transmit + +From: Matt Johnston + +commit f962e8361adfa84e8252d3fc3e5e6bb879f029b1 upstream. + +0x7d and 0x7e bytes are meant to be escaped in the data portion of +frames, but this didn't occur since next_chunk_len() had an off-by-one +error. That also resulted in the final byte of a payload being written +as a separate tty write op. + +The chunk prior to an escaped byte would be one byte short, and the +next call would never test the txpos+1 case, which is where the escaped +byte was located. That meant it never hit the escaping case in +mctp_serial_tx_work(). + +Example Input: 01 00 08 c8 7e 80 02 + +Previous incorrect chunks from next_chunk_len(): + +01 00 08 +c8 7e 80 +02 + +With this fix: + +01 00 08 c8 +7e +80 02 + +Cc: stable@vger.kernel.org +Fixes: a0c2ccd9b5ad ("mctp: Add MCTP-over-serial transport binding") +Signed-off-by: Matt Johnston +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/mctp/mctp-serial.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/mctp/mctp-serial.c ++++ b/drivers/net/mctp/mctp-serial.c +@@ -91,8 +91,8 @@ static int next_chunk_len(struct mctp_se + * will be those non-escaped bytes, and does not include the escaped + * byte. 
+ */ +- for (i = 1; i + dev->txpos + 1 < dev->txlen; i++) { +- if (needs_escape(dev->txbuf[dev->txpos + i + 1])) ++ for (i = 1; i + dev->txpos < dev->txlen; i++) { ++ if (needs_escape(dev->txbuf[dev->txpos + i])) + break; + } + diff --git a/queue-6.6/nilfs2-fix-missing-cleanup-on-rollforward-recovery-error.patch b/queue-6.6/nilfs2-fix-missing-cleanup-on-rollforward-recovery-error.patch new file mode 100644 index 00000000000..b32a711c34a --- /dev/null +++ b/queue-6.6/nilfs2-fix-missing-cleanup-on-rollforward-recovery-error.patch @@ -0,0 +1,90 @@ +From 5787fcaab9eb5930f5378d6a1dd03d916d146622 Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Sat, 10 Aug 2024 15:52:42 +0900 +Subject: nilfs2: fix missing cleanup on rollforward recovery error + +From: Ryusuke Konishi + +commit 5787fcaab9eb5930f5378d6a1dd03d916d146622 upstream. + +In an error injection test of a routine for mount-time recovery, KASAN +found a use-after-free bug. + +It turned out that if data recovery was performed using partial logs +created by dsync writes, but an error occurred before starting the log +writer to create a recovered checkpoint, the inodes whose data had been +recovered were left in the ns_dirty_files list of the nilfs object and +were not freed. + +Fix this issue by cleaning up inodes that have read the recovery data if +the recovery routine fails midway before the log writer starts. 
+ +Link: https://lkml.kernel.org/r/20240810065242.3701-1-konishi.ryusuke@gmail.com +Fixes: 0f3e1c7f23f8 ("nilfs2: recovery functions") +Signed-off-by: Ryusuke Konishi +Tested-by: Ryusuke Konishi +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/recovery.c | 35 +++++++++++++++++++++++++++++++++-- + 1 file changed, 33 insertions(+), 2 deletions(-) + +--- a/fs/nilfs2/recovery.c ++++ b/fs/nilfs2/recovery.c +@@ -709,6 +709,33 @@ static void nilfs_finish_roll_forward(st + } + + /** ++ * nilfs_abort_roll_forward - cleaning up after a failed rollforward recovery ++ * @nilfs: nilfs object ++ */ ++static void nilfs_abort_roll_forward(struct the_nilfs *nilfs) ++{ ++ struct nilfs_inode_info *ii, *n; ++ LIST_HEAD(head); ++ ++ /* Abandon inodes that have read recovery data */ ++ spin_lock(&nilfs->ns_inode_lock); ++ list_splice_init(&nilfs->ns_dirty_files, &head); ++ spin_unlock(&nilfs->ns_inode_lock); ++ if (list_empty(&head)) ++ return; ++ ++ set_nilfs_purging(nilfs); ++ list_for_each_entry_safe(ii, n, &head, i_dirty) { ++ spin_lock(&nilfs->ns_inode_lock); ++ list_del_init(&ii->i_dirty); ++ spin_unlock(&nilfs->ns_inode_lock); ++ ++ iput(&ii->vfs_inode); ++ } ++ clear_nilfs_purging(nilfs); ++} ++ ++/** + * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint + * @nilfs: nilfs object + * @sb: super block instance +@@ -766,15 +793,19 @@ int nilfs_salvage_orphan_logs(struct the + if (unlikely(err)) { + nilfs_err(sb, "error %d writing segment for recovery", + err); +- goto failed; ++ goto put_root; + } + + nilfs_finish_roll_forward(nilfs, ri); + } + +- failed: ++put_root: + nilfs_put_root(root); + return err; ++ ++failed: ++ nilfs_abort_roll_forward(nilfs); ++ goto put_root; + } + + /** diff --git a/queue-6.6/nilfs2-fix-state-management-in-error-path-of-log-writing-function.patch b/queue-6.6/nilfs2-fix-state-management-in-error-path-of-log-writing-function.patch new file mode 100644 index 00000000000..c538538737b --- 
/dev/null +++ b/queue-6.6/nilfs2-fix-state-management-in-error-path-of-log-writing-function.patch @@ -0,0 +1,81 @@ +From 6576dd6695f2afca3f4954029ac4a64f82ba60ab Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Wed, 14 Aug 2024 19:11:19 +0900 +Subject: nilfs2: fix state management in error path of log writing function + +From: Ryusuke Konishi + +commit 6576dd6695f2afca3f4954029ac4a64f82ba60ab upstream. + +After commit a694291a6211 ("nilfs2: separate wait function from +nilfs_segctor_write") was applied, the log writing function +nilfs_segctor_do_construct() was able to issue I/O requests continuously +even if user data blocks were split into multiple logs across segments, +but two potential flaws were introduced in its error handling. + +First, if nilfs_segctor_begin_construction() fails while creating the +second or subsequent logs, the log writing function returns without +calling nilfs_segctor_abort_construction(), so the writeback flag set on +pages/folios will remain uncleared. This causes page cache operations to +hang waiting for the writeback flag. For example, +truncate_inode_pages_final(), which is called via nilfs_evict_inode() when +an inode is evicted from memory, will hang. + +Second, the NILFS_I_COLLECTED flag set on normal inodes remain uncleared. +As a result, if the next log write involves checkpoint creation, that's +fine, but if a partial log write is performed that does not, inodes with +NILFS_I_COLLECTED set are erroneously removed from the "sc_dirty_files" +list, and their data and b-tree blocks may not be written to the device, +corrupting the block mapping. + +Fix these issues by uniformly calling nilfs_segctor_abort_construction() +on failure of each step in the loop in nilfs_segctor_do_construct(), +having it clean up logs and segment usages according to progress, and +correcting the conditions for calling nilfs_redirty_inodes() to ensure +that the NILFS_I_COLLECTED flag is cleared. 
+ +Link: https://lkml.kernel.org/r/20240814101119.4070-1-konishi.ryusuke@gmail.com +Fixes: a694291a6211 ("nilfs2: separate wait function from nilfs_segctor_write") +Signed-off-by: Ryusuke Konishi +Tested-by: Ryusuke Konishi +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/segment.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/fs/nilfs2/segment.c ++++ b/fs/nilfs2/segment.c +@@ -1835,6 +1835,9 @@ static void nilfs_segctor_abort_construc + nilfs_abort_logs(&logs, ret ? : err); + + list_splice_tail_init(&sci->sc_segbufs, &logs); ++ if (list_empty(&logs)) ++ return; /* if the first segment buffer preparation failed */ ++ + nilfs_cancel_segusage(&logs, nilfs->ns_sufile); + nilfs_free_incomplete_logs(&logs, nilfs); + +@@ -2079,7 +2082,7 @@ static int nilfs_segctor_do_construct(st + + err = nilfs_segctor_begin_construction(sci, nilfs); + if (unlikely(err)) +- goto out; ++ goto failed; + + /* Update time stamp */ + sci->sc_seg_ctime = ktime_get_real_seconds(); +@@ -2142,10 +2145,9 @@ static int nilfs_segctor_do_construct(st + return err; + + failed_to_write: +- if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) +- nilfs_redirty_inodes(&sci->sc_dirty_files); +- + failed: ++ if (mode == SC_LSEG_SR && nilfs_sc_cstage_get(sci) >= NILFS_ST_IFILE) ++ nilfs_redirty_inodes(&sci->sc_dirty_files); + if (nilfs_doing_gc()) + nilfs_redirty_inodes(&sci->sc_gc_inodes); + nilfs_segctor_abort_construction(sci, nilfs, err); diff --git a/queue-6.6/nilfs2-protect-references-to-superblock-parameters-exposed-in-sysfs.patch b/queue-6.6/nilfs2-protect-references-to-superblock-parameters-exposed-in-sysfs.patch new file mode 100644 index 00000000000..aaefbe9140c --- /dev/null +++ b/queue-6.6/nilfs2-protect-references-to-superblock-parameters-exposed-in-sysfs.patch @@ -0,0 +1,104 @@ +From 683408258917541bdb294cd717c210a04381931e Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Sun, 11 Aug 2024 19:03:20 +0900 +Subject: nilfs2: 
protect references to superblock parameters exposed in sysfs + +From: Ryusuke Konishi + +commit 683408258917541bdb294cd717c210a04381931e upstream. + +The superblock buffers of nilfs2 can not only be overwritten at runtime +for modifications/repairs, but they are also regularly swapped, replaced +during resizing, and even abandoned when degrading to one side due to +backing device issues. So, accessing them requires mutual exclusion using +the reader/writer semaphore "nilfs->ns_sem". + +Some sysfs attribute show methods read this superblock buffer without the +necessary mutual exclusion, which can cause problems with pointer +dereferencing and memory access, so fix it. + +Link: https://lkml.kernel.org/r/20240811100320.9913-1-konishi.ryusuke@gmail.com +Fixes: da7141fb78db ("nilfs2: add /sys/fs/nilfs2/ group") +Signed-off-by: Ryusuke Konishi +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/sysfs.c | 43 +++++++++++++++++++++++++++++++++---------- + 1 file changed, 33 insertions(+), 10 deletions(-) + +--- a/fs/nilfs2/sysfs.c ++++ b/fs/nilfs2/sysfs.c +@@ -836,9 +836,15 @@ ssize_t nilfs_dev_revision_show(struct n + struct the_nilfs *nilfs, + char *buf) + { +- struct nilfs_super_block **sbp = nilfs->ns_sbp; +- u32 major = le32_to_cpu(sbp[0]->s_rev_level); +- u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level); ++ struct nilfs_super_block *raw_sb; ++ u32 major; ++ u16 minor; ++ ++ down_read(&nilfs->ns_sem); ++ raw_sb = nilfs->ns_sbp[0]; ++ major = le32_to_cpu(raw_sb->s_rev_level); ++ minor = le16_to_cpu(raw_sb->s_minor_rev_level); ++ up_read(&nilfs->ns_sem); + + return sysfs_emit(buf, "%d.%d\n", major, minor); + } +@@ -856,8 +862,13 @@ ssize_t nilfs_dev_device_size_show(struc + struct the_nilfs *nilfs, + char *buf) + { +- struct nilfs_super_block **sbp = nilfs->ns_sbp; +- u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size); ++ struct nilfs_super_block *raw_sb; ++ u64 dev_size; ++ ++ down_read(&nilfs->ns_sem); ++ raw_sb = nilfs->ns_sbp[0]; ++ 
dev_size = le64_to_cpu(raw_sb->s_dev_size); ++ up_read(&nilfs->ns_sem); + + return sysfs_emit(buf, "%llu\n", dev_size); + } +@@ -879,9 +890,15 @@ ssize_t nilfs_dev_uuid_show(struct nilfs + struct the_nilfs *nilfs, + char *buf) + { +- struct nilfs_super_block **sbp = nilfs->ns_sbp; ++ struct nilfs_super_block *raw_sb; ++ ssize_t len; ++ ++ down_read(&nilfs->ns_sem); ++ raw_sb = nilfs->ns_sbp[0]; ++ len = sysfs_emit(buf, "%pUb\n", raw_sb->s_uuid); ++ up_read(&nilfs->ns_sem); + +- return sysfs_emit(buf, "%pUb\n", sbp[0]->s_uuid); ++ return len; + } + + static +@@ -889,10 +906,16 @@ ssize_t nilfs_dev_volume_name_show(struc + struct the_nilfs *nilfs, + char *buf) + { +- struct nilfs_super_block **sbp = nilfs->ns_sbp; ++ struct nilfs_super_block *raw_sb; ++ ssize_t len; ++ ++ down_read(&nilfs->ns_sem); ++ raw_sb = nilfs->ns_sbp[0]; ++ len = scnprintf(buf, sizeof(raw_sb->s_volume_name), "%s\n", ++ raw_sb->s_volume_name); ++ up_read(&nilfs->ns_sem); + +- return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n", +- sbp[0]->s_volume_name); ++ return len; + } + + static const char dev_readme_str[] = diff --git a/queue-6.6/revert-drm-amdgpu-align-pp_power_profile_mode-with-kernel-docs.patch b/queue-6.6/revert-drm-amdgpu-align-pp_power_profile_mode-with-kernel-docs.patch new file mode 100644 index 00000000000..9beb2f49cf9 --- /dev/null +++ b/queue-6.6/revert-drm-amdgpu-align-pp_power_profile_mode-with-kernel-docs.patch @@ -0,0 +1,45 @@ +From 1a8d845470941f1b6de1b392227530c097dc5e0c Mon Sep 17 00:00:00 2001 +From: Alex Deucher +Date: Thu, 5 Sep 2024 14:24:38 -0400 +Subject: Revert "drm/amdgpu: align pp_power_profile_mode with kernel docs" + +From: Alex Deucher + +commit 1a8d845470941f1b6de1b392227530c097dc5e0c upstream. + +This reverts commit 8f614469de248a4bc55fb07e55d5f4c340c75b11. + +This breaks some manual setting of the profile mode in +certain cases. 
+ +Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3600 +Signed-off-by: Alex Deucher +(cherry picked from commit 7a199557643e993d4e7357860624b8aa5d8f4340) +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +@@ -1883,7 +1883,8 @@ static int smu_adjust_power_state_dynami + smu_dpm_ctx->dpm_level = level; + } + +- if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { ++ if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && ++ smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { + index = fls(smu->workload_mask); + index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; + workload[0] = smu->workload_setting[index]; +@@ -1962,7 +1963,8 @@ static int smu_switch_power_profile(void + workload[0] = smu->workload_setting[index]; + } + +- if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) ++ if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && ++ smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) + smu_bump_power_profile_mode(smu, workload, 0); + + return 0; diff --git a/queue-6.6/sched-sch_cake-fix-bulk-flow-accounting-logic-for-host-fairness.patch b/queue-6.6/sched-sch_cake-fix-bulk-flow-accounting-logic-for-host-fairness.patch new file mode 100644 index 00000000000..06b7c9b1353 --- /dev/null +++ b/queue-6.6/sched-sch_cake-fix-bulk-flow-accounting-logic-for-host-fairness.patch @@ -0,0 +1,83 @@ +From 546ea84d07e3e324644025e2aae2d12ea4c5896e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= +Date: Tue, 3 Sep 2024 18:08:45 +0200 +Subject: sched: sch_cake: fix bulk flow accounting logic for host fairness +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Toke Høiland-Jørgensen + +commit 
546ea84d07e3e324644025e2aae2d12ea4c5896e upstream. + +In sch_cake, we keep track of the count of active bulk flows per host, +when running in dst/src host fairness mode, which is used as the +round-robin weight when iterating through flows. The count of active +bulk flows is updated whenever a flow changes state. + +This has a peculiar interaction with the hash collision handling: when a +hash collision occurs (after the set-associative hashing), the state of +the hash bucket is simply updated to match the new packet that collided, +and if host fairness is enabled, that also means assigning new per-host +state to the flow. For this reason, the bulk flow counters of the +host(s) assigned to the flow are decremented, before new state is +assigned (and the counters, which may not belong to the same host +anymore, are incremented again). + +Back when this code was introduced, the host fairness mode was always +enabled, so the decrement was unconditional. When the configuration +flags were introduced the *increment* was made conditional, but +the *decrement* was not. Which of course can lead to a spurious +decrement (and associated wrap-around to U16_MAX). + +AFAICT, when host fairness is disabled, the decrement and wrap-around +happens as soon as a hash collision occurs (which is not that common in +itself, due to the set-associative hashing). However, in most cases this +is harmless, as the value is only used when host fairness mode is +enabled. So in order to trigger an array overflow, sch_cake has to first +be configured with host fairness disabled, and while running in this +mode, a hash collision has to occur to cause the overflow. Then, the +qdisc has to be reconfigured to enable host fairness, which leads to the +array out-of-bounds because the wrapped-around value is retained and +used as an array index. It seems that syzbot managed to trigger this, +which is quite impressive in its own right. 
+ +This patch fixes the issue by introducing the same conditional check on +decrement as is used on increment. + +The original bug predates the upstreaming of cake, but the commit listed +in the Fixes tag touched that code, meaning that this patch won't apply +before that. + +Fixes: 712639929912 ("sch_cake: Make the dual modes fairer") +Reported-by: syzbot+7fe7b81d602cc1e6b94d@syzkaller.appspotmail.com +Signed-off-by: Toke Høiland-Jørgensen +Link: https://patch.msgid.link/20240903160846.20909-1-toke@redhat.com +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_cake.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/net/sched/sch_cake.c ++++ b/net/sched/sch_cake.c +@@ -786,12 +786,15 @@ skip_hash: + * queue, accept the collision, update the host tags. + */ + q->way_collisions++; +- if (q->flows[outer_hash + k].set == CAKE_SET_BULK) { +- q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--; +- q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--; +- } + allocate_src = cake_dsrc(flow_mode); + allocate_dst = cake_ddst(flow_mode); ++ ++ if (q->flows[outer_hash + k].set == CAKE_SET_BULK) { ++ if (allocate_src) ++ q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--; ++ if (allocate_dst) ++ q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--; ++ } + found: + /* reserve queue for future packets in same flow */ + reduced_hash = outer_hash + k; diff --git a/queue-6.6/series b/queue-6.6/series index f3c4d9beecc..4edde4ecc2a 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -45,3 +45,15 @@ tracing-timerlat-add-interface_lock-around-clearing-of-kthread-in-stop_kthread.p userfaultfd-don-t-bug_on-if-khugepaged-yanks-our-page-table.patch userfaultfd-fix-checks-for-huge-pmds.patch fscache-delete-fscache_cookie_lru_timer-when-fscache-exits-to-avoid-uaf.patch +eventfs-use-list_del_rcu-for-srcu-protected-list-variable.patch 
+net-mana-fix-error-handling-in-mana_create_txq-rxq-s-napi-cleanup.patch +net-mctp-serial-fix-missing-escapes-on-transmit.patch +x86-fpu-avoid-writing-lbr-bit-to-ia32_xss-unless-supported.patch +x86-apic-make-x2apic_disable-work-correctly.patch +revert-drm-amdgpu-align-pp_power_profile_mode-with-kernel-docs.patch +tcp_bpf-fix-return-value-of-tcp_bpf_sendmsg.patch +ila-call-nf_unregister_net_hooks-sooner.patch +sched-sch_cake-fix-bulk-flow-accounting-logic-for-host-fairness.patch +nilfs2-fix-missing-cleanup-on-rollforward-recovery-error.patch +nilfs2-protect-references-to-superblock-parameters-exposed-in-sysfs.patch +nilfs2-fix-state-management-in-error-path-of-log-writing-function.patch diff --git a/queue-6.6/tcp_bpf-fix-return-value-of-tcp_bpf_sendmsg.patch b/queue-6.6/tcp_bpf-fix-return-value-of-tcp_bpf_sendmsg.patch new file mode 100644 index 00000000000..f4e35d95ff7 --- /dev/null +++ b/queue-6.6/tcp_bpf-fix-return-value-of-tcp_bpf_sendmsg.patch @@ -0,0 +1,91 @@ +From fe1910f9337bd46a9343967b547ccab26b4b2c6e Mon Sep 17 00:00:00 2001 +From: Cong Wang +Date: Tue, 20 Aug 2024 20:07:44 -0700 +Subject: tcp_bpf: fix return value of tcp_bpf_sendmsg() + +From: Cong Wang + +commit fe1910f9337bd46a9343967b547ccab26b4b2c6e upstream. + +When we cork messages in psock->cork, the last message, which triggers the +flushing, will result in sending a sk_msg larger than the current +message size. In this case, in tcp_bpf_send_verdict(), 'copied' becomes +negative at least in the following case: + +468 case __SK_DROP: +469 default: +470 sk_msg_free_partial(sk, msg, tosend); +471 sk_msg_apply_bytes(psock, tosend); +472 *copied -= (tosend + delta); // <==== HERE +473 return -EACCES; + +Therefore, it could lead to the following BUG with a proper value of +'copied' (thanks to syzbot). We should not use negative 'copied' as a +return value here. + + ------------[ cut here ]------------ + kernel BUG at net/socket.c:733!
+ Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP + Modules linked in: + CPU: 0 UID: 0 PID: 3265 Comm: syz-executor510 Not tainted 6.11.0-rc3-syzkaller-00060-gd07b43284ab3 #0 + Hardware name: linux,dummy-virt (DT) + pstate: 61400009 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) + pc : sock_sendmsg_nosec net/socket.c:733 [inline] + pc : sock_sendmsg_nosec net/socket.c:728 [inline] + pc : __sock_sendmsg+0x5c/0x60 net/socket.c:745 + lr : sock_sendmsg_nosec net/socket.c:730 [inline] + lr : __sock_sendmsg+0x54/0x60 net/socket.c:745 + sp : ffff800088ea3b30 + x29: ffff800088ea3b30 x28: fbf00000062bc900 x27: 0000000000000000 + x26: ffff800088ea3bc0 x25: ffff800088ea3bc0 x24: 0000000000000000 + x23: f9f00000048dc000 x22: 0000000000000000 x21: ffff800088ea3d90 + x20: f9f00000048dc000 x19: ffff800088ea3d90 x18: 0000000000000001 + x17: 0000000000000000 x16: 0000000000000000 x15: 000000002002ffaf + x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 + x11: 0000000000000000 x10: ffff8000815849c0 x9 : ffff8000815b49c0 + x8 : 0000000000000000 x7 : 000000000000003f x6 : 0000000000000000 + x5 : 00000000000007e0 x4 : fff07ffffd239000 x3 : fbf00000062bc900 + x2 : 0000000000000000 x1 : 0000000000000000 x0 : 00000000fffffdef + Call trace: + sock_sendmsg_nosec net/socket.c:733 [inline] + __sock_sendmsg+0x5c/0x60 net/socket.c:745 + ____sys_sendmsg+0x274/0x2ac net/socket.c:2597 + ___sys_sendmsg+0xac/0x100 net/socket.c:2651 + __sys_sendmsg+0x84/0xe0 net/socket.c:2680 + __do_sys_sendmsg net/socket.c:2689 [inline] + __se_sys_sendmsg net/socket.c:2687 [inline] + __arm64_sys_sendmsg+0x24/0x30 net/socket.c:2687 + __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] + invoke_syscall+0x48/0x110 arch/arm64/kernel/syscall.c:49 + el0_svc_common.constprop.0+0x40/0xe0 arch/arm64/kernel/syscall.c:132 + do_el0_svc+0x1c/0x28 arch/arm64/kernel/syscall.c:151 + el0_svc+0x34/0xec arch/arm64/kernel/entry-common.c:712 + el0t_64_sync_handler+0x100/0x12c 
arch/arm64/kernel/entry-common.c:730 + el0t_64_sync+0x19c/0x1a0 arch/arm64/kernel/entry.S:598 + Code: f9404463 d63f0060 3108441f 54fffe81 (d4210000) + ---[ end trace 0000000000000000 ]--- + +Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data") +Reported-by: syzbot+58c03971700330ce14d8@syzkaller.appspotmail.com +Cc: Jakub Sitnicki +Signed-off-by: Cong Wang +Reviewed-by: John Fastabend +Acked-by: Martin KaFai Lau +Link: https://patch.msgid.link/20240821030744.320934-1-xiyou.wangcong@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_bpf.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp_bpf.c ++++ b/net/ipv4/tcp_bpf.c +@@ -577,7 +577,7 @@ out_err: + err = sk_stream_error(sk, msg->msg_flags, err); + release_sock(sk); + sk_psock_put(sk, psock); +- return copied ? copied : err; ++ return copied > 0 ? copied : err; + } + + enum { diff --git a/queue-6.6/x86-apic-make-x2apic_disable-work-correctly.patch b/queue-6.6/x86-apic-make-x2apic_disable-work-correctly.patch new file mode 100644 index 00000000000..2e98aa559b5 --- /dev/null +++ b/queue-6.6/x86-apic-make-x2apic_disable-work-correctly.patch @@ -0,0 +1,60 @@ +From 0ecc5be200c84e67114f3640064ba2bae3ba2f5a Mon Sep 17 00:00:00 2001 +From: Yuntao Wang +Date: Tue, 13 Aug 2024 09:48:27 +0800 +Subject: x86/apic: Make x2apic_disable() work correctly + +From: Yuntao Wang + +commit 0ecc5be200c84e67114f3640064ba2bae3ba2f5a upstream. + +x2apic_disable() clears x2apic_state and x2apic_mode unconditionally, even +when the state is X2APIC_ON_LOCKED, which prevents the kernel from +disabling it, thereby creating an inconsistent state. + +Due to the early state check for X2APIC_ON, the code path which warns about +a locked X2APIC cannot be reached. + +Test for state < X2APIC_ON instead and move the clearing of the state and +mode variables to the place which actually disables X2APIC. + +[ tglx: Massaged change log. Added Fixes tag.
Moved clearing so it's at the + right place for back ports ] + +Fixes: a57e456a7b28 ("x86/apic: Fix fallout from x2apic cleanup") +Signed-off-by: Yuntao Wang +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/20240813014827.895381-1-yuntao.wang@linux.dev +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/apic/apic.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/arch/x86/kernel/apic/apic.c ++++ b/arch/x86/kernel/apic/apic.c +@@ -1812,12 +1812,9 @@ static __init void apic_set_fixmap(bool + + static __init void x2apic_disable(void) + { +- u32 x2apic_id, state = x2apic_state; ++ u32 x2apic_id; + +- x2apic_mode = 0; +- x2apic_state = X2APIC_DISABLED; +- +- if (state != X2APIC_ON) ++ if (x2apic_state < X2APIC_ON) + return; + + x2apic_id = read_apic_id(); +@@ -1830,6 +1827,10 @@ static __init void x2apic_disable(void) + } + + __x2apic_disable(); ++ ++ x2apic_mode = 0; ++ x2apic_state = X2APIC_DISABLED; ++ + /* + * Don't reread the APIC ID as it was already done from + * check_x2apic() and the APIC driver still is a x2APIC variant, diff --git a/queue-6.6/x86-fpu-avoid-writing-lbr-bit-to-ia32_xss-unless-supported.patch b/queue-6.6/x86-fpu-avoid-writing-lbr-bit-to-ia32_xss-unless-supported.patch new file mode 100644 index 00000000000..5e43ef58cc0 --- /dev/null +++ b/queue-6.6/x86-fpu-avoid-writing-lbr-bit-to-ia32_xss-unless-supported.patch @@ -0,0 +1,91 @@ +From 2848ff28d180bd63a95da8e5dcbcdd76c1beeb7b Mon Sep 17 00:00:00 2001 +From: Mitchell Levy +Date: Mon, 12 Aug 2024 13:44:12 -0700 +Subject: x86/fpu: Avoid writing LBR bit to IA32_XSS unless supported + +From: Mitchell Levy + +commit 2848ff28d180bd63a95da8e5dcbcdd76c1beeb7b upstream. + +There are two distinct CPU features related to the use of XSAVES and LBR: +whether LBR is itself supported and whether XSAVES supports LBR. The LBR +subsystem correctly checks both in intel_pmu_arch_lbr_init(), but the +XSTATE subsystem does not. 
+ +The LBR bit is only removed from xfeatures_mask_independent when LBR is not +supported by the CPU, but there is no validation of XSTATE support. + +If XSAVES does not support LBR, the write to IA32_XSS causes a #GP fault, +leaving the state of IA32_XSS unchanged, i.e. zero. The fault is handled +with a warning and the boot continues. + +Consequently, the next XRSTORS which tries to restore supervisor state fails +with #GP because the RFBM has zero for all supervisor features, which does +not match the XCOMP_BV field. + +As XFEATURE_MASK_FPSTATE includes supervisor features, setting up the FPU +causes a #GP, which ends up in fpu_reset_from_exception_fixup(). That fails +due to the same problem, resulting in recursive #GPs until the kernel runs +out of stack space and double faults. + +Prevent this by storing the supported independent features in +fpu_kernel_cfg during XSTATE initialization and using that cached value for +retrieving the independent feature bits to be written into IA32_XSS. + +[ tglx: Massaged change log ] + +Fixes: f0dccc9da4c0 ("x86/fpu/xstate: Support dynamic supervisor feature for LBR") +Suggested-by: Thomas Gleixner +Signed-off-by: Mitchell Levy +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/20240812-xsave-lbr-fix-v3-1-95bac1bf62f4@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/fpu/types.h | 7 +++++++ + arch/x86/kernel/fpu/xstate.c | 3 +++ + arch/x86/kernel/fpu/xstate.h | 4 ++-- + 3 files changed, 12 insertions(+), 2 deletions(-) + +--- a/arch/x86/include/asm/fpu/types.h ++++ b/arch/x86/include/asm/fpu/types.h +@@ -589,6 +589,13 @@ struct fpu_state_config { + * even without XSAVE support, i.e.
legacy features FP + SSE + */ + u64 legacy_features; ++ /* ++ * @independent_features: ++ * ++ * Features that are supported by XSAVES, but not managed as part of ++ * the FPU core, such as LBR ++ */ ++ u64 independent_features; + }; + + /* FPU state configuration information */ +--- a/arch/x86/kernel/fpu/xstate.c ++++ b/arch/x86/kernel/fpu/xstate.c +@@ -788,6 +788,9 @@ void __init fpu__init_system_xstate(unsi + goto out_disable; + } + ++ fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features & ++ XFEATURE_MASK_INDEPENDENT; ++ + /* + * Clear XSAVE features that are disabled in the normal CPUID. + */ +--- a/arch/x86/kernel/fpu/xstate.h ++++ b/arch/x86/kernel/fpu/xstate.h +@@ -64,9 +64,9 @@ static inline u64 xfeatures_mask_supervi + static inline u64 xfeatures_mask_independent(void) + { + if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) +- return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR; ++ return fpu_kernel_cfg.independent_features & ~XFEATURE_MASK_LBR; + +- return XFEATURE_MASK_INDEPENDENT; ++ return fpu_kernel_cfg.independent_features; + } + + /* XSAVE/XRSTOR wrapper functions */