From: Greg Kroah-Hartman Date: Sat, 5 Mar 2022 12:08:51 +0000 (+0100) Subject: 5.10-stable patches X-Git-Tag: v4.9.305~81 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d1c7e4967f0719611d32c43a118611230bbbcd1f;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: bpf-sockmap-do-not-ignore-orig_len-parameter.patch e1000e-correct-nvm-checksum-verification-flow.patch mm-consider-__gfp_nowarn-flag-for-oversized-kvmalloc-calls.patch net-fix-up-skbs-delta_truesize-in-udp-gro-frag_list.patch netfilter-fix-use-after-free-in-__nf_register_net_hook.patch netfilter-nf_queue-don-t-assume-sk-is-full-socket.patch netfilter-nf_queue-fix-possible-use-after-free.patch netfilter-nf_queue-handle-socket-prefetch.patch xfrm-enforce-validity-of-offload-input-flags.patch xfrm-fix-mtu-regression.patch xfrm-fix-the-if_id-check-in-changelink.patch --- diff --git a/queue-5.10/bpf-sockmap-do-not-ignore-orig_len-parameter.patch b/queue-5.10/bpf-sockmap-do-not-ignore-orig_len-parameter.patch new file mode 100644 index 00000000000..553c94aef8b --- /dev/null +++ b/queue-5.10/bpf-sockmap-do-not-ignore-orig_len-parameter.patch @@ -0,0 +1,43 @@ +From 60ce37b03917e593d8e5d8bcc7ec820773daf81d Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 2 Mar 2022 08:17:22 -0800 +Subject: bpf, sockmap: Do not ignore orig_len parameter + +From: Eric Dumazet + +commit 60ce37b03917e593d8e5d8bcc7ec820773daf81d upstream. + +Currently, sk_psock_verdict_recv() returns skb->len + +This is problematic because tcp_read_sock() might have +passed orig_len < skb->len, due to the presence of TCP urgent data. + +This causes an infinite loop from tcp_read_sock() + +Followup patch will make tcp_read_sock() more robust vs bad actors. + +Fixes: ef5659280eb1 ("bpf, sockmap: Allow skipping sk_skb parser program") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Acked-by: John Fastabend +Acked-by: Jakub Sitnicki +Tested-by: Jakub Sitnicki +Acked-by: Daniel Borkmann +Link: https://lore.kernel.org/r/20220302161723.3910001-1-eric.dumazet@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skmsg.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/skmsg.c ++++ b/net/core/skmsg.c +@@ -943,7 +943,7 @@ static int sk_psock_verdict_recv(read_de + struct sk_psock *psock; + struct bpf_prog *prog; + int ret = __SK_DROP; +- int len = skb->len; ++ int len = orig_len; + + /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */ + skb = skb_clone(skb, GFP_ATOMIC); diff --git a/queue-5.10/e1000e-correct-nvm-checksum-verification-flow.patch b/queue-5.10/e1000e-correct-nvm-checksum-verification-flow.patch new file mode 100644 index 00000000000..a292da13800 --- /dev/null +++ b/queue-5.10/e1000e-correct-nvm-checksum-verification-flow.patch @@ -0,0 +1,38 @@ +From ffd24fa2fcc76ecb2e61e7a4ef8588177bcb42a6 Mon Sep 17 00:00:00 2001 +From: Sasha Neftin +Date: Thu, 3 Feb 2022 14:21:49 +0200 +Subject: e1000e: Correct NVM checksum verification flow + +From: Sasha Neftin + +commit ffd24fa2fcc76ecb2e61e7a4ef8588177bcb42a6 upstream. + +Update MAC type check e1000_pch_tgp because for e1000_pch_cnp, +NVM checksum update is still possible. +Emit a more detailed warning message. + +Bugzilla: https://bugzilla.opensuse.org/show_bug.cgi?id=1191663 +Fixes: 4051f68318ca ("e1000e: Do not take care about recovery NVM checksum") +Reported-by: Thomas Bogendoerfer +Signed-off-by: Sasha Neftin +Tested-by: Naama Meir +Signed-off-by: Tony Nguyen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/e1000e/ich8lan.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c ++++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c +@@ -4134,9 +4134,9 @@ static s32 e1000_validate_nvm_checksum_i + return ret_val; + + if (!(data & valid_csum_mask)) { +- e_dbg("NVM Checksum Invalid\n"); ++ e_dbg("NVM Checksum valid bit not set\n"); + +- if (hw->mac.type < e1000_pch_cnp) { ++ if (hw->mac.type < e1000_pch_tgp) { + data |= valid_csum_mask; + ret_val = e1000_write_nvm(hw, word, 1, &data); + if (ret_val) diff --git a/queue-5.10/mm-consider-__gfp_nowarn-flag-for-oversized-kvmalloc-calls.patch b/queue-5.10/mm-consider-__gfp_nowarn-flag-for-oversized-kvmalloc-calls.patch new file mode 100644 index 00000000000..7f47679fc07 --- /dev/null +++ b/queue-5.10/mm-consider-__gfp_nowarn-flag-for-oversized-kvmalloc-calls.patch @@ -0,0 +1,105 @@ +From 0708a0afe291bdfe1386d74d5ec1f0c27e8b9168 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Fri, 4 Mar 2022 15:26:32 +0100 +Subject: mm: Consider __GFP_NOWARN flag for oversized kvmalloc() calls +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Daniel Borkmann + +commit 0708a0afe291bdfe1386d74d5ec1f0c27e8b9168 upstream. + +syzkaller was recently triggering an oversized kvmalloc() warning via +xdp_umem_create(). + +The triggered warning was added back in 7661809d493b ("mm: don't allow +oversized kvmalloc() calls"). The rationale for the warning for huge +kvmalloc sizes was as a reaction to a security bug where the size was +more than UINT_MAX but not everything was prepared to handle unsigned +long sizes. + +Anyway, the AF_XDP related call trace from this syzkaller report was: + + kvmalloc include/linux/mm.h:806 [inline] + kvmalloc_array include/linux/mm.h:824 [inline] + kvcalloc include/linux/mm.h:829 [inline] + xdp_umem_pin_pages net/xdp/xdp_umem.c:102 [inline] + xdp_umem_reg net/xdp/xdp_umem.c:219 [inline] + xdp_umem_create+0x6a5/0xf00 net/xdp/xdp_umem.c:252 + xsk_setsockopt+0x604/0x790 net/xdp/xsk.c:1068 + __sys_setsockopt+0x1fd/0x4e0 net/socket.c:2176 + __do_sys_setsockopt net/socket.c:2187 [inline] + __se_sys_setsockopt net/socket.c:2184 [inline] + __x64_sys_setsockopt+0xb5/0x150 net/socket.c:2184 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +Björn mentioned that requests for >2GB allocation can still be valid: + + The structure that is being allocated is the page-pinning accounting. + AF_XDP has an internal limit of U32_MAX pages, which is *a lot*, but + still fewer than what memcg allows (PAGE_COUNTER_MAX is a LONG_MAX/ + PAGE_SIZE on 64 bit systems). [...] + + I could just change from U32_MAX to INT_MAX, but as I stated earlier + that has a hacky feeling to it. [...] From my perspective, the code + isn't broken, with the memcg limits in consideration. [...] + +Linus says: + + [...] Pretty much every time this has come up, the kernel warning has + shown that yes, the code was broken and there really wasn't a reason + for doing allocations that big. + + Of course, some people would be perfectly fine with the allocation + failing, they just don't want the warning. I didn't want __GFP_NOWARN + to shut it up originally because I wanted people to see all those + cases, but these days I think we can just say "yeah, people can shut + it up explicitly by saying 'go ahead and fail this allocation, don't + warn about it'". + + So enough time has passed that by now I'd certainly be ok with [it]. + +Thus allow call-sites to silence such userspace triggered splats if the +allocation requests have __GFP_NOWARN. For xdp_umem_pin_pages()'s call +to kvcalloc() this is already the case, so nothing else needed there. + +Fixes: 7661809d493b ("mm: don't allow oversized kvmalloc() calls") +Reported-by: syzbot+11421fbbff99b989670e@syzkaller.appspotmail.com +Suggested-by: Linus Torvalds +Signed-off-by: Daniel Borkmann +Tested-by: syzbot+11421fbbff99b989670e@syzkaller.appspotmail.com +Cc: Björn Töpel +Cc: Magnus Karlsson +Cc: Willy Tarreau +Cc: Andrew Morton +Cc: Alexei Starovoitov +Cc: Andrii Nakryiko +Cc: Jakub Kicinski +Cc: David S. Miller +Link: https://lore.kernel.org/bpf/CAJ+HfNhyfsT5cS_U9EC213ducHs9k9zNxX9+abqC0kTrPbQ0gg@mail.gmail.com +Link: https://lore.kernel.org/bpf/20211201202905.b9892171e3f5b9a60f9da251@linux-foundation.org +Reviewed-by: Leon Romanovsky +Ackd-by: Michal Hocko +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/util.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/mm/util.c ++++ b/mm/util.c +@@ -582,8 +582,10 @@ void *kvmalloc_node(size_t size, gfp_t f + return ret; + + /* Don't even allow crazy sizes */ +- if (WARN_ON_ONCE(size > INT_MAX)) ++ if (unlikely(size > INT_MAX)) { ++ WARN_ON_ONCE(!(flags & __GFP_NOWARN)); + return NULL; ++ } + + return __vmalloc_node(size, 1, flags, node, + __builtin_return_address(0)); diff --git a/queue-5.10/net-fix-up-skbs-delta_truesize-in-udp-gro-frag_list.patch b/queue-5.10/net-fix-up-skbs-delta_truesize-in-udp-gro-frag_list.patch new file mode 100644 index 00000000000..4144cd5d662 --- /dev/null +++ b/queue-5.10/net-fix-up-skbs-delta_truesize-in-udp-gro-frag_list.patch @@ -0,0 +1,64 @@ +From 224102de2ff105a2c05695e66a08f4b5b6b2d19c Mon Sep 17 00:00:00 2001 +From: lena wang +Date: Tue, 1 Mar 2022 19:17:09 +0800 +Subject: net: fix up skbs delta_truesize in UDP GRO frag_list + +From: lena wang + +commit 224102de2ff105a2c05695e66a08f4b5b6b2d19c upstream. + +The truesize for a UDP GRO packet is added by main skb and skbs in main +skb's frag_list: +skb_gro_receive_list + p->truesize += skb->truesize; + +The commit 53475c5dd856 ("net: fix use-after-free when UDP GRO with +shared fraglist") introduced a truesize increase for frag_list skbs. +When uncloning skb, it will call pskb_expand_head and trusesize for +frag_list skbs may increase. This can occur when allocators uses +__netdev_alloc_skb and not jump into __alloc_skb. This flow does not +use ksize(len) to calculate truesize while pskb_expand_head uses. +skb_segment_list +err = skb_unclone(nskb, GFP_ATOMIC); +pskb_expand_head + if (!skb->sk || skb->destructor == sock_edemux) + skb->truesize += size - osize; + +If we uses increased truesize adding as delta_truesize, it will be +larger than before and even larger than previous total truesize value +if skbs in frag_list are abundant. The main skb truesize will become +smaller and even a minus value or a huge value for an unsigned int +parameter. Then the following memory check will drop this abnormal skb. + +To avoid this error we should use the original truesize to segment the +main skb. + +Fixes: 53475c5dd856 ("net: fix use-after-free when UDP GRO with shared fraglist") +Signed-off-by: lena wang +Acked-by: Paolo Abeni +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/1646133431-8948-1-git-send-email-lena.wang@mediatek.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -3690,6 +3690,7 @@ struct sk_buff *skb_segment_list(struct + list_skb = list_skb->next; + + err = 0; ++ delta_truesize += nskb->truesize; + if (skb_shared(nskb)) { + tmp = skb_clone(nskb, GFP_ATOMIC); + if (tmp) { +@@ -3714,7 +3715,6 @@ struct sk_buff *skb_segment_list(struct + tail = nskb; + + delta_len += nskb->len; +- delta_truesize += nskb->truesize; + + skb_push(nskb, -skb_network_offset(nskb) + offset); + diff --git a/queue-5.10/netfilter-fix-use-after-free-in-__nf_register_net_hook.patch b/queue-5.10/netfilter-fix-use-after-free-in-__nf_register_net_hook.patch new file mode 100644 index 00000000000..db7f08e8d9e --- /dev/null +++ b/queue-5.10/netfilter-fix-use-after-free-in-__nf_register_net_hook.patch @@ -0,0 +1,141 @@ +From 56763f12b0f02706576a088e85ef856deacc98a0 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Sun, 27 Feb 2022 10:01:41 -0800 +Subject: netfilter: fix use-after-free in __nf_register_net_hook() + +From: Eric Dumazet + +commit 56763f12b0f02706576a088e85ef856deacc98a0 upstream. + +We must not dereference @new_hooks after nf_hook_mutex has been released, +because other threads might have freed our allocated hooks already. + +BUG: KASAN: use-after-free in nf_hook_entries_get_hook_ops include/linux/netfilter.h:130 [inline] +BUG: KASAN: use-after-free in hooks_validate net/netfilter/core.c:171 [inline] +BUG: KASAN: use-after-free in __nf_register_net_hook+0x77a/0x820 net/netfilter/core.c:438 +Read of size 2 at addr ffff88801c1a8000 by task syz-executor237/4430 + +CPU: 1 PID: 4430 Comm: syz-executor237 Not tainted 5.17.0-rc5-syzkaller-00306-g2293be58d6a1 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 + print_address_description.constprop.0.cold+0x8d/0x336 mm/kasan/report.c:255 + __kasan_report mm/kasan/report.c:442 [inline] + kasan_report.cold+0x83/0xdf mm/kasan/report.c:459 + nf_hook_entries_get_hook_ops include/linux/netfilter.h:130 [inline] + hooks_validate net/netfilter/core.c:171 [inline] + __nf_register_net_hook+0x77a/0x820 net/netfilter/core.c:438 + nf_register_net_hook+0x114/0x170 net/netfilter/core.c:571 + nf_register_net_hooks+0x59/0xc0 net/netfilter/core.c:587 + nf_synproxy_ipv6_init+0x85/0xe0 net/netfilter/nf_synproxy_core.c:1218 + synproxy_tg6_check+0x30d/0x560 net/ipv6/netfilter/ip6t_SYNPROXY.c:81 + xt_check_target+0x26c/0x9e0 net/netfilter/x_tables.c:1038 + check_target net/ipv6/netfilter/ip6_tables.c:530 [inline] + find_check_entry.constprop.0+0x7f1/0x9e0 net/ipv6/netfilter/ip6_tables.c:573 + translate_table+0xc8b/0x1750 net/ipv6/netfilter/ip6_tables.c:735 + do_replace net/ipv6/netfilter/ip6_tables.c:1153 [inline] + do_ip6t_set_ctl+0x56e/0xb90 net/ipv6/netfilter/ip6_tables.c:1639 + nf_setsockopt+0x83/0xe0 net/netfilter/nf_sockopt.c:101 + ipv6_setsockopt+0x122/0x180 net/ipv6/ipv6_sockglue.c:1024 + rawv6_setsockopt+0xd3/0x6a0 net/ipv6/raw.c:1084 + __sys_setsockopt+0x2db/0x610 net/socket.c:2180 + __do_sys_setsockopt net/socket.c:2191 [inline] + __se_sys_setsockopt net/socket.c:2188 [inline] + __x64_sys_setsockopt+0xba/0x150 net/socket.c:2188 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x44/0xae +RIP: 0033:0x7f65a1ace7d9 +Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 71 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 +RSP: 002b:00007f65a1a7f308 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 +RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 00007f65a1ace7d9 +RDX: 0000000000000040 RSI: 0000000000000029 RDI: 0000000000000003 +RBP: 00007f65a1b574c8 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000020000000 R11: 0000000000000246 R12: 00007f65a1b55130 +R13: 00007f65a1b574c0 R14: 00007f65a1b24090 R15: 0000000000022000 + + +The buggy address belongs to the page: +page:ffffea0000706a00 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1c1a8 +flags: 0xfff00000000000(node=0|zone=1|lastcpupid=0x7ff) +raw: 00fff00000000000 ffffea0001c1b108 ffffea000046dd08 0000000000000000 +raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 +page dumped because: kasan: bad access detected +page_owner tracks the page as freed +page last allocated via order 2, migratetype Unmovable, gfp_mask 0x52dc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_ZERO), pid 4430, ts 1061781545818, free_ts 1061791488993 + prep_new_page mm/page_alloc.c:2434 [inline] + get_page_from_freelist+0xa72/0x2f50 mm/page_alloc.c:4165 + __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5389 + __alloc_pages_node include/linux/gfp.h:572 [inline] + alloc_pages_node include/linux/gfp.h:595 [inline] + kmalloc_large_node+0x62/0x130 mm/slub.c:4438 + __kmalloc_node+0x35a/0x4a0 mm/slub.c:4454 + kmalloc_node include/linux/slab.h:604 [inline] + kvmalloc_node+0x97/0x100 mm/util.c:580 + kvmalloc include/linux/slab.h:731 [inline] + kvzalloc include/linux/slab.h:739 [inline] + allocate_hook_entries_size net/netfilter/core.c:61 [inline] + nf_hook_entries_grow+0x140/0x780 net/netfilter/core.c:128 + __nf_register_net_hook+0x144/0x820 net/netfilter/core.c:429 + nf_register_net_hook+0x114/0x170 net/netfilter/core.c:571 + nf_register_net_hooks+0x59/0xc0 net/netfilter/core.c:587 + nf_synproxy_ipv6_init+0x85/0xe0 net/netfilter/nf_synproxy_core.c:1218 + synproxy_tg6_check+0x30d/0x560 net/ipv6/netfilter/ip6t_SYNPROXY.c:81 + xt_check_target+0x26c/0x9e0 net/netfilter/x_tables.c:1038 + check_target net/ipv6/netfilter/ip6_tables.c:530 [inline] + find_check_entry.constprop.0+0x7f1/0x9e0 net/ipv6/netfilter/ip6_tables.c:573 + translate_table+0xc8b/0x1750 net/ipv6/netfilter/ip6_tables.c:735 + do_replace net/ipv6/netfilter/ip6_tables.c:1153 [inline] + do_ip6t_set_ctl+0x56e/0xb90 net/ipv6/netfilter/ip6_tables.c:1639 + nf_setsockopt+0x83/0xe0 net/netfilter/nf_sockopt.c:101 +page last free stack trace: + reset_page_owner include/linux/page_owner.h:24 [inline] + free_pages_prepare mm/page_alloc.c:1352 [inline] + free_pcp_prepare+0x374/0x870 mm/page_alloc.c:1404 + free_unref_page_prepare mm/page_alloc.c:3325 [inline] + free_unref_page+0x19/0x690 mm/page_alloc.c:3404 + kvfree+0x42/0x50 mm/util.c:613 + rcu_do_batch kernel/rcu/tree.c:2527 [inline] + rcu_core+0x7b1/0x1820 kernel/rcu/tree.c:2778 + __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 + +Memory state around the buggy address: + ffff88801c1a7f00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ffff88801c1a7f80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff +>ffff88801c1a8000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ^ + ffff88801c1a8080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ffff88801c1a8100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + +Fixes: 2420b79f8c18 ("netfilter: debug: check for sorted array") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Acked-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/core.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/net/netfilter/core.c ++++ b/net/netfilter/core.c +@@ -406,14 +406,15 @@ static int __nf_register_net_hook(struct + p = nf_entry_dereference(*pp); + new_hooks = nf_hook_entries_grow(p, reg); + +- if (!IS_ERR(new_hooks)) ++ if (!IS_ERR(new_hooks)) { ++ hooks_validate(new_hooks); + rcu_assign_pointer(*pp, new_hooks); ++ } + + mutex_unlock(&nf_hook_mutex); + if (IS_ERR(new_hooks)) + return PTR_ERR(new_hooks); + +- hooks_validate(new_hooks); + #ifdef CONFIG_NETFILTER_INGRESS + if (nf_ingress_hook(reg, pf)) + net_inc_ingress_queue(); diff --git a/queue-5.10/netfilter-nf_queue-don-t-assume-sk-is-full-socket.patch b/queue-5.10/netfilter-nf_queue-don-t-assume-sk-is-full-socket.patch new file mode 100644 index 00000000000..e0ae0ff0199 --- /dev/null +++ b/queue-5.10/netfilter-nf_queue-don-t-assume-sk-is-full-socket.patch @@ -0,0 +1,52 @@ +From 747670fd9a2d1b7774030dba65ca022ba442ce71 Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Fri, 25 Feb 2022 14:02:41 +0100 +Subject: netfilter: nf_queue: don't assume sk is full socket + +From: Florian Westphal + +commit 747670fd9a2d1b7774030dba65ca022ba442ce71 upstream. + +There is no guarantee that state->sk refers to a full socket. + +If refcount transitions to 0, sock_put calls sk_free which then ends up +with garbage fields. + +I'd like to thank Oleksandr Natalenko and Jiri Benc for considerable +debug work and pointing out state->sk oddities. + +Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") +Tested-by: Oleksandr Natalenko +Signed-off-by: Florian Westphal +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_queue.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/net/netfilter/nf_queue.c ++++ b/net/netfilter/nf_queue.c +@@ -46,6 +46,15 @@ void nf_unregister_queue_handler(struct + } + EXPORT_SYMBOL(nf_unregister_queue_handler); + ++static void nf_queue_sock_put(struct sock *sk) ++{ ++#ifdef CONFIG_INET ++ sock_gen_put(sk); ++#else ++ sock_put(sk); ++#endif ++} ++ + static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) + { + struct nf_hook_state *state = &entry->state; +@@ -56,7 +65,7 @@ static void nf_queue_entry_release_refs( + if (state->out) + dev_put(state->out); + if (state->sk) +- sock_put(state->sk); ++ nf_queue_sock_put(state->sk); + + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + if (entry->physin) diff --git a/queue-5.10/netfilter-nf_queue-fix-possible-use-after-free.patch b/queue-5.10/netfilter-nf_queue-fix-possible-use-after-free.patch new file mode 100644 index 00000000000..0c12bba403a --- /dev/null +++ b/queue-5.10/netfilter-nf_queue-fix-possible-use-after-free.patch @@ -0,0 +1,104 @@ +From c3873070247d9e3c7a6b0cf9bf9b45e8018427b1 Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Mon, 28 Feb 2022 06:22:22 +0100 +Subject: netfilter: nf_queue: fix possible use-after-free + +From: Florian Westphal + +commit c3873070247d9e3c7a6b0cf9bf9b45e8018427b1 upstream. + +Eric Dumazet says: + The sock_hold() side seems suspect, because there is no guarantee + that sk_refcnt is not already 0. + +On failure, we cannot queue the packet and need to indicate an +error. The packet will be dropped by the caller. + +v2: split skb prefetch hunk into separate change + +Fixes: 271b72c7fa82c ("udp: RCU handling for Unicast packets.") +Reported-by: Eric Dumazet +Reviewed-by: Eric Dumazet +Signed-off-by: Florian Westphal +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_queue.h | 2 +- + net/netfilter/nf_queue.c | 13 +++++++++---- + net/netfilter/nfnetlink_queue.c | 12 +++++++++--- + 3 files changed, 19 insertions(+), 8 deletions(-) + +--- a/include/net/netfilter/nf_queue.h ++++ b/include/net/netfilter/nf_queue.h +@@ -37,7 +37,7 @@ void nf_register_queue_handler(struct ne + void nf_unregister_queue_handler(struct net *net); + void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); + +-void nf_queue_entry_get_refs(struct nf_queue_entry *entry); ++bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); + void nf_queue_entry_free(struct nf_queue_entry *entry); + + static inline void init_hashrandom(u32 *jhash_initval) +--- a/net/netfilter/nf_queue.c ++++ b/net/netfilter/nf_queue.c +@@ -100,16 +100,17 @@ static void __nf_queue_entry_init_physde + } + + /* Bump dev refs so they don't vanish while packet is out */ +-void nf_queue_entry_get_refs(struct nf_queue_entry *entry) ++bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) + { + struct nf_hook_state *state = &entry->state; + ++ if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt)) ++ return false; ++ + if (state->in) + dev_hold(state->in); + if (state->out) + dev_hold(state->out); +- if (state->sk) +- sock_hold(state->sk); + + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + if (entry->physin) +@@ -117,6 +118,7 @@ void nf_queue_entry_get_refs(struct nf_q + if (entry->physout) + dev_hold(entry->physout); + #endif ++ return true; + } + EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); + +@@ -205,7 +207,10 @@ static int __nf_queue(struct sk_buff *sk + + __nf_queue_entry_init_physdevs(entry); + +- nf_queue_entry_get_refs(entry); ++ if (!nf_queue_entry_get_refs(entry)) { ++ kfree(entry); ++ return -ENOTCONN; ++ } + + switch (entry->state.pf) { + case AF_INET: +--- a/net/netfilter/nfnetlink_queue.c ++++ b/net/netfilter/nfnetlink_queue.c +@@ -712,9 +712,15 @@ static struct nf_queue_entry * + nf_queue_entry_dup(struct nf_queue_entry *e) + { + struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC); +- if (entry) +- nf_queue_entry_get_refs(entry); +- return entry; ++ ++ if (!entry) ++ return NULL; ++ ++ if (nf_queue_entry_get_refs(entry)) ++ return entry; ++ ++ kfree(entry); ++ return NULL; + } + + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) diff --git a/queue-5.10/netfilter-nf_queue-handle-socket-prefetch.patch b/queue-5.10/netfilter-nf_queue-handle-socket-prefetch.patch new file mode 100644 index 00000000000..ca316909226 --- /dev/null +++ b/queue-5.10/netfilter-nf_queue-handle-socket-prefetch.patch @@ -0,0 +1,53 @@ +From 3b836da4081fa585cf6c392f62557496f2cb0efe Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Tue, 1 Mar 2022 00:46:19 +0100 +Subject: netfilter: nf_queue: handle socket prefetch + +From: Florian Westphal + +commit 3b836da4081fa585cf6c392f62557496f2cb0efe upstream. + +In case someone combines bpf socket assign and nf_queue, then we will +queue an skb who references a struct sock that did not have its +reference count incremented. + +As we leave rcu protection, there is no guarantee that skb->sk is still +valid. + +For refcount-less skb->sk case, try to increment the reference count +and then override the destructor. + +In case of failure we have two choices: orphan the skb and 'delete' +preselect or let nf_queue() drop the packet. + +Do the latter, it should not happen during normal operation. + +Fixes: cf7fbe660f2d ("bpf: Add socket assign support") +Acked-by: Joe Stringer +Signed-off-by: Florian Westphal +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_queue.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/net/netfilter/nf_queue.c ++++ b/net/netfilter/nf_queue.c +@@ -189,6 +189,18 @@ static int __nf_queue(struct sk_buff *sk + break; + } + ++ if (skb_sk_is_prefetched(skb)) { ++ struct sock *sk = skb->sk; ++ ++ if (!sk_is_refcounted(sk)) { ++ if (!refcount_inc_not_zero(&sk->sk_refcnt)) ++ return -ENOTCONN; ++ ++ /* drop refcount on skb_orphan */ ++ skb->destructor = sock_edemux; ++ } ++ } ++ + entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC); + if (!entry) + return -ENOMEM; diff --git a/queue-5.10/series b/queue-5.10/series index 6d5a1ad787a..a54405644e2 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -32,3 +32,14 @@ drm-i915-s-jsp2-icp2-pch.patch xen-netfront-destroy-queues-before-real_num_tx_queues-is-zeroed.patch thermal-core-fix-tz_get_trip-null-pointer-dereference.patch ntb-intel-fix-port-config-status-offset-for-spr.patch +mm-consider-__gfp_nowarn-flag-for-oversized-kvmalloc-calls.patch +xfrm-fix-mtu-regression.patch +netfilter-fix-use-after-free-in-__nf_register_net_hook.patch +bpf-sockmap-do-not-ignore-orig_len-parameter.patch +xfrm-fix-the-if_id-check-in-changelink.patch +xfrm-enforce-validity-of-offload-input-flags.patch +e1000e-correct-nvm-checksum-verification-flow.patch +net-fix-up-skbs-delta_truesize-in-udp-gro-frag_list.patch +netfilter-nf_queue-don-t-assume-sk-is-full-socket.patch +netfilter-nf_queue-fix-possible-use-after-free.patch +netfilter-nf_queue-handle-socket-prefetch.patch diff --git a/queue-5.10/xfrm-enforce-validity-of-offload-input-flags.patch b/queue-5.10/xfrm-enforce-validity-of-offload-input-flags.patch new file mode 100644 index 00000000000..c4ca62bc5f2 --- /dev/null +++ b/queue-5.10/xfrm-enforce-validity-of-offload-input-flags.patch @@ -0,0 +1,65 @@ +From 7c76ecd9c99b6e9a771d813ab1aa7fa428b3ade1 Mon Sep 17 00:00:00 2001 +From: Leon Romanovsky +Date: Tue, 8 Feb 2022 16:14:32 +0200 +Subject: xfrm: enforce validity of offload input flags + +From: Leon Romanovsky + +commit 7c76ecd9c99b6e9a771d813ab1aa7fa428b3ade1 upstream. + +struct xfrm_user_offload has flags variable that received user input, +but kernel didn't check if valid bits were provided. It caused a situation +where not sanitized input was forwarded directly to the drivers. + +For example, XFRM_OFFLOAD_IPV6 define that was exposed, was used by +strongswan, but not implemented in the kernel at all. + +As a solution, check and sanitize input flags to forward +XFRM_OFFLOAD_INBOUND to the drivers. + +Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") +Signed-off-by: Leon Romanovsky +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman +--- + include/uapi/linux/xfrm.h | 6 ++++++ + net/xfrm/xfrm_device.c | 6 +++++- + 2 files changed, 11 insertions(+), 1 deletion(-) + +--- a/include/uapi/linux/xfrm.h ++++ b/include/uapi/linux/xfrm.h +@@ -506,6 +506,12 @@ struct xfrm_user_offload { + int ifindex; + __u8 flags; + }; ++/* This flag was exposed without any kernel code that supporting it. ++ * Unfortunately, strongswan has the code that uses sets this flag, ++ * which makes impossible to reuse this bit. ++ * ++ * So leave it here to make sure that it won't be reused by mistake. ++ */ + #define XFRM_OFFLOAD_IPV6 1 + #define XFRM_OFFLOAD_INBOUND 2 + +--- a/net/xfrm/xfrm_device.c ++++ b/net/xfrm/xfrm_device.c +@@ -223,6 +223,9 @@ int xfrm_dev_state_add(struct net *net, + if (x->encap || x->tfcpad) + return -EINVAL; + ++ if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND)) ++ return -EINVAL; ++ + dev = dev_get_by_index(net, xuo->ifindex); + if (!dev) { + if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { +@@ -261,7 +264,8 @@ int xfrm_dev_state_add(struct net *net, + xso->dev = dev; + xso->real_dev = dev; + xso->num_exthdrs = 1; +- xso->flags = xuo->flags; ++ /* Don't forward bit that is not implemented */ ++ xso->flags = xuo->flags & ~XFRM_OFFLOAD_IPV6; + + err = dev->xfrmdev_ops->xdo_dev_state_add(x); + if (err) { diff --git a/queue-5.10/xfrm-fix-mtu-regression.patch b/queue-5.10/xfrm-fix-mtu-regression.patch new file mode 100644 index 00000000000..3ee4fabcbbd --- /dev/null +++ b/queue-5.10/xfrm-fix-mtu-regression.patch @@ -0,0 +1,79 @@ +From 6596a0229541270fb8d38d989f91b78838e5e9da Mon Sep 17 00:00:00 2001 +From: Jiri Bohac +Date: Wed, 19 Jan 2022 10:22:53 +0100 +Subject: xfrm: fix MTU regression + +From: Jiri Bohac + +commit 6596a0229541270fb8d38d989f91b78838e5e9da upstream. + +Commit 749439bfac6e1a2932c582e2699f91d329658196 ("ipv6: fix udpv6 +sendmsg crash caused by too small MTU") breaks PMTU for xfrm. + +A Packet Too Big ICMPv6 message received in response to an ESP +packet will prevent all further communication through the tunnel +if the reported MTU minus the ESP overhead is smaller than 1280. + +E.g. in a case of a tunnel-mode ESP with sha256/aes the overhead +is 92 bytes. Receiving a PTB with MTU of 1371 or less will result +in all further packets in the tunnel dropped. A ping through the +tunnel fails with "ping: sendmsg: Invalid argument". + +Apparently the MTU on the xfrm route is smaller than 1280 and +fails the check inside ip6_setup_cork() added by 749439bf. + +We found this by debugging USGv6/ipv6ready failures. Failing +tests are: "Phase-2 Interoperability Test Scenario IPsec" / +5.3.11 and 5.4.11 (Tunnel Mode: Fragmentation). + +Commit b515d2637276a3810d6595e10ab02c13bfd0b63a ("xfrm: +xfrm_state_mtu should return at least 1280 for ipv6") attempted +to fix this but caused another regression in TCP MSS calculations +and had to be reverted. + +The patch below fixes the situation by dropping the MTU +check and instead checking for the underflows described in the +749439bf commit message. + +Signed-off-by: Jiri Bohac +Fixes: 749439bfac6e ("ipv6: fix udpv6 sendmsg crash caused by too small MTU") +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_output.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1432,8 +1432,6 @@ static int ip6_setup_cork(struct sock *s + if (np->frag_size) + mtu = np->frag_size; + } +- if (mtu < IPV6_MIN_MTU) +- return -EINVAL; + cork->base.fragsize = mtu; + cork->base.gso_size = ipc6->gso_size; + cork->base.tx_flags = 0; +@@ -1495,8 +1493,6 @@ static int __ip6_append_data(struct sock + + fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + + (opt ? opt->opt_nflen : 0); +- maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - +- sizeof(struct frag_hdr); + + headersize = sizeof(struct ipv6hdr) + + (opt ? opt->opt_flen + opt->opt_nflen : 0) + +@@ -1504,6 +1500,13 @@ static int __ip6_append_data(struct sock + sizeof(struct frag_hdr) : 0) + + rt->rt6i_nfheader_len; + ++ if (mtu < fragheaderlen || ++ ((mtu - fragheaderlen) & ~7) + fragheaderlen < sizeof(struct frag_hdr)) ++ goto emsgsize; ++ ++ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - ++ sizeof(struct frag_hdr); ++ + /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit + * the first fragment + */ diff --git a/queue-5.10/xfrm-fix-the-if_id-check-in-changelink.patch b/queue-5.10/xfrm-fix-the-if_id-check-in-changelink.patch new file mode 100644 index 00000000000..b3d8a770862 --- /dev/null +++ b/queue-5.10/xfrm-fix-the-if_id-check-in-changelink.patch @@ -0,0 +1,36 @@ +From 6d0d95a1c2b07270870e7be16575c513c29af3f1 Mon Sep 17 00:00:00 2001 +From: Antony Antony +Date: Tue, 1 Feb 2022 07:51:57 +0100 +Subject: xfrm: fix the if_id check in changelink + +From: Antony Antony + +commit 6d0d95a1c2b07270870e7be16575c513c29af3f1 upstream. + +if_id will be always 0, because it was not yet initialized. + +Fixes: 8dce43919566 ("xfrm: interface with if_id 0 should return error") +Reported-by: Pavel Machek +Signed-off-by: Antony Antony +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman +--- + net/xfrm/xfrm_interface.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/xfrm/xfrm_interface.c ++++ b/net/xfrm/xfrm_interface.c +@@ -679,12 +679,12 @@ static int xfrmi_changelink(struct net_d + struct net *net = xi->net; + struct xfrm_if_parms p = {}; + ++ xfrmi_netlink_parms(data, &p); + if (!p.if_id) { + NL_SET_ERR_MSG(extack, "if_id must be non zero"); + return -EINVAL; + } + +- xfrmi_netlink_parms(data, &p); + xi = xfrmi_locate(net, &p); + if (!xi) { + xi = netdev_priv(dev);