]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 5 Apr 2024 10:07:05 +0000 (12:07 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 5 Apr 2024 10:07:05 +0000 (12:07 +0200)
added patches:
bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch
netfilter-nf_tables-fix-potential-data-race-in-__nft_flowtable_type_get.patch
netfilter-nf_tables-flush-pending-destroy-work-before-exit_net-release.patch

queue-5.4/bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch [new file with mode: 0644]
queue-5.4/netfilter-nf_tables-fix-potential-data-race-in-__nft_flowtable_type_get.patch [new file with mode: 0644]
queue-5.4/netfilter-nf_tables-flush-pending-destroy-work-before-exit_net-release.patch [new file with mode: 0644]
queue-5.4/series

diff --git a/queue-5.4/bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch b/queue-5.4/bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch
new file mode 100644 (file)
index 0000000..90eaa4e
--- /dev/null
@@ -0,0 +1,74 @@
+From ff91059932401894e6c86341915615c5eb0eca48 Mon Sep 17 00:00:00 2001
+From: Jakub Sitnicki <jakub@cloudflare.com>
+Date: Tue, 2 Apr 2024 12:46:21 +0200
+Subject: bpf, sockmap: Prevent lock inversion deadlock in map delete elem
+
+From: Jakub Sitnicki <jakub@cloudflare.com>
+
+commit ff91059932401894e6c86341915615c5eb0eca48 upstream.
+
+syzkaller started using corpuses where a BPF tracing program deletes
+elements from a sockmap/sockhash map. Because BPF tracing programs can be
+invoked from any interrupt context, locks taken during a map_delete_elem
+operation must be hardirq-safe. Otherwise a deadlock due to lock inversion
+is possible, as reported by lockdep:
+
+       CPU0                    CPU1
+       ----                    ----
+  lock(&htab->buckets[i].lock);
+                               local_irq_disable();
+                               lock(&host->lock);
+                               lock(&htab->buckets[i].lock);
+  <Interrupt>
+    lock(&host->lock);
+
+Locks in sockmap are hardirq-unsafe by design. We expect elements to be
+deleted from sockmap/sockhash only in task (normal) context with interrupts
+enabled, or in softirq context.
+
+Detect when a map_delete_elem operation is invoked from a context which is
+_not_ hardirq-unsafe, that is, interrupts are disabled, and bail out with an
+error.
+
+Note that map updates are not affected by this issue. BPF verifier does not
+allow updating sockmap/sockhash from a BPF tracing program today.
+
+Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
+Reported-by: xingwei lee <xrivendell7@gmail.com>
+Reported-by: yue sun <samsun1006219@gmail.com>
+Reported-by: syzbot+bc922f476bd65abbd466@syzkaller.appspotmail.com
+Reported-by: syzbot+d4066896495db380182e@syzkaller.appspotmail.com
+Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Tested-by: syzbot+d4066896495db380182e@syzkaller.appspotmail.com
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=d4066896495db380182e
+Closes: https://syzkaller.appspot.com/bug?extid=bc922f476bd65abbd466
+Link: https://lore.kernel.org/bpf/20240402104621.1050319-1-jakub@cloudflare.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock_map.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/net/core/sock_map.c
++++ b/net/core/sock_map.c
+@@ -321,6 +321,9 @@ static int __sock_map_delete(struct bpf_
+       struct sock *sk;
+       int err = 0;
++      if (irqs_disabled())
++              return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
++
+       raw_spin_lock_bh(&stab->lock);
+       sk = *psk;
+       if (!sk_test || sk_test == sk)
+@@ -654,6 +657,9 @@ static int sock_hash_delete_elem(struct
+       struct bpf_htab_elem *elem;
+       int ret = -ENOENT;
++      if (irqs_disabled())
++              return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
++
+       hash = sock_hash_bucket_hash(key, key_size);
+       bucket = sock_hash_select_bucket(htab, hash);
diff --git a/queue-5.4/netfilter-nf_tables-fix-potential-data-race-in-__nft_flowtable_type_get.patch b/queue-5.4/netfilter-nf_tables-fix-potential-data-race-in-__nft_flowtable_type_get.patch
new file mode 100644 (file)
index 0000000..00d63e4
--- /dev/null
@@ -0,0 +1,58 @@
+From 24225011d81b471acc0e1e315b7d9905459a6304 Mon Sep 17 00:00:00 2001
+From: Ziyang Xuan <william.xuanziyang@huawei.com>
+Date: Wed, 3 Apr 2024 15:22:04 +0800
+Subject: netfilter: nf_tables: Fix potential data-race in __nft_flowtable_type_get()
+
+From: Ziyang Xuan <william.xuanziyang@huawei.com>
+
+commit 24225011d81b471acc0e1e315b7d9905459a6304 upstream.
+
+nft_unregister_flowtable_type() within nf_flow_inet_module_exit() can run
+concurrently with __nft_flowtable_type_get() within nf_tables_newflowtable().
+And there is no protection when iterating over the nf_tables_flowtables
+list in __nft_flowtable_type_get(). Therefore, there is a potential
+data-race on nf_tables_flowtables list entries.
+
+Use list_for_each_entry_rcu() to iterate over nf_tables_flowtables list
+in __nft_flowtable_type_get(), and use rcu_read_lock() in the caller
+nft_flowtable_type_get() to protect the entire type query process.
+
+Fixes: 3b49e2e94e6e ("netfilter: nf_tables: add flow table netlink frontend")
+Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -6041,11 +6041,12 @@ static int nf_tables_flowtable_parse_hoo
+       return err;
+ }
++/* call under rcu_read_lock */
+ static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
+ {
+       const struct nf_flowtable_type *type;
+-      list_for_each_entry(type, &nf_tables_flowtables, list) {
++      list_for_each_entry_rcu(type, &nf_tables_flowtables, list) {
+               if (family == type->family)
+                       return type;
+       }
+@@ -6057,9 +6058,13 @@ nft_flowtable_type_get(struct net *net,
+ {
+       const struct nf_flowtable_type *type;
++      rcu_read_lock();
+       type = __nft_flowtable_type_get(family);
+-      if (type != NULL && try_module_get(type->owner))
++      if (type != NULL && try_module_get(type->owner)) {
++              rcu_read_unlock();
+               return type;
++      }
++      rcu_read_unlock();
+       lockdep_nfnl_nft_mutex_not_held();
+ #ifdef CONFIG_MODULES
diff --git a/queue-5.4/netfilter-nf_tables-flush-pending-destroy-work-before-exit_net-release.patch b/queue-5.4/netfilter-nf_tables-flush-pending-destroy-work-before-exit_net-release.patch
new file mode 100644 (file)
index 0000000..3b1b92a
--- /dev/null
@@ -0,0 +1,125 @@
+From 24cea9677025e0de419989ecb692acd4bb34cac2 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 2 Apr 2024 18:04:36 +0200
+Subject: netfilter: nf_tables: flush pending destroy work before exit_net release
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 24cea9677025e0de419989ecb692acd4bb34cac2 upstream.
+
+Similar to 2c9f0293280e ("netfilter: nf_tables: flush pending destroy
+work before netlink notifier") to address a race between exit_net and
+the destroy workqueue.
+
+The trace below shows an element to be released via destroy workqueue
+while exit_net path (triggered via module removal) has already released
+the set that is used in such transaction.
+
+[ 1360.547789] BUG: KASAN: slab-use-after-free in nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables]
+[ 1360.547861] Read of size 8 at addr ffff888140500cc0 by task kworker/4:1/152465
+[ 1360.547870] CPU: 4 PID: 152465 Comm: kworker/4:1 Not tainted 6.8.0+ #359
+[ 1360.547882] Workqueue: events nf_tables_trans_destroy_work [nf_tables]
+[ 1360.547984] Call Trace:
+[ 1360.547991]  <TASK>
+[ 1360.547998]  dump_stack_lvl+0x53/0x70
+[ 1360.548014]  print_report+0xc4/0x610
+[ 1360.548026]  ? __virt_addr_valid+0xba/0x160
+[ 1360.548040]  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
+[ 1360.548054]  ? nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables]
+[ 1360.548176]  kasan_report+0xae/0xe0
+[ 1360.548189]  ? nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables]
+[ 1360.548312]  nf_tables_trans_destroy_work+0x3f5/0x590 [nf_tables]
+[ 1360.548447]  ? __pfx_nf_tables_trans_destroy_work+0x10/0x10 [nf_tables]
+[ 1360.548577]  ? _raw_spin_unlock_irq+0x18/0x30
+[ 1360.548591]  process_one_work+0x2f1/0x670
+[ 1360.548610]  worker_thread+0x4d3/0x760
+[ 1360.548627]  ? __pfx_worker_thread+0x10/0x10
+[ 1360.548640]  kthread+0x16b/0x1b0
+[ 1360.548653]  ? __pfx_kthread+0x10/0x10
+[ 1360.548665]  ret_from_fork+0x2f/0x50
+[ 1360.548679]  ? __pfx_kthread+0x10/0x10
+[ 1360.548690]  ret_from_fork_asm+0x1a/0x30
+[ 1360.548707]  </TASK>
+
+[ 1360.548719] Allocated by task 192061:
+[ 1360.548726]  kasan_save_stack+0x20/0x40
+[ 1360.548739]  kasan_save_track+0x14/0x30
+[ 1360.548750]  __kasan_kmalloc+0x8f/0xa0
+[ 1360.548760]  __kmalloc_node+0x1f1/0x450
+[ 1360.548771]  nf_tables_newset+0x10c7/0x1b50 [nf_tables]
+[ 1360.548883]  nfnetlink_rcv_batch+0xbc4/0xdc0 [nfnetlink]
+[ 1360.548909]  nfnetlink_rcv+0x1a8/0x1e0 [nfnetlink]
+[ 1360.548927]  netlink_unicast+0x367/0x4f0
+[ 1360.548935]  netlink_sendmsg+0x34b/0x610
+[ 1360.548944]  ____sys_sendmsg+0x4d4/0x510
+[ 1360.548953]  ___sys_sendmsg+0xc9/0x120
+[ 1360.548961]  __sys_sendmsg+0xbe/0x140
+[ 1360.548971]  do_syscall_64+0x55/0x120
+[ 1360.548982]  entry_SYSCALL_64_after_hwframe+0x55/0x5d
+
+[ 1360.548994] Freed by task 192222:
+[ 1360.548999]  kasan_save_stack+0x20/0x40
+[ 1360.549009]  kasan_save_track+0x14/0x30
+[ 1360.549019]  kasan_save_free_info+0x3b/0x60
+[ 1360.549028]  poison_slab_object+0x100/0x180
+[ 1360.549036]  __kasan_slab_free+0x14/0x30
+[ 1360.549042]  kfree+0xb6/0x260
+[ 1360.549049]  __nft_release_table+0x473/0x6a0 [nf_tables]
+[ 1360.549131]  nf_tables_exit_net+0x170/0x240 [nf_tables]
+[ 1360.549221]  ops_exit_list+0x50/0xa0
+[ 1360.549229]  free_exit_list+0x101/0x140
+[ 1360.549236]  unregister_pernet_operations+0x107/0x160
+[ 1360.549245]  unregister_pernet_subsys+0x1c/0x30
+[ 1360.549254]  nf_tables_module_exit+0x43/0x80 [nf_tables]
+[ 1360.549345]  __do_sys_delete_module+0x253/0x370
+[ 1360.549352]  do_syscall_64+0x55/0x120
+[ 1360.549360]  entry_SYSCALL_64_after_hwframe+0x55/0x5d
+
+(gdb) list *__nft_release_table+0x473
+0x1e033 is in __nft_release_table (net/netfilter/nf_tables_api.c:11354).
+11349           list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
+11350                   list_del(&flowtable->list);
+11351                   nft_use_dec(&table->use);
+11352                   nf_tables_flowtable_destroy(flowtable);
+11353           }
+11354           list_for_each_entry_safe(set, ns, &table->sets, list) {
+11355                   list_del(&set->list);
+11356                   nft_use_dec(&table->use);
+11357                   if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+11358                           nft_map_deactivate(&ctx, set);
+(gdb)
+
+[ 1360.549372] Last potentially related work creation:
+[ 1360.549376]  kasan_save_stack+0x20/0x40
+[ 1360.549384]  __kasan_record_aux_stack+0x9b/0xb0
+[ 1360.549392]  __queue_work+0x3fb/0x780
+[ 1360.549399]  queue_work_on+0x4f/0x60
+[ 1360.549407]  nft_rhash_remove+0x33b/0x340 [nf_tables]
+[ 1360.549516]  nf_tables_commit+0x1c6a/0x2620 [nf_tables]
+[ 1360.549625]  nfnetlink_rcv_batch+0x728/0xdc0 [nfnetlink]
+[ 1360.549647]  nfnetlink_rcv+0x1a8/0x1e0 [nfnetlink]
+[ 1360.549671]  netlink_unicast+0x367/0x4f0
+[ 1360.549680]  netlink_sendmsg+0x34b/0x610
+[ 1360.549690]  ____sys_sendmsg+0x4d4/0x510
+[ 1360.549697]  ___sys_sendmsg+0xc9/0x120
+[ 1360.549706]  __sys_sendmsg+0xbe/0x140
+[ 1360.549715]  do_syscall_64+0x55/0x120
+[ 1360.549725]  entry_SYSCALL_64_after_hwframe+0x55/0x5d
+
+Fixes: 0935d5588400 ("netfilter: nf_tables: asynchronous release")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -8476,6 +8476,7 @@ static void __exit nf_tables_module_exit
+       unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
+       nft_chain_filter_fini();
+       nft_chain_route_fini();
++      nf_tables_trans_destroy_flush_work();
+       unregister_pernet_subsys(&nf_tables_net_ops);
+       cancel_work_sync(&trans_gc_work);
+       cancel_work_sync(&trans_destroy_work);
index e6e4aab93418580b7109228b74f14565d5bac420..caa725abfe9366b664a546f735cb5adceefeff3c 100644 (file)
@@ -146,3 +146,6 @@ vfio-pci-create-persistent-intx-handler.patch
 vfio-platform-create-persistent-irq-handlers.patch
 revert-x86-mm-ident_map-use-gbpages-only-where-full-gb-page-should-be-mapped.patch
 mm-vmscan-prevent-infinite-loop-for-costly-gfp_noio-__gfp_retry_mayfail-allocations.patch
+netfilter-nf_tables-flush-pending-destroy-work-before-exit_net-release.patch
+netfilter-nf_tables-fix-potential-data-race-in-__nft_flowtable_type_get.patch
+bpf-sockmap-prevent-lock-inversion-deadlock-in-map-delete-elem.patch