]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
net/sched: act_ct: Take per-cb reference to tcf_ct_flow_table
authorVlad Buslov <vladbu@nvidia.com>
Tue, 5 Dec 2023 17:25:54 +0000 (18:25 +0100)
committerDavid S. Miller <davem@davemloft.net>
Mon, 11 Dec 2023 09:59:58 +0000 (09:59 +0000)
The referenced change added custom cleanup code to act_ct to delete any
callbacks registered on the parent block when deleting the
tcf_ct_flow_table instance. However, the underlying issue is that the
drivers don't obtain the reference to the tcf_ct_flow_table instance when
registering callbacks which means that not only driver callbacks may still
be on the table when deleting it but also that the driver can still have
pointers to its internal nf_flowtable and can use it concurrently which
results either warning in netfilter[0] or use-after-free.

Fix the issue by taking a reference to the underlying struct
tcf_ct_flow_table instance when registering the callback and release the
reference when unregistering. Expose new API required for such reference
counting by adding two new callbacks to nf_flowtable_type and implementing
them for act_ct flowtable_ct type. This fixes the issue by extending the
lifetime of nf_flowtable until all users have unregistered.

[0]:
[106170.938634] ------------[ cut here ]------------
[106170.939111] WARNING: CPU: 21 PID: 3688 at include/net/netfilter/nf_flow_table.h:262 mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core]
[106170.940108] Modules linked in: act_ct nf_flow_table act_mirred act_skbedit act_tunnel_key vxlan cls_matchall nfnetlink_cttimeout act_gact cls_flower sch_ingress mlx5_vdpa vringh vhost_iotlb vdpa bonding openvswitch nsh rpcrdma rdma_ucm
ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_regis
try overlay mlx5_core
[106170.943496] CPU: 21 PID: 3688 Comm: kworker/u48:0 Not tainted 6.6.0-rc7_for_upstream_min_debug_2023_11_01_13_02 #1
[106170.944361] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
[106170.945292] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core]
[106170.945846] RIP: 0010:mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core]
[106170.946413] Code: 89 ef 48 83 05 71 a4 14 00 01 e8 f4 06 04 e1 48 83 05 6c a4 14 00 01 48 83 c4 28 5b 5d 41 5c 41 5d c3 48 83 05 d1 8b 14 00 01 <0f> 0b 48 83 05 d7 8b 14 00 01 e9 96 fe ff ff 48 83 05 a2 90 14 00
[106170.947924] RSP: 0018:ffff88813ff0fcb8 EFLAGS: 00010202
[106170.948397] RAX: 0000000000000000 RBX: ffff88811eabac40 RCX: ffff88811eabad48
[106170.949040] RDX: ffff88811eab8000 RSI: ffffffffa02cd560 RDI: 0000000000000000
[106170.949679] RBP: ffff88811eab8000 R08: 0000000000000001 R09: ffffffffa0229700
[106170.950317] R10: ffff888103538fc0 R11: 0000000000000001 R12: ffff88811eabad58
[106170.950969] R13: ffff888110c01c00 R14: ffff888106b40000 R15: 0000000000000000
[106170.951616] FS:  0000000000000000(0000) GS:ffff88885fd40000(0000) knlGS:0000000000000000
[106170.952329] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[106170.952834] CR2: 00007f1cefd28cb0 CR3: 000000012181b006 CR4: 0000000000370ea0
[106170.953482] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[106170.954121] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[106170.954766] Call Trace:
[106170.955057]  <TASK>
[106170.955315]  ? __warn+0x79/0x120
[106170.955648]  ? mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core]
[106170.956172]  ? report_bug+0x17c/0x190
[106170.956537]  ? handle_bug+0x3c/0x60
[106170.956891]  ? exc_invalid_op+0x14/0x70
[106170.957264]  ? asm_exc_invalid_op+0x16/0x20
[106170.957666]  ? mlx5_del_flow_rules+0x10/0x310 [mlx5_core]
[106170.958172]  ? mlx5_tc_ct_block_flow_offload_add+0x1240/0x1240 [mlx5_core]
[106170.958788]  ? mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core]
[106170.959339]  ? mlx5_tc_ct_del_ft_cb+0xc6/0x2b0 [mlx5_core]
[106170.959854]  ? mapping_remove+0x154/0x1d0 [mlx5_core]
[106170.960342]  ? mlx5e_tc_action_miss_mapping_put+0x4f/0x80 [mlx5_core]
[106170.960927]  mlx5_tc_ct_delete_flow+0x76/0xc0 [mlx5_core]
[106170.961441]  mlx5_free_flow_attr_actions+0x13b/0x220 [mlx5_core]
[106170.962001]  mlx5e_tc_del_fdb_flow+0x22c/0x3b0 [mlx5_core]
[106170.962524]  mlx5e_tc_del_flow+0x95/0x3c0 [mlx5_core]
[106170.963034]  mlx5e_flow_put+0x73/0xe0 [mlx5_core]
[106170.963506]  mlx5e_put_flow_list+0x38/0x70 [mlx5_core]
[106170.964002]  mlx5e_rep_update_flows+0xec/0x290 [mlx5_core]
[106170.964525]  mlx5e_rep_neigh_update+0x1da/0x310 [mlx5_core]
[106170.965056]  process_one_work+0x13a/0x2c0
[106170.965443]  worker_thread+0x2e5/0x3f0
[106170.965808]  ? rescuer_thread+0x410/0x410
[106170.966192]  kthread+0xc6/0xf0
[106170.966515]  ? kthread_complete_and_exit+0x20/0x20
[106170.966970]  ret_from_fork+0x2d/0x50
[106170.967332]  ? kthread_complete_and_exit+0x20/0x20
[106170.967774]  ret_from_fork_asm+0x11/0x20
[106170.970466]  </TASK>
[106170.970726] ---[ end trace 0000000000000000 ]---

Fixes: 77ac5e40c44e ("net/sched: act_ct: remove and free nf_table callbacks")
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/netfilter/nf_flow_table.h
net/sched/act_ct.c

index fe1507c1db828b14e6d684f1f1bca514dbb4103d..692d5955911c7dbf967110be2dd98e62c75f20af 100644 (file)
@@ -62,6 +62,8 @@ struct nf_flowtable_type {
                                                  enum flow_offload_tuple_dir dir,
                                                  struct nf_flow_rule *flow_rule);
        void                            (*free)(struct nf_flowtable *ft);
+       void                            (*get)(struct nf_flowtable *ft);
+       void                            (*put)(struct nf_flowtable *ft);
        nf_hookfn                       *hook;
        struct module                   *owner;
 };
@@ -240,6 +242,11 @@ nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
        }
 
        list_add_tail(&block_cb->list, &block->cb_list);
+       up_write(&flow_table->flow_block_lock);
+
+       if (flow_table->type->get)
+               flow_table->type->get(flow_table);
+       return 0;
 
 unlock:
        up_write(&flow_table->flow_block_lock);
@@ -262,6 +269,9 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
                WARN_ON(true);
        }
        up_write(&flow_table->flow_block_lock);
+
+       if (flow_table->type->put)
+               flow_table->type->put(flow_table);
 }
 
 void flow_offload_route_init(struct flow_offload *flow,
index b3f4a503ee2ba4fd9620567208e2e77ed80b41b1..f69c47945175b657da6d7368c94693e81465296f 100644 (file)
@@ -286,9 +286,31 @@ static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow)
               !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
 }
 
+static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft);
+
+static void tcf_ct_nf_get(struct nf_flowtable *ft)
+{
+       struct tcf_ct_flow_table *ct_ft =
+               container_of(ft, struct tcf_ct_flow_table, nf_ft);
+
+       tcf_ct_flow_table_get_ref(ct_ft);
+}
+
+static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft);
+
+static void tcf_ct_nf_put(struct nf_flowtable *ft)
+{
+       struct tcf_ct_flow_table *ct_ft =
+               container_of(ft, struct tcf_ct_flow_table, nf_ft);
+
+       tcf_ct_flow_table_put(ct_ft);
+}
+
 static struct nf_flowtable_type flowtable_ct = {
        .gc             = tcf_ct_flow_is_outdated,
        .action         = tcf_ct_flow_table_fill_actions,
+       .get            = tcf_ct_nf_get,
+       .put            = tcf_ct_nf_put,
        .owner          = THIS_MODULE,
 };
 
@@ -337,9 +359,13 @@ err_alloc:
        return err;
 }
 
+static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft)
+{
+       refcount_inc(&ct_ft->ref);
+}
+
 static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
 {
-       struct flow_block_cb *block_cb, *tmp_cb;
        struct tcf_ct_flow_table *ct_ft;
        struct flow_block *block;
 
@@ -347,13 +373,9 @@ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
                             rwork);
        nf_flow_table_free(&ct_ft->nf_ft);
 
-       /* Remove any remaining callbacks before cleanup */
        block = &ct_ft->nf_ft.flow_block;
        down_write(&ct_ft->nf_ft.flow_block_lock);
-       list_for_each_entry_safe(block_cb, tmp_cb, &block->cb_list, list) {
-               list_del(&block_cb->list);
-               flow_block_cb_free(block_cb);
-       }
+       WARN_ON(!list_empty(&block->cb_list));
        up_write(&ct_ft->nf_ft.flow_block_lock);
        kfree(ct_ft);