// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * net/sched/act_ct.c  Connection Tracking action
 *
 * Authors:   Paul Blakey <paulb@mellanox.com>
 *            Yossi Kuperman <yossiku@mellanox.com>
 *            Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
 */
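/* Illustrative usage (an assumption for documentation, not part of the
 * original file): this action is normally attached from userspace via the
 * tc CLI, e.g.
 *
 *   tc filter add dev eth0 ingress protocol ip flower \
 *      action ct zone 1 pipe action goto chain 1
 *
 * The zone/commit/nat/mark/label options correspond to the TCA_CT_*
 * netlink attributes parsed by tcf_ct_fill_params() below.
 */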
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_cls.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/rhashtable.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/act_api.h>
#include <net/ip.h>
#include <net/ipv6_frag.h>
#include <uapi/linux/tc_act/tc_ct.h>
#include <net/tc_act/tc_ct.h>

#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/netfilter/nf_conntrack_act_ct.h>
#include <uapi/linux/netfilter/nf_nat.h>
static struct workqueue_struct *act_ct_wq;
static struct rhashtable zones_ht;
static DEFINE_MUTEX(zones_mutex);
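/* Each conntrack zone gets one tcf_ct_flow_table, shared and refcounted
 * by every ct action in that zone; zones_ht indexes the tables by zone id
 * (see tcf_ct_flow_table_get() below).
 */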
struct tcf_ct_flow_table {
        struct rhash_head node; /* In zones tables */

        struct rcu_work rwork;
        struct nf_flowtable nf_ft;
        refcount_t ref;
        u16 zone;
};
static const struct rhashtable_params zones_params = {
        .head_offset = offsetof(struct tcf_ct_flow_table, node),
        .key_offset = offsetof(struct tcf_ct_flow_table, zone),
        .key_len = sizeof_field(struct tcf_ct_flow_table, zone),
        .automatic_shrinking = true,
};
static struct flow_action_entry *
tcf_ct_flow_table_flow_action_get_next(struct flow_action *flow_action)
{
        int i = flow_action->num_entries++;

        return &flow_action->entries[i];
}
static void tcf_ct_add_mangle_action(struct flow_action *action,
                                     enum flow_action_mangle_base htype,
                                     u32 offset,
                                     u32 mask,
                                     u32 val)
{
        struct flow_action_entry *entry;

        entry = tcf_ct_flow_table_flow_action_get_next(action);
        entry->id = FLOW_ACTION_MANGLE;
        entry->mangle.htype = htype;
        entry->mangle.mask = ~mask;
        entry->mangle.offset = offset;
        entry->mangle.val = val;
}
/* The following nat helper functions check if the inverted reverse tuple
 * (target) is different from the current dir tuple - meaning nat for ports
 * and/or ip is needed, and add the relevant mangle actions.
 */
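/* Worked example (illustrative, with assumed addresses): with source NAT
 * rewriting 10.0.0.1 to 192.0.2.1, the ORIGINAL-direction tuple has
 * src 10.0.0.1 while the inverted reply tuple (target) has src 192.0.2.1;
 * the source memcmp() below therefore differs and a saddr mangle action
 * rewriting to 192.0.2.1 is emitted.
 */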
static void
tcf_ct_flow_table_add_action_nat_ipv4(const struct nf_conntrack_tuple *tuple,
                                      struct nf_conntrack_tuple target,
                                      struct flow_action *action)
{
        if (memcmp(&target.src.u3, &tuple->src.u3, sizeof(target.src.u3)))
                tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_IP4,
                                         offsetof(struct iphdr, saddr),
                                         0xFFFFFFFF,
                                         be32_to_cpu(target.src.u3.ip));
        if (memcmp(&target.dst.u3, &tuple->dst.u3, sizeof(target.dst.u3)))
                tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_IP4,
                                         offsetof(struct iphdr, daddr),
                                         0xFFFFFFFF,
                                         be32_to_cpu(target.dst.u3.ip));
}
static void
tcf_ct_add_ipv6_addr_mangle_action(struct flow_action *action,
                                   union nf_inet_addr *addr,
                                   u32 offset)
{
        int i;

        for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++)
                tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
                                         i * sizeof(u32) + offset,
                                         0xFFFFFFFF, be32_to_cpu(addr->ip6[i]));
}
static void
tcf_ct_flow_table_add_action_nat_ipv6(const struct nf_conntrack_tuple *tuple,
                                      struct nf_conntrack_tuple target,
                                      struct flow_action *action)
{
        if (memcmp(&target.src.u3, &tuple->src.u3, sizeof(target.src.u3)))
                tcf_ct_add_ipv6_addr_mangle_action(action, &target.src.u3,
                                                   offsetof(struct ipv6hdr,
                                                            saddr));
        if (memcmp(&target.dst.u3, &tuple->dst.u3, sizeof(target.dst.u3)))
                tcf_ct_add_ipv6_addr_mangle_action(action, &target.dst.u3,
                                                   offsetof(struct ipv6hdr,
                                                            daddr));
}
static void
tcf_ct_flow_table_add_action_nat_tcp(const struct nf_conntrack_tuple *tuple,
                                     struct nf_conntrack_tuple target,
                                     struct flow_action *action)
{
        __be16 target_src = target.src.u.tcp.port;
        __be16 target_dst = target.dst.u.tcp.port;

        if (target_src != tuple->src.u.tcp.port)
                tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP,
                                         offsetof(struct tcphdr, source),
                                         0xFFFF, be16_to_cpu(target_src));
        if (target_dst != tuple->dst.u.tcp.port)
                tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP,
                                         offsetof(struct tcphdr, dest),
                                         0xFFFF, be16_to_cpu(target_dst));
}
static void
tcf_ct_flow_table_add_action_nat_udp(const struct nf_conntrack_tuple *tuple,
                                     struct nf_conntrack_tuple target,
                                     struct flow_action *action)
{
        __be16 target_src = target.src.u.udp.port;
        __be16 target_dst = target.dst.u.udp.port;

        if (target_src != tuple->src.u.udp.port)
                tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_UDP,
                                         offsetof(struct udphdr, source),
                                         0xFFFF, be16_to_cpu(target_src));
        if (target_dst != tuple->dst.u.udp.port)
                tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_UDP,
                                         offsetof(struct udphdr, dest),
                                         0xFFFF, be16_to_cpu(target_dst));
}
static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
                                              enum ip_conntrack_dir dir,
                                              struct flow_action *action)
{
        struct nf_conn_labels *ct_labels;
        struct flow_action_entry *entry;
        enum ip_conntrack_info ctinfo;
        u32 *act_ct_labels;

        entry = tcf_ct_flow_table_flow_action_get_next(action);
        entry->id = FLOW_ACTION_CT_METADATA;
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
        entry->ct_metadata.mark = ct->mark;
#endif
        ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
                                             IP_CT_ESTABLISHED_REPLY;
        /* aligns with the CT reference on the SKB nf_ct_set */
        entry->ct_metadata.cookie = (unsigned long)ct | ctinfo;
        entry->ct_metadata.orig_dir = dir == IP_CT_DIR_ORIGINAL;

        act_ct_labels = entry->ct_metadata.labels;
        ct_labels = nf_ct_labels_find(ct);
        if (ct_labels)
                memcpy(act_ct_labels, ct_labels->bits, NF_CT_LABELS_MAX_SIZE);
        else
                memset(act_ct_labels, 0, NF_CT_LABELS_MAX_SIZE);
}
static int tcf_ct_flow_table_add_action_nat(struct net *net,
                                            struct nf_conn *ct,
                                            enum ip_conntrack_dir dir,
                                            struct flow_action *action)
{
        const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
        struct nf_conntrack_tuple target;

        if (!(ct->status & IPS_NAT_MASK))
                return 0;

        nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);

        switch (tuple->src.l3num) {
        case NFPROTO_IPV4:
                tcf_ct_flow_table_add_action_nat_ipv4(tuple, target,
                                                      action);
                break;
        case NFPROTO_IPV6:
                tcf_ct_flow_table_add_action_nat_ipv6(tuple, target,
                                                      action);
                break;
        default:
                return -EOPNOTSUPP;
        }

        switch (nf_ct_protonum(ct)) {
        case IPPROTO_TCP:
                tcf_ct_flow_table_add_action_nat_tcp(tuple, target, action);
                break;
        case IPPROTO_UDP:
                tcf_ct_flow_table_add_action_nat_udp(tuple, target, action);
                break;
        default:
                return -EOPNOTSUPP;
        }

        return 0;
}
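/* tcf_ct_flow_table_fill_actions() below serves as the ->action callback
 * of the nf_flowtable_type registered for this action: when a flow from
 * the table is offloaded, it converts the connection's NAT state and CT
 * metadata into flow_action entries (header rewrites plus
 * FLOW_ACTION_CT_METADATA) for the driver to program.
 */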
static int tcf_ct_flow_table_fill_actions(struct net *net,
                                          const struct flow_offload *flow,
                                          enum flow_offload_tuple_dir tdir,
                                          struct nf_flow_rule *flow_rule)
{
        struct flow_action *action = &flow_rule->rule->action;
        int num_entries = action->num_entries;
        struct nf_conn *ct = flow->ct;
        enum ip_conntrack_dir dir;
        int i, err;

        switch (tdir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                dir = IP_CT_DIR_ORIGINAL;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                dir = IP_CT_DIR_REPLY;
                break;
        default:
                return -EOPNOTSUPP;
        }

        err = tcf_ct_flow_table_add_action_nat(net, ct, dir, action);
        if (err)
                goto err_nat;

        tcf_ct_flow_table_add_action_meta(ct, dir, action);
        return 0;

err_nat:
        /* Clear filled actions */
        for (i = num_entries; i < action->num_entries; i++)
                memset(&action->entries[i], 0, sizeof(action->entries[i]));
        action->num_entries = num_entries;

        return err;
}
static struct nf_flowtable_type flowtable_ct = {
        .action = tcf_ct_flow_table_fill_actions,
        .owner = THIS_MODULE,
};
static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
{
        struct tcf_ct_flow_table *ct_ft;
        int err = -ENOMEM;

        mutex_lock(&zones_mutex);
        ct_ft = rhashtable_lookup_fast(&zones_ht, &params->zone, zones_params);
        if (ct_ft && refcount_inc_not_zero(&ct_ft->ref))
                goto out_unlock;

        ct_ft = kzalloc(sizeof(*ct_ft), GFP_KERNEL);
        if (!ct_ft)
                goto err_alloc;
        refcount_set(&ct_ft->ref, 1);

        ct_ft->zone = params->zone;
        err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
        if (err)
                goto err_insert;

        ct_ft->nf_ft.type = &flowtable_ct;
        ct_ft->nf_ft.flags |= NF_FLOWTABLE_HW_OFFLOAD |
                              NF_FLOWTABLE_COUNTER;
        err = nf_flow_table_init(&ct_ft->nf_ft);
        if (err)
                goto err_init;

        __module_get(THIS_MODULE);
out_unlock:
        params->ct_ft = ct_ft;
        params->nf_ft = &ct_ft->nf_ft;
        mutex_unlock(&zones_mutex);

        return 0;

err_init:
        rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
err_insert:
        kfree(ct_ft);
err_alloc:
        mutex_unlock(&zones_mutex);
        return err;
}
static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
{
        struct flow_block_cb *block_cb, *tmp_cb;
        struct tcf_ct_flow_table *ct_ft;
        struct flow_block *block;

        ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table,
                             rwork);
        nf_flow_table_free(&ct_ft->nf_ft);

        /* Remove any remaining callbacks before cleanup */
        block = &ct_ft->nf_ft.flow_block;
        down_write(&ct_ft->nf_ft.flow_block_lock);
        list_for_each_entry_safe(block_cb, tmp_cb, &block->cb_list, list) {
                list_del(&block_cb->list);
                flow_block_cb_free(block_cb);
        }
        up_write(&ct_ft->nf_ft.flow_block_lock);
        kfree(ct_ft);

        module_put(THIS_MODULE);
}
static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
{
        struct tcf_ct_flow_table *ct_ft = params->ct_ft;

        if (refcount_dec_and_test(&params->ct_ft->ref)) {
                rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
                INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
                queue_rcu_work(act_ct_wq, &ct_ft->rwork);
        }
}
static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
                                 struct nf_conn_act_ct_ext *act_ct_ext, u8 dir)
{
        entry->tuplehash[dir].tuple.xmit_type = FLOW_OFFLOAD_XMIT_TC;
        entry->tuplehash[dir].tuple.tc.iifidx = act_ct_ext->ifindex[dir];
}
static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
                                  struct nf_conn *ct,
                                  bool tcp)
{
        struct nf_conn_act_ct_ext *act_ct_ext;
        struct flow_offload *entry;
        int err;

        if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
                return;

        entry = flow_offload_alloc(ct);
        if (!entry) {
                WARN_ON_ONCE(1);
                goto err_alloc;
        }

        if (tcp) {
                ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
                ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
        }

        act_ct_ext = nf_conn_act_ct_ext_find(ct);
        if (act_ct_ext) {
                tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL);
                tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY);
        }

        err = flow_offload_add(&ct_ft->nf_ft, entry);
        if (err)
                goto err_add;

        return;

err_add:
        flow_offload_free(entry);
err_alloc:
        clear_bit(IPS_OFFLOAD_BIT, &ct->status);
}
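/* Only established (and, for TCP, ESTABLISHED-state), assured connections
 * that need no helper or sequence adjustment are considered for flow
 * table offload; tcf_ct_flow_table_process_conn() below enforces this.
 */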
static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
                                           struct nf_conn *ct,
                                           enum ip_conntrack_info ctinfo)
{
        bool tcp = false;

        if ((ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) ||
            !test_bit(IPS_ASSURED_BIT, &ct->status))
                return;

        switch (nf_ct_protonum(ct)) {
        case IPPROTO_TCP:
                tcp = true;
                if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
                        return;
                break;
        case IPPROTO_UDP:
                break;
#ifdef CONFIG_NF_CT_PROTO_GRE
        case IPPROTO_GRE: {
                struct nf_conntrack_tuple *tuple;

                if (ct->status & IPS_NAT_MASK)
                        return;
                tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
                /* No support for GRE v1 */
                if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
                        return;
                break;
        }
#endif
        default:
                return;
        }

        if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
            ct->status & IPS_SEQ_ADJUST)
                return;

        tcf_ct_flow_table_add(ct_ft, ct, tcp);
}
static bool
tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb,
                                  struct flow_offload_tuple *tuple,
                                  struct tcphdr **tcph)
{
        struct flow_ports *ports;
        unsigned int thoff;
        struct iphdr *iph;
        size_t hdrsize;
        u8 ipproto;

        if (!pskb_network_may_pull(skb, sizeof(*iph)))
                return false;

        iph = ip_hdr(skb);
        thoff = iph->ihl * 4;

        if (ip_is_fragment(iph) ||
            unlikely(thoff != sizeof(struct iphdr)))
                return false;

        ipproto = iph->protocol;
        switch (ipproto) {
        case IPPROTO_TCP:
                hdrsize = sizeof(struct tcphdr);
                break;
        case IPPROTO_UDP:
                hdrsize = sizeof(*ports);
                break;
#ifdef CONFIG_NF_CT_PROTO_GRE
        case IPPROTO_GRE:
                hdrsize = sizeof(struct gre_base_hdr);
                break;
#endif
        default:
                return false;
        }

        if (iph->ttl <= 1)
                return false;

        if (!pskb_network_may_pull(skb, thoff + hdrsize))
                return false;

        switch (ipproto) {
        case IPPROTO_TCP:
                *tcph = (void *)(skb_network_header(skb) + thoff);
                fallthrough;
        case IPPROTO_UDP:
                ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
                tuple->src_port = ports->source;
                tuple->dst_port = ports->dest;
                break;
        case IPPROTO_GRE: {
                struct gre_base_hdr *greh;

                greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
                if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
                        return false;
                break;
        }
        }

        iph = ip_hdr(skb);

        tuple->src_v4.s_addr = iph->saddr;
        tuple->dst_v4.s_addr = iph->daddr;
        tuple->l3proto = AF_INET;
        tuple->l4proto = ipproto;

        return true;
}
static bool
tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb,
                                  struct flow_offload_tuple *tuple,
                                  struct tcphdr **tcph)
{
        struct flow_ports *ports;
        struct ipv6hdr *ip6h;
        unsigned int thoff;
        size_t hdrsize;
        u8 nexthdr;

        if (!pskb_network_may_pull(skb, sizeof(*ip6h)))
                return false;

        ip6h = ipv6_hdr(skb);
        thoff = sizeof(*ip6h);

        nexthdr = ip6h->nexthdr;
        switch (nexthdr) {
        case IPPROTO_TCP:
                hdrsize = sizeof(struct tcphdr);
                break;
        case IPPROTO_UDP:
                hdrsize = sizeof(*ports);
                break;
#ifdef CONFIG_NF_CT_PROTO_GRE
        case IPPROTO_GRE:
                hdrsize = sizeof(struct gre_base_hdr);
                break;
#endif
        default:
                return false;
        }

        if (ip6h->hop_limit <= 1)
                return false;

        if (!pskb_network_may_pull(skb, thoff + hdrsize))
                return false;

        switch (nexthdr) {
        case IPPROTO_TCP:
                *tcph = (void *)(skb_network_header(skb) + thoff);
                fallthrough;
        case IPPROTO_UDP:
                ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
                tuple->src_port = ports->source;
                tuple->dst_port = ports->dest;
                break;
        case IPPROTO_GRE: {
                struct gre_base_hdr *greh;

                greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
                if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
                        return false;
                break;
        }
        }

        ip6h = ipv6_hdr(skb);

        tuple->src_v6 = ip6h->saddr;
        tuple->dst_v6 = ip6h->daddr;
        tuple->l3proto = AF_INET6;
        tuple->l4proto = nexthdr;

        return true;
}
static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
                                     struct sk_buff *skb,
                                     u8 family)
{
        struct nf_flowtable *nf_ft = &p->ct_ft->nf_ft;
        struct flow_offload_tuple_rhash *tuplehash;
        struct flow_offload_tuple tuple = {};
        enum ip_conntrack_info ctinfo;
        struct tcphdr *tcph = NULL;
        struct flow_offload *flow;
        struct nf_conn *ct;
        u8 dir;

        switch (family) {
        case NFPROTO_IPV4:
                if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph))
                        return false;
                break;
        case NFPROTO_IPV6:
                if (!tcf_ct_flow_table_fill_tuple_ipv6(skb, &tuple, &tcph))
                        return false;
                break;
        default:
                return false;
        }

        tuplehash = flow_offload_lookup(nf_ft, &tuple);
        if (!tuplehash)
                return false;

        dir = tuplehash->tuple.dir;
        flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
        ct = flow->ct;

        if (tcph && (unlikely(tcph->fin || tcph->rst))) {
                flow_offload_teardown(flow);
                return false;
        }

        ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
                                                    IP_CT_ESTABLISHED_REPLY;

        flow_offload_refresh(nf_ft, flow);
        nf_conntrack_get(&ct->ct_general);
        nf_ct_set(skb, ct, ctinfo);
        if (nf_ft->flags & NF_FLOWTABLE_COUNTER)
                nf_ct_acct_update(ct, dir, skb->len);

        return true;
}
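/* When the lookup above succeeds, tcf_ct_act() can skip the full
 * nf_conntrack_in() path and restore the conntrack reference straight
 * from the flow entry: a software fast path mirroring the netfilter
 * flowtable design.
 */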
static int tcf_ct_flow_tables_init(void)
{
        return rhashtable_init(&zones_ht, &zones_params);
}

static void tcf_ct_flow_tables_uninit(void)
{
        rhashtable_destroy(&zones_ht);
}
static struct tc_action_ops act_ct_ops;
static unsigned int ct_net_id;

struct tc_ct_action_net {
        struct tc_action_net tn; /* Must be first */
        bool labels;
};
/* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
                                   u16 zone_id, bool force)
{
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct;

        ct = nf_ct_get(skb, &ctinfo);
        if (!ct)
                return false;
        if (!net_eq(net, read_pnet(&ct->ct_net)))
                return false;
        if (nf_ct_zone(ct)->id != zone_id)
                return false;

        /* Force conntrack entry direction. */
        if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
                if (nf_ct_is_confirmed(ct))
                        nf_ct_kill(ct);

                nf_ct_put(ct);
                nf_ct_set(skb, NULL, IP_CT_UNTRACKED);

                return false;
        }

        return true;
}
/* Trim the skb to the length specified by the IP/IPv6 header,
 * removing any trailing lower-layer padding. This prepares the skb
 * for higher-layer processing that assumes skb->len excludes padding
 * (such as nf_ip_checksum). The caller needs to pull the skb to the
 * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
 */
static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family)
{
        unsigned int len;
        int err;

        switch (family) {
        case NFPROTO_IPV4:
                len = ntohs(ip_hdr(skb)->tot_len);
                break;
        case NFPROTO_IPV6:
                len = sizeof(struct ipv6hdr)
                        + ntohs(ipv6_hdr(skb)->payload_len);
                break;
        default:
                len = skb->len;
        }

        err = pskb_trim_rcsum(skb, len);

        return err;
}
static u8 tcf_ct_skb_nf_family(struct sk_buff *skb)
{
        u8 family = NFPROTO_UNSPEC;

        switch (skb_protocol(skb, true)) {
        case htons(ETH_P_IP):
                family = NFPROTO_IPV4;
                break;
        case htons(ETH_P_IPV6):
                family = NFPROTO_IPV6;
                break;
        default:
                break;
        }

        return family;
}
static int tcf_ct_ipv4_is_fragment(struct sk_buff *skb, bool *frag)
{
        unsigned int len;

        len = skb_network_offset(skb) + sizeof(struct iphdr);
        if (unlikely(skb->len < len))
                return -EINVAL;
        if (unlikely(!pskb_may_pull(skb, len)))
                return -ENOMEM;

        *frag = ip_is_fragment(ip_hdr(skb));
        return 0;
}
static int tcf_ct_ipv6_is_fragment(struct sk_buff *skb, bool *frag)
{
        unsigned int flags = 0, len, payload_ofs = 0;
        unsigned short frag_off;
        int nexthdr;

        len = skb_network_offset(skb) + sizeof(struct ipv6hdr);
        if (unlikely(skb->len < len))
                return -EINVAL;
        if (unlikely(!pskb_may_pull(skb, len)))
                return -ENOMEM;

        nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags);
        if (unlikely(nexthdr < 0))
                return -EPROTO;

        *frag = flags & IP6_FH_F_FRAG;
        return 0;
}
static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
                                   u8 family, u16 zone, bool *defrag)
{
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct;
        int err = 0;
        bool frag;
        u16 mru;

        /* Previously seen (loopback)? Ignore. */
        ct = nf_ct_get(skb, &ctinfo);
        if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
                return 0;

        if (family == NFPROTO_IPV4)
                err = tcf_ct_ipv4_is_fragment(skb, &frag);
        else
                err = tcf_ct_ipv6_is_fragment(skb, &frag);
        if (err || !frag)
                return err;

        mru = tc_skb_cb(skb)->mru;

        if (family == NFPROTO_IPV4) {
                enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;

                memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
                local_bh_disable();
                err = ip_defrag(net, skb, user);
                local_bh_enable();
                if (err && err != -EINPROGRESS)
                        return err;

                if (!err) {
                        *defrag = true;
                        mru = IPCB(skb)->frag_max_size;
                }
        } else { /* NFPROTO_IPV6 */
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
                enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;

                memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
                err = nf_ct_frag6_gather(net, skb, user);
                if (err && err != -EINPROGRESS)
                        goto out_free;

                if (!err) {
                        *defrag = true;
                        mru = IP6CB(skb)->frag_max_size;
                }
#else
                err = -EOPNOTSUPP;
                goto out_free;
#endif
        }

        if (err != -EINPROGRESS)
                tc_skb_cb(skb)->mru = mru;
        skb_clear_hash(skb);
        skb->ignore_df = 1;
        return err;

out_free:
        kfree_skb(skb);
        return err;
}
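/* On -EINPROGRESS the skb has been queued by the defrag engine and no
 * longer belongs to us; tcf_ct_act() translates that into TC_ACT_STOLEN.
 */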
static void tcf_ct_params_free(struct rcu_head *head)
{
        struct tcf_ct_params *params = container_of(head,
                                                    struct tcf_ct_params, rcu);

        tcf_ct_flow_table_put(params);

        if (params->tmpl)
                nf_ct_put(params->tmpl);
        kfree(params);
}
#if IS_ENABLED(CONFIG_NF_NAT)
/* Modelled after nf_nat_ipv[46]_fn().
 * range is only used for new, uninitialized NAT state.
 * Returns either NF_ACCEPT or NF_DROP.
 */
static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
                          enum ip_conntrack_info ctinfo,
                          const struct nf_nat_range2 *range,
                          enum nf_nat_manip_type maniptype)
{
        __be16 proto = skb_protocol(skb, true);
        int hooknum, err = NF_ACCEPT;

        /* See HOOK2MANIP(). */
        if (maniptype == NF_NAT_MANIP_SRC)
                hooknum = NF_INET_LOCAL_IN; /* Source NAT */
        else
                hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */

        switch (ctinfo) {
        case IP_CT_RELATED:
        case IP_CT_RELATED_REPLY:
                if (proto == htons(ETH_P_IP) &&
                    ip_hdr(skb)->protocol == IPPROTO_ICMP) {
                        if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
                                                           hooknum))
                                err = NF_DROP;
                        goto out;
                } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) {
                        __be16 frag_off;
                        u8 nexthdr = ipv6_hdr(skb)->nexthdr;
                        int hdrlen = ipv6_skip_exthdr(skb,
                                                      sizeof(struct ipv6hdr),
                                                      &nexthdr, &frag_off);

                        if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
                                if (!nf_nat_icmpv6_reply_translation(skb, ct,
                                                                     ctinfo,
                                                                     hooknum,
                                                                     hdrlen))
                                        err = NF_DROP;
                                goto out;
                        }
                }
                /* Non-ICMP, fall thru to initialize if needed. */
                fallthrough;
        case IP_CT_NEW:
                /* Seen it before? This can happen for loopback, retrans,
                 * or local packets.
                 */
                if (!nf_nat_initialized(ct, maniptype)) {
                        /* Initialize according to the NAT action. */
                        err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
                                /* Action is set up to establish a new
                                 * mapping.
                                 */
                                ? nf_nat_setup_info(ct, range, maniptype)
                                : nf_nat_alloc_null_binding(ct, hooknum);
                        if (err != NF_ACCEPT)
                                goto out;
                }
                break;

        case IP_CT_ESTABLISHED:
        case IP_CT_ESTABLISHED_REPLY:
                break;

        default:
                err = NF_DROP;
                goto out;
        }

        err = nf_nat_packet(ct, ctinfo, hooknum, skb);
        if (err == NF_ACCEPT) {
                if (maniptype == NF_NAT_MANIP_SRC)
                        tc_skb_cb(skb)->post_ct_snat = 1;
                if (maniptype == NF_NAT_MANIP_DST)
                        tc_skb_cb(skb)->post_ct_dnat = 1;
        }
out:
        return err;
}
#endif /* CONFIG_NF_NAT */
static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
        u32 new_mark;

        if (!mask)
                return;

        new_mark = mark | (ct->mark & ~(mask));
        if (ct->mark != new_mark) {
                ct->mark = new_mark;
                if (nf_ct_is_confirmed(ct))
                        nf_conntrack_event_cache(IPCT_MARK, ct);
        }
#endif
}
static void tcf_ct_act_set_labels(struct nf_conn *ct,
                                  u32 *labels,
                                  u32 *labels_m)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)
        size_t labels_sz = sizeof_field(struct tcf_ct_params, labels);

        if (!memchr_inv(labels_m, 0, labels_sz))
                return;

        nf_connlabels_replace(ct, labels, labels_m, 4);
#endif
}
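/* tcf_ct_act_nat() below derives the manip direction from the conntrack
 * status for established flows (reversing it for reply-direction packets)
 * and runs a second ct_nat_execute() pass when a connection has both
 * source and destination NAT set up.
 */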
static int tcf_ct_act_nat(struct sk_buff *skb,
                          struct nf_conn *ct,
                          enum ip_conntrack_info ctinfo,
                          int ct_action,
                          struct nf_nat_range2 *range,
                          bool commit)
{
#if IS_ENABLED(CONFIG_NF_NAT)
        enum nf_nat_manip_type maniptype;
        int err;

        if (!(ct_action & TCA_CT_ACT_NAT))
                return NF_ACCEPT;

        /* Add NAT extension if not confirmed yet. */
        if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
                return NF_DROP; /* Can't NAT. */

        if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) &&
            (ctinfo != IP_CT_RELATED || commit)) {
                /* NAT an established or related connection like before. */
                if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
                        /* This is the REPLY direction for a connection
                         * for which NAT was applied in the forward
                         * direction. Do the reverse NAT.
                         */
                        maniptype = ct->status & IPS_SRC_NAT
                                ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
                else
                        maniptype = ct->status & IPS_SRC_NAT
                                ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
        } else if (ct_action & TCA_CT_ACT_NAT_SRC) {
                maniptype = NF_NAT_MANIP_SRC;
        } else if (ct_action & TCA_CT_ACT_NAT_DST) {
                maniptype = NF_NAT_MANIP_DST;
        } else {
                return NF_ACCEPT;
        }

        err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
        if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
                if (ct->status & IPS_SRC_NAT) {
                        if (maniptype == NF_NAT_MANIP_SRC)
                                maniptype = NF_NAT_MANIP_DST;
                        else
                                maniptype = NF_NAT_MANIP_SRC;

                        err = ct_nat_execute(skb, ct, ctinfo, range,
                                             maniptype);
                } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
                        err = ct_nat_execute(skb, ct, ctinfo, NULL,
                                             NF_NAT_MANIP_SRC);
                }
        }
        return err;
#else
        return NF_ACCEPT;
#endif
}
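/* Main datapath of the action: defragment and trim the packet, try the
 * zone's flow table fast path, otherwise run nf_conntrack_in(), then
 * apply NAT/mark/labels, optionally commit the connection, and finally
 * consider it for flow table offload.
 */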
static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
                      struct tcf_result *res)
{
        struct net *net = dev_net(skb->dev);
        bool cached, commit, clear, force;
        enum ip_conntrack_info ctinfo;
        struct tcf_ct *c = to_ct(a);
        struct nf_conn *tmpl = NULL;
        struct nf_hook_state state;
        int nh_ofs, err, retval;
        struct tcf_ct_params *p;
        bool skip_add = false;
        bool defrag = false;
        struct nf_conn *ct;
        u8 family;

        p = rcu_dereference_bh(c->params);

        retval = READ_ONCE(c->tcf_action);
        commit = p->ct_action & TCA_CT_ACT_COMMIT;
        clear = p->ct_action & TCA_CT_ACT_CLEAR;
        force = p->ct_action & TCA_CT_ACT_FORCE;
        tmpl = p->tmpl;

        tcf_lastuse_update(&c->tcf_tm);
        tcf_action_update_bstats(&c->common, skb);

        if (clear) {
                tc_skb_cb(skb)->post_ct = false;
                ct = nf_ct_get(skb, &ctinfo);
                if (ct) {
                        nf_ct_put(ct);
                        nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
                }

                goto out_clear;
        }

        family = tcf_ct_skb_nf_family(skb);
        if (family == NFPROTO_UNSPEC)
                goto drop;

        /* The conntrack module expects to be working at L3.
         * We also try to pull the IPv4/6 header to linear area
         */
        nh_ofs = skb_network_offset(skb);
        skb_pull_rcsum(skb, nh_ofs);
        err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag);
        if (err == -EINPROGRESS) {
                retval = TC_ACT_STOLEN;
                goto out_clear;
        }
        if (err)
                goto drop;

        err = tcf_ct_skb_network_trim(skb, family);
        if (err)
                goto drop;

        /* If we are recirculating packets to match on ct fields and
         * committing with a separate ct action, then we don't need to
         * actually run the packet through conntrack twice unless it's for a
         * different zone.
         */
        cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
        if (!cached) {
                if (tcf_ct_flow_table_lookup(p, skb, family)) {
                        skip_add = true;
                        goto do_nat;
                }

                /* Associate skb with specified zone. */
                if (tmpl) {
                        nf_conntrack_put(skb_nfct(skb));
                        nf_conntrack_get(&tmpl->ct_general);
                        nf_ct_set(skb, tmpl, IP_CT_NEW);
                }

                state.hook = NF_INET_PRE_ROUTING;
                state.net = net;
                state.pf = family;
                err = nf_conntrack_in(skb, &state);
                if (err != NF_ACCEPT)
                        goto out_push;
        }

do_nat:
        ct = nf_ct_get(skb, &ctinfo);
        if (!ct)
                goto out_push;
        nf_ct_deliver_cached_events(ct);
        nf_conn_act_ct_ext_fill(skb, ct, ctinfo);

        err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit);
        if (err != NF_ACCEPT)
                goto drop;

        if (commit) {
                tcf_ct_act_set_mark(ct, p->mark, p->mark_mask);
                tcf_ct_act_set_labels(ct, p->labels, p->labels_mask);

                if (!nf_ct_is_confirmed(ct))
                        nf_conn_act_ct_ext_add(ct);

                /* This will take care of sending queued events
                 * even if the connection is already confirmed.
                 */
                if (nf_conntrack_confirm(skb) != NF_ACCEPT)
                        goto drop;
        }

        if (!skip_add)
                tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);

out_push:
        skb_push_rcsum(skb, nh_ofs);

        tc_skb_cb(skb)->post_ct = true;
        tc_skb_cb(skb)->zone = p->zone;

out_clear:
        if (defrag)
                qdisc_skb_cb(skb)->pkt_len = skb->len;
        return retval;

drop:
        tcf_action_inc_drop_qstats(&c->common);
        return TC_ACT_SHOT;
}
static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
        [TCA_CT_ACTION] = { .type = NLA_U16 },
        [TCA_CT_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_ct)),
        [TCA_CT_ZONE] = { .type = NLA_U16 },
        [TCA_CT_MARK] = { .type = NLA_U32 },
        [TCA_CT_MARK_MASK] = { .type = NLA_U32 },
        [TCA_CT_LABELS] = { .type = NLA_BINARY,
                            .len = 128 / BITS_PER_BYTE },
        [TCA_CT_LABELS_MASK] = { .type = NLA_BINARY,
                                 .len = 128 / BITS_PER_BYTE },
        [TCA_CT_NAT_IPV4_MIN] = { .type = NLA_U32 },
        [TCA_CT_NAT_IPV4_MAX] = { .type = NLA_U32 },
        [TCA_CT_NAT_IPV6_MIN] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
        [TCA_CT_NAT_IPV6_MAX] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
        [TCA_CT_NAT_PORT_MIN] = { .type = NLA_U16 },
        [TCA_CT_NAT_PORT_MAX] = { .type = NLA_U16 },
};
static int tcf_ct_fill_params_nat(struct tcf_ct_params *p,
                                  struct tc_ct *parm,
                                  struct nlattr **tb,
                                  struct netlink_ext_ack *extack)
{
        struct nf_nat_range2 *range;

        if (!(p->ct_action & TCA_CT_ACT_NAT))
                return 0;

        if (!IS_ENABLED(CONFIG_NF_NAT)) {
                NL_SET_ERR_MSG_MOD(extack, "Netfilter nat isn't enabled in kernel");
                return -EOPNOTSUPP;
        }

        if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST)))
                return 0;

        if ((p->ct_action & TCA_CT_ACT_NAT_SRC) &&
            (p->ct_action & TCA_CT_ACT_NAT_DST)) {
                NL_SET_ERR_MSG_MOD(extack, "dnat and snat can't be enabled at the same time");
                return -EOPNOTSUPP;
        }

        range = &p->range;
        if (tb[TCA_CT_NAT_IPV4_MIN]) {
                struct nlattr *max_attr = tb[TCA_CT_NAT_IPV4_MAX];

                p->ipv4_range = true;
                range->flags |= NF_NAT_RANGE_MAP_IPS;
                range->min_addr.ip =
                        nla_get_in_addr(tb[TCA_CT_NAT_IPV4_MIN]);

                range->max_addr.ip = max_attr ?
                                     nla_get_in_addr(max_attr) :
                                     range->min_addr.ip;
        } else if (tb[TCA_CT_NAT_IPV6_MIN]) {
                struct nlattr *max_attr = tb[TCA_CT_NAT_IPV6_MAX];

                p->ipv4_range = false;
                range->flags |= NF_NAT_RANGE_MAP_IPS;
                range->min_addr.in6 =
                        nla_get_in6_addr(tb[TCA_CT_NAT_IPV6_MIN]);

                range->max_addr.in6 = max_attr ?
                                      nla_get_in6_addr(max_attr) :
                                      range->min_addr.in6;
        }

        if (tb[TCA_CT_NAT_PORT_MIN]) {
                range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
                range->min_proto.all = nla_get_be16(tb[TCA_CT_NAT_PORT_MIN]);

                range->max_proto.all = tb[TCA_CT_NAT_PORT_MAX] ?
                                       nla_get_be16(tb[TCA_CT_NAT_PORT_MAX]) :
                                       range->min_proto.all;
        }

        return 0;
}
static void tcf_ct_set_key_val(struct nlattr **tb,
                               void *val, int val_type,
                               void *mask, int mask_type,
                               int len)
{
        if (!tb[val_type])
                return;
        nla_memcpy(val, tb[val_type], len);

        if (!mask)
                return;

        if (mask_type == TCA_CT_UNSPEC || !tb[mask_type])
                memset(mask, 0xff, len);
        else
                nla_memcpy(mask, tb[mask_type], len);
}
static int tcf_ct_fill_params(struct net *net,
                              struct tcf_ct_params *p,
                              struct tc_ct *parm,
                              struct nlattr **tb,
                              struct netlink_ext_ack *extack)
{
        struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
        struct nf_conntrack_zone zone;
        struct nf_conn *tmpl;
        int err;

        p->zone = NF_CT_DEFAULT_ZONE_ID;

        tcf_ct_set_key_val(tb,
                           &p->ct_action, TCA_CT_ACTION,
                           NULL, TCA_CT_UNSPEC,
                           sizeof(p->ct_action));

        if (p->ct_action & TCA_CT_ACT_CLEAR)
                return 0;

        err = tcf_ct_fill_params_nat(p, parm, tb, extack);
        if (err)
                return err;

        if (tb[TCA_CT_MARK]) {
                if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)) {
                        NL_SET_ERR_MSG_MOD(extack, "Conntrack mark isn't enabled.");
                        return -EOPNOTSUPP;
                }
                tcf_ct_set_key_val(tb,
                                   &p->mark, TCA_CT_MARK,
                                   &p->mark_mask, TCA_CT_MARK_MASK,
                                   sizeof(p->mark));
        }

        if (tb[TCA_CT_LABELS]) {
                if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) {
                        NL_SET_ERR_MSG_MOD(extack, "Conntrack labels isn't enabled.");
                        return -EOPNOTSUPP;
                }

                if (!tn->labels) {
                        NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length");
                        return -EOPNOTSUPP;
                }
                tcf_ct_set_key_val(tb,
                                   p->labels, TCA_CT_LABELS,
                                   p->labels_mask, TCA_CT_LABELS_MASK,
                                   sizeof(p->labels));
        }

        if (tb[TCA_CT_ZONE]) {
                if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) {
                        NL_SET_ERR_MSG_MOD(extack, "Conntrack zones isn't enabled.");
                        return -EOPNOTSUPP;
                }

                tcf_ct_set_key_val(tb,
                                   &p->zone, TCA_CT_ZONE,
                                   NULL, TCA_CT_UNSPEC,
                                   sizeof(p->zone));
        }

        nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0);
        tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL);
        if (!tmpl) {
                NL_SET_ERR_MSG_MOD(extack, "Failed to allocate conntrack template");
                return -ENOMEM;
        }
        __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
        p->tmpl = tmpl;

        return 0;
}
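/* The template conntrack allocated above is marked confirmed and attached
 * to packets before nf_conntrack_in() so the configured zone drives the
 * conntrack lookup; see the tmpl handling in tcf_ct_act().
 */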
static int tcf_ct_init(struct net *net, struct nlattr *nla,
                       struct nlattr *est, struct tc_action **a,
                       struct tcf_proto *tp, u32 flags,
                       struct netlink_ext_ack *extack)
{
        struct tc_action_net *tn = net_generic(net, ct_net_id);
        bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct tcf_ct_params *params = NULL;
        struct nlattr *tb[TCA_CT_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tc_ct *parm;
        struct tcf_ct *c;
        int err, res = 0;
        u32 index;

        if (!nla) {
                NL_SET_ERR_MSG_MOD(extack, "Ct requires attributes to be passed");
                return -EINVAL;
        }

        err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack);
        if (err < 0)
                return err;

        if (!tb[TCA_CT_PARMS]) {
                NL_SET_ERR_MSG_MOD(extack, "Missing required ct parameters");
                return -EINVAL;
        }
        parm = nla_data(tb[TCA_CT_PARMS]);
        index = parm->index;
        err = tcf_idr_check_alloc(tn, &index, a, bind);
        if (err < 0)
                return err;

        if (!err) {
                err = tcf_idr_create_from_flags(tn, index, est, a,
                                                &act_ct_ops, bind, flags);
                if (err) {
                        tcf_idr_cleanup(tn, index);
                        return err;
                }
                res = ACT_P_CREATED;
        } else {
                if (bind)
                        return 0;

                if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
        }
        err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
        if (err < 0)
                goto cleanup;

        c = to_ct(*a);

        params = kzalloc(sizeof(*params), GFP_KERNEL);
        if (unlikely(!params)) {
                err = -ENOMEM;
                goto cleanup;
        }

        err = tcf_ct_fill_params(net, params, parm, tb, extack);
        if (err)
                goto cleanup;

        err = tcf_ct_flow_table_get(params);
        if (err)
                goto cleanup;

        spin_lock_bh(&c->tcf_lock);
        goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
        params = rcu_replace_pointer(c->params, params,
                                     lockdep_is_held(&c->tcf_lock));
        spin_unlock_bh(&c->tcf_lock);

        if (goto_ch)
                tcf_chain_put_by_act(goto_ch);
        if (params)
                call_rcu(&params->rcu, tcf_ct_params_free);

        return res;

cleanup:
        if (goto_ch)
                tcf_chain_put_by_act(goto_ch);
        kfree(params);
        tcf_idr_release(*a, bind);
        return err;
}
static void tcf_ct_cleanup(struct tc_action *a)
{
        struct tcf_ct_params *params;
        struct tcf_ct *c = to_ct(a);

        params = rcu_dereference_protected(c->params, 1);
        if (params)
                call_rcu(&params->rcu, tcf_ct_params_free);
}
static int tcf_ct_dump_key_val(struct sk_buff *skb,
                               void *val, int val_type,
                               void *mask, int mask_type,
                               int len)
{
        int err;

        if (mask && !memchr_inv(mask, 0, len))
                return 0;

        err = nla_put(skb, val_type, len, val);
        if (err)
                return err;

        if (mask_type != TCA_CT_UNSPEC) {
                err = nla_put(skb, mask_type, len, mask);
                if (err)
                        return err;
        }

        return 0;
}
static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p)
{
        struct nf_nat_range2 *range = &p->range;

        if (!(p->ct_action & TCA_CT_ACT_NAT))
                return 0;

        if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST)))
                return 0;

        if (range->flags & NF_NAT_RANGE_MAP_IPS) {
                if (p->ipv4_range) {
                        if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MIN,
                                            range->min_addr.ip))
                                return -1;
                        if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MAX,
                                            range->max_addr.ip))
                                return -1;
                } else {
                        if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MIN,
                                             &range->min_addr.in6))
                                return -1;
                        if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MAX,
                                             &range->max_addr.in6))
                                return -1;
                }
        }

        if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
                if (nla_put_be16(skb, TCA_CT_NAT_PORT_MIN,
                                 range->min_proto.all))
                        return -1;
                if (nla_put_be16(skb, TCA_CT_NAT_PORT_MAX,
                                 range->max_proto.all))
                        return -1;
        }

        return 0;
}
static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
                              int bind, int ref)
{
        unsigned char *b = skb_tail_pointer(skb);
        struct tcf_ct *c = to_ct(a);
        struct tcf_ct_params *p;

        struct tc_ct opt = {
                .index   = c->tcf_index,
                .refcnt  = refcount_read(&c->tcf_refcnt) - ref,
                .bindcnt = atomic_read(&c->tcf_bindcnt) - bind,
        };
        struct tcf_t t;

        spin_lock_bh(&c->tcf_lock);
        p = rcu_dereference_protected(c->params,
                                      lockdep_is_held(&c->tcf_lock));
        opt.action = c->tcf_action;

        if (tcf_ct_dump_key_val(skb,
                                &p->ct_action, TCA_CT_ACTION,
                                NULL, TCA_CT_UNSPEC,
                                sizeof(p->ct_action)))
                goto nla_put_failure;

        if (p->ct_action & TCA_CT_ACT_CLEAR)
                goto skip_dump;

        if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
            tcf_ct_dump_key_val(skb,
                                &p->mark, TCA_CT_MARK,
                                &p->mark_mask, TCA_CT_MARK_MASK,
                                sizeof(p->mark)))
                goto nla_put_failure;

        if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
            tcf_ct_dump_key_val(skb,
                                p->labels, TCA_CT_LABELS,
                                p->labels_mask, TCA_CT_LABELS_MASK,
                                sizeof(p->labels)))
                goto nla_put_failure;

        if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
            tcf_ct_dump_key_val(skb,
                                &p->zone, TCA_CT_ZONE,
                                NULL, TCA_CT_UNSPEC,
                                sizeof(p->zone)))
                goto nla_put_failure;

        if (tcf_ct_dump_nat(skb, p))
                goto nla_put_failure;

skip_dump:
        if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt))
                goto nla_put_failure;

        tcf_tm_dump(&t, &c->tcf_tm);
        if (nla_put_64bit(skb, TCA_CT_TM, sizeof(t), &t, TCA_CT_PAD))
                goto nla_put_failure;
        spin_unlock_bh(&c->tcf_lock);

        return skb->len;
nla_put_failure:
        spin_unlock_bh(&c->tcf_lock);
        nlmsg_trim(skb, b);
        return -1;
}
static int tcf_ct_walker(struct net *net, struct sk_buff *skb,
                         struct netlink_callback *cb, int type,
                         const struct tc_action_ops *ops,
                         struct netlink_ext_ack *extack)
{
        struct tc_action_net *tn = net_generic(net, ct_net_id);

        return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index)
{
        struct tc_action_net *tn = net_generic(net, ct_net_id);

        return tcf_idr_search(tn, a, index);
}
static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets,
                             u64 drops, u64 lastuse, bool hw)
{
        struct tcf_ct *c = to_ct(a);

        tcf_action_update_stats(a, bytes, packets, drops, hw);
        c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse);
}
static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data,
                                    u32 *index_inc, bool bind,
                                    struct netlink_ext_ack *extack)
{
        if (bind) {
                struct flow_action_entry *entry = entry_data;

                entry->id = FLOW_ACTION_CT;
                entry->ct.action = tcf_ct_action(act);
                entry->ct.zone = tcf_ct_zone(act);
                entry->ct.flow_table = tcf_ct_ft(act);
                *index_inc = 1;
        } else {
                struct flow_offload_action *fl_action = entry_data;

                fl_action->id = FLOW_ACTION_CT;
        }

        return 0;
}
static struct tc_action_ops act_ct_ops = {
        .kind = "ct",
        .id = TCA_ID_CT,
        .owner = THIS_MODULE,
        .act = tcf_ct_act,
        .dump = tcf_ct_dump,
        .init = tcf_ct_init,
        .cleanup = tcf_ct_cleanup,
        .walk = tcf_ct_walker,
        .lookup = tcf_ct_search,
        .stats_update = tcf_stats_update,
        .offload_act_setup = tcf_ct_offload_act_setup,
        .size = sizeof(struct tcf_ct),
};
static __net_init int ct_init_net(struct net *net)
{
        unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8;
        struct tc_ct_action_net *tn = net_generic(net, ct_net_id);

        if (nf_connlabels_get(net, n_bits - 1)) {
                tn->labels = false;
                pr_err("act_ct: Failed to set connlabels length");
        } else {
                tn->labels = true;
        }

        return tc_action_net_init(net, &tn->tn, &act_ct_ops);
}
static void __net_exit ct_exit_net(struct list_head *net_list)
{
        struct net *net;

        rtnl_lock();
        list_for_each_entry(net, net_list, exit_list) {
                struct tc_ct_action_net *tn = net_generic(net, ct_net_id);

                if (tn->labels)
                        nf_connlabels_put(net);
        }
        rtnl_unlock();

        tc_action_net_exit(net_list, ct_net_id);
}
static struct pernet_operations ct_net_ops = {
        .init = ct_init_net,
        .exit_batch = ct_exit_net,
        .id   = &ct_net_id,
        .size = sizeof(struct tc_ct_action_net),
};
static int __init ct_init_module(void)
{
        int err;

        act_ct_wq = alloc_ordered_workqueue("act_ct_workqueue", 0);
        if (!act_ct_wq)
                return -ENOMEM;

        err = tcf_ct_flow_tables_init();
        if (err)
                goto err_tbl_init;

        err = tcf_register_action(&act_ct_ops, &ct_net_ops);
        if (err)
                goto err_register;

        static_branch_inc(&tcf_frag_xmit_count);

        return 0;

err_register:
        tcf_ct_flow_tables_uninit();
err_tbl_init:
        destroy_workqueue(act_ct_wq);
        return err;
}
static void __exit ct_cleanup_module(void)
{
        static_branch_dec(&tcf_frag_xmit_count);
        tcf_unregister_action(&act_ct_ops, &ct_net_ops);
        tcf_ct_flow_tables_uninit();
        destroy_workqueue(act_ct_wq);
}
module_init(ct_init_module);
module_exit(ct_cleanup_module);
1697 MODULE_AUTHOR("Paul Blakey <paulb@mellanox.com>");
1698 MODULE_AUTHOR("Yossi Kuperman <yossiku@mellanox.com>");
1699 MODULE_AUTHOR("Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>");
1700 MODULE_DESCRIPTION("Connection tracking action");
1701 MODULE_LICENSE("GPL v2");