#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <linux/tc_act/tc_csum.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>
static struct workqueue_struct *nf_flow_offload_wq;

struct flow_offload_work {
	struct list_head	list;
	enum flow_cls_command	cmd;
	int			priority;
	struct nf_flowtable	*flowtable;
	struct flow_offload	*flow;
	struct work_struct	work;
};
#define NF_FLOW_DISSECTOR(__match, __type, __field)	\
	(__match)->dissector.offset[__type] =		\
		offsetof(struct nf_flow_key, __field)
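
/*
 * Populate the dissector match with tunnel metadata taken from the
 * lightweight tunnel state of the reverse route, so that hardware can
 * match on the encapsulation (key id and outer addresses) of tunnelled
 * flows.
 */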
static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
				   struct ip_tunnel_info *tun_info)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	unsigned int enc_keys;

	if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
		return;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
	key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
	mask->enc_key_id.keyid = 0xffffffff;
	enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
		   BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);

	if (ip_tunnel_info_af(tun_info) == AF_INET) {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
				  enc_ipv4);
		key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
		key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
		if (key->enc_ipv4.src)
			mask->enc_ipv4.src = 0xffffffff;
		if (key->enc_ipv4.dst)
			mask->enc_ipv4.dst = 0xffffffff;
		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	} else {
		memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
		       sizeof(struct in6_addr));
		memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
		       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.src, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&mask->enc_ipv6.src, 0xff,
			       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&mask->enc_ipv6.dst, 0xff,
			       sizeof(struct in6_addr));
		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	}

	match->dissector.used_keys |= enc_keys;
}
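
/*
 * Translate one direction of a flow tuple into a flow dissector match:
 * ingress device, L3 addresses, L4 protocol and ports. Returns a negative
 * errno for address families or transport protocols that cannot be
 * expressed.
 */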
static int nf_flow_rule_match(struct nf_flow_match *match,
			      const struct flow_offload_tuple *tuple,
			      struct dst_entry *other_dst)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	struct ip_tunnel_info *tun_info;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);

	if (other_dst && other_dst->lwtstate) {
		tun_info = lwt_tun_info(other_dst->lwtstate);
		nf_flow_rule_lwt_match(match, tun_info);
	}

	key->meta.ingress_ifindex = tuple->iifidx;
	mask->meta.ingress_ifindex = 0xffffffff;

	switch (tuple->l3proto) {
	case AF_INET:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		key->basic.n_proto = htons(ETH_P_IP);
		key->ipv4.src = tuple->src_v4.s_addr;
		mask->ipv4.src = 0xffffffff;
		key->ipv4.dst = tuple->dst_v4.s_addr;
		mask->ipv4.dst = 0xffffffff;
		break;
	case AF_INET6:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		key->basic.n_proto = htons(ETH_P_IPV6);
		key->ipv6.src = tuple->src_v6;
		memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
		key->ipv6.dst = tuple->dst_v6;
		memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
		break;
	default:
		return -EOPNOTSUPP;
	}
	mask->control.addr_type = 0xffff;
	match->dissector.used_keys |= BIT(key->control.addr_type);
	mask->basic.n_proto = 0xffff;

	switch (tuple->l4proto) {
	case IPPROTO_TCP:
		key->tcp.flags = 0;
		mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
		break;
	case IPPROTO_UDP:
		break;
	default:
		return -EOPNOTSUPP;
	}

	key->basic.ip_proto = tuple->l4proto;
	mask->basic.ip_proto = 0xff;

	key->tp.src = tuple->src_port;
	mask->tp.src = 0xffff;
	key->tp.dst = tuple->dst_port;
	mask->tp.dst = 0xffff;

	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
				      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
				      BIT(FLOW_DISSECTOR_KEY_BASIC) |
				      BIT(FLOW_DISSECTOR_KEY_PORTS);
	return 0;
}
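
/*
 * Header rewrites are expressed as 32-bit wide mangle actions: each action
 * carries a value and a mask selecting which bits of the word at 'offset'
 * are replaced.
 */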
static void flow_offload_mangle(struct flow_action_entry *entry,
				enum flow_action_mangle_base htype, u32 offset,
				const __be32 *value, const __be32 *mask)
{
	entry->id = FLOW_ACTION_MANGLE;
	entry->mangle.htype = htype;
	entry->mangle.offset = offset;
	memcpy(&entry->mangle.mask, mask, sizeof(u32));
	memcpy(&entry->mangle.val, value, sizeof(u32));
}
static inline struct flow_action_entry *
flow_action_entry_next(struct nf_flow_rule *flow_rule)
{
	int i = flow_rule->rule->action.num_entries++;

	return &flow_rule->rule->action.entries[i];
}
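
/*
 * Rewrite the Ethernet source address to the MAC of the egress device
 * (the ingress device of the reverse tuple). The source MAC spans the
 * second half of the 32-bit word at offset 4 and the whole word at
 * offset 8, hence two mangle actions.
 */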
static int flow_offload_eth_src(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *tuple = &flow->tuplehash[!dir].tuple;
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	struct net_device *dev;
	u32 mask, val;
	u16 val16;

	dev = dev_get_by_index(net, tuple->iifidx);
	if (!dev)
		return -ENOENT;

	mask = ~0xffff0000;
	memcpy(&val16, dev->dev_addr, 2);
	val = val16 << 16;
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	mask = ~0xffffffff;
	memcpy(&val, dev->dev_addr + 2, 4);
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
			    &val, &mask);
	dev_put(dev);

	return 0;
}
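
/*
 * The new destination MAC comes from the neighbour cache entry of the
 * next hop; bail out if the neighbour is not NUD_VALID yet, so the flow
 * stays in the software path.
 */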
static int flow_offload_eth_dst(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const void *daddr = &flow->tuplehash[!dir].tuple.src_v4;
	const struct dst_entry *dst_cache;
	unsigned char ha[ETH_ALEN];
	struct neighbour *n;
	u32 mask, val;
	u8 nud_state;
	u16 val16;

	dst_cache = flow->tuplehash[dir].tuple.dst_cache;
	n = dst_neigh_lookup(dst_cache, daddr);
	if (!n)
		return -ENOENT;

	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	ether_addr_copy(ha, n->ha);
	read_unlock_bh(&n->lock);

	if (!(nud_state & NUD_VALID)) {
		neigh_release(n);
		return -ENOENT;
	}

	mask = ~0xffffffff;
	memcpy(&val, ha, 4);
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
			    &val, &mask);

	mask = ~0x0000ffff;
	memcpy(&val16, ha + 4, 2);
	val = val16;
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);
	neigh_release(n);

	return 0;
}
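
/*
 * For SNAT, the original direction rewrites the source address and the
 * reply direction rewrites the destination address; the replacement is
 * always taken from the opposite tuple.
 */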
static void flow_offload_ipv4_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

static void flow_offload_ipv4_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}
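
/*
 * An IPv6 address does not fit in one mangle action, so it is rewritten
 * as four consecutive 32-bit words starting at 'offset'.
 */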
static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
				     unsigned int offset,
				     const __be32 *addr, const __be32 *mask)
{
	struct flow_action_entry *entry;
	int i;

	for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) {
		entry = flow_action_entry_next(flow_rule);
		flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
				    offset + i * sizeof(u32), &addr[i], mask);
	}
}
static void flow_offload_ipv6_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static void flow_offload_ipv6_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}
static int flow_offload_l4proto(const struct flow_offload *flow)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	u8 type = 0;

	switch (protonum) {
	case IPPROTO_TCP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
		break;
	case IPPROTO_UDP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
		break;
	default:
		break;
	}

	return type;
}
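
/*
 * Source and destination ports share the first 32-bit word of the TCP/UDP
 * header: the source port sits in the upper 16 bits (in network order),
 * the destination port in the lower ones, which is why the port value is
 * shifted and the mask adjusted per direction.
 */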
static void flow_offload_port_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

static void flow_offload_port_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}
static void flow_offload_ipv4_checksum(struct net *net,
				       const struct flow_offload *flow,
				       struct nf_flow_rule *flow_rule)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);

	entry->id = FLOW_ACTION_CSUM;
	entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;

	switch (protonum) {
	case IPPROTO_TCP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
		break;
	case IPPROTO_UDP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
		break;
	}
}
static void flow_offload_redirect(const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	struct rtable *rt;

	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
	entry->id = FLOW_ACTION_REDIRECT;
	entry->dev = rt->dst.dev;
	dev_hold(rt->dst.dev);
}
static void flow_offload_encap_tunnel(const struct flow_offload *flow,
				      enum flow_offload_tuple_dir dir,
				      struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	dst = flow->tuplehash[dir].tuple.dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
			entry->tunnel = tun_info;
		}
	}
}

static void flow_offload_decap_tunnel(const struct flow_offload *flow,
				      enum flow_offload_tuple_dir dir,
				      struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	dst = flow->tuplehash[!dir].tuple.dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_TUNNEL_DECAP;
		}
	}
}
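
/*
 * Assemble the action list for one direction of an IPv4 flow: tunnel
 * decap/encap first, then Ethernet header rewrites, NAT mangles and the
 * checksum fixup they require, and finally the redirect to the egress
 * device.
 */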
int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	flow_offload_decap_tunnel(flow, dir, flow_rule);
	flow_offload_encap_tunnel(flow, dir, flow_rule);

	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv4_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
	    test_bit(NF_FLOW_DNAT, &flow->flags))
		flow_offload_ipv4_checksum(net, flow, flow_rule);

	flow_offload_redirect(flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	flow_offload_decap_tunnel(flow, dir, flow_rule);
	flow_offload_encap_tunnel(flow, dir, flow_rule);

	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv6_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}

	flow_offload_redirect(flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
#define NF_FLOW_RULE_ACTION_MAX	16
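
/*
 * Allocate a flow_rule for one direction: set up the match from the tuple
 * (and the tunnel state of the reverse route), then let the flowtable
 * type callback fill in the actions.
 */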
static struct nf_flow_rule *
nf_flow_offload_rule_alloc(struct net *net,
			   const struct flow_offload_work *offload,
			   enum flow_offload_tuple_dir dir)
{
	const struct nf_flowtable *flowtable = offload->flowtable;
	const struct flow_offload *flow = offload->flow;
	const struct flow_offload_tuple *tuple;
	struct nf_flow_rule *flow_rule;
	struct dst_entry *other_dst;
	int err = -ENOMEM;

	flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
	if (!flow_rule)
		goto err_flow;

	flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
	if (!flow_rule->rule)
		goto err_flow_rule;

	flow_rule->rule->match.dissector = &flow_rule->match.dissector;
	flow_rule->rule->match.mask = &flow_rule->match.mask;
	flow_rule->rule->match.key = &flow_rule->match.key;

	tuple = &flow->tuplehash[dir].tuple;
	other_dst = flow->tuplehash[!dir].tuple.dst_cache;
	err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
	if (err < 0)
		goto err_flow_match;

	flow_rule->rule->action.num_entries = 0;
	if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
		goto err_flow_match;

	return flow_rule;

err_flow_match:
	kfree(flow_rule->rule);
err_flow_rule:
	kfree(flow_rule);
err_flow:
	return NULL;
}
static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry;
	int i;

	for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
		entry = &flow_rule->rule->action.entries[i];
		if (entry->id != FLOW_ACTION_REDIRECT)
			continue;

		dev_put(entry->dev);
	}
	kfree(flow_rule->rule);
	kfree(flow_rule);
}

static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
{
	int i;

	for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
		__nf_flow_offload_destroy(flow_rule[i]);
}
static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	struct net *net = read_pnet(&offload->flowtable->net);

	flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_ORIGINAL);
	if (!flow_rule[0])
		return -ENOMEM;

	flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_REPLY);
	if (!flow_rule[1]) {
		__nf_flow_offload_destroy(flow_rule[0]);
		return -ENOMEM;
	}

	return 0;
}
static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
				 __be16 proto, int priority,
				 enum flow_cls_command cmd,
				 const struct flow_offload_tuple *tuple,
				 struct netlink_ext_ack *extack)
{
	cls_flow->common.protocol = proto;
	cls_flow->common.prio = priority;
	cls_flow->common.extack = extack;
	cls_flow->command = cmd;
	cls_flow->cookie = (unsigned long)tuple;
}
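
/*
 * Pass one command (replace, destroy or stats) for one direction of the
 * flow to all callbacks of the flow block. Returns the number of
 * callbacks that accepted the command.
 */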
static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
				 struct flow_offload *flow,
				 struct nf_flow_rule *flow_rule,
				 enum flow_offload_tuple_dir dir,
				 int priority, int cmd,
				 struct flow_stats *stats,
				 struct list_head *block_cb_list)
{
	struct flow_cls_offload cls_flow = {};
	struct flow_block_cb *block_cb;
	struct netlink_ext_ack extack;
	__be16 proto = ETH_P_ALL;
	int err, i = 0;

	nf_flow_offload_init(&cls_flow, proto, priority, cmd,
			     &flow->tuplehash[dir].tuple, &extack);
	if (cmd == FLOW_CLS_REPLACE)
		cls_flow.rule = flow_rule->rule;

	down_read(&flowtable->flow_block_lock);
	list_for_each_entry(block_cb, block_cb_list, list) {
		err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
				   block_cb->cb_priv);
		if (err < 0)
			continue;

		i++;
	}
	up_read(&flowtable->flow_block_lock);

	if (cmd == FLOW_CLS_STATS)
		memcpy(stats, &cls_flow.stats, sizeof(*stats));

	return i;
}
static int flow_offload_tuple_add(struct flow_offload_work *offload,
				  struct nf_flow_rule *flow_rule,
				  enum flow_offload_tuple_dir dir)
{
	return nf_flow_offload_tuple(offload->flowtable, offload->flow,
				     flow_rule, dir, offload->priority,
				     FLOW_CLS_REPLACE, NULL,
				     &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_tuple_del(struct flow_offload_work *offload,
				   enum flow_offload_tuple_dir dir)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->priority, FLOW_CLS_DESTROY, NULL,
			      &offload->flowtable->flow_block.cb_list);
}
static int flow_offload_rule_add(struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	int ok_count = 0;

	ok_count += flow_offload_tuple_add(offload, flow_rule[0],
					   FLOW_OFFLOAD_DIR_ORIGINAL);
	ok_count += flow_offload_tuple_add(offload, flow_rule[1],
					   FLOW_OFFLOAD_DIR_REPLY);
	if (ok_count == 0)
		return -ENOENT;

	return 0;
}
static void flow_offload_work_add(struct flow_offload_work *offload)
{
	struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
	int err;

	err = nf_flow_offload_alloc(offload, flow_rule);
	if (err < 0)
		return;

	err = flow_offload_rule_add(offload, flow_rule);
	if (err < 0)
		set_bit(NF_FLOW_HW_REFRESH, &offload->flow->flags);

	nf_flow_offload_destroy(flow_rule);
}
static void flow_offload_work_del(struct flow_offload_work *offload)
{
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}
static void flow_offload_tuple_stats(struct flow_offload_work *offload,
				     enum flow_offload_tuple_dir dir,
				     struct flow_stats *stats)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->priority, FLOW_CLS_STATS, stats,
			      &offload->flowtable->flow_block.cb_list);
}
static void flow_offload_work_stats(struct flow_offload_work *offload)
{
	struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
	u64 lastused;

	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);

	lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
	offload->flow->timeout = max_t(u64, offload->flow->timeout,
				       lastused + NF_FLOW_TIMEOUT);

	if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
		if (stats[0].pkts)
			nf_ct_acct_add(offload->flow->ct,
				       FLOW_OFFLOAD_DIR_ORIGINAL,
				       stats[0].pkts, stats[0].bytes);
		if (stats[1].pkts)
			nf_ct_acct_add(offload->flow->ct,
				       FLOW_OFFLOAD_DIR_REPLY,
				       stats[1].pkts, stats[1].bytes);
	}
}
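
/*
 * Work handler: all hardware offload requests are serialized through the
 * nf_flow_offload_wq workqueue; NF_FLOW_HW_PENDING is cleared once the
 * request has been processed.
 */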
static void flow_offload_work_handler(struct work_struct *work)
{
	struct flow_offload_work *offload;

	offload = container_of(work, struct flow_offload_work, work);
	switch (offload->cmd) {
	case FLOW_CLS_REPLACE:
		flow_offload_work_add(offload);
		break;
	case FLOW_CLS_DESTROY:
		flow_offload_work_del(offload);
		break;
	case FLOW_CLS_STATS:
		flow_offload_work_stats(offload);
		break;
	default:
		WARN_ON_ONCE(1);
	}

	clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
	kfree(offload);
}
static void flow_offload_queue_work(struct flow_offload_work *offload)
{
	queue_work(nf_flow_offload_wq, &offload->work);
}
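
/*
 * NF_FLOW_HW_PENDING guarantees that only one offload request per flow is
 * in flight at any time; GFP_ATOMIC is used since callers may run in
 * softirq context from the packet path.
 */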
static struct flow_offload_work *
nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
			   struct flow_offload *flow, unsigned int cmd)
{
	struct flow_offload_work *offload;

	if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
		return NULL;

	offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
	if (!offload) {
		clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
		return NULL;
	}

	offload->cmd = cmd;
	offload->flow = flow;
	offload->priority = flowtable->priority;
	offload->flowtable = flowtable;
	INIT_WORK(&offload->work, flow_offload_work_handler);

	return offload;
}
void nf_flow_offload_add(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}
void nf_flow_offload_del(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
	if (!offload)
		return;

	set_bit(NF_FLOW_HW_DYING, &flow->flags);
	flow_offload_queue_work(offload);
}
void nf_flow_offload_stats(struct nf_flowtable *flowtable,
			   struct flow_offload *flow)
{
	struct flow_offload_work *offload;
	__s32 delta;

	delta = nf_flow_timeout_delta(flow->timeout);
	if ((delta >= (9 * NF_FLOW_TIMEOUT) / 10))
		return;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
	if (nf_flowtable_hw_offload(flowtable))
		flush_workqueue(nf_flow_offload_wq);
}
static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
				     struct flow_block_offload *bo,
				     enum flow_block_command cmd)
{
	struct flow_block_cb *block_cb, *next;
	int err = 0;

	switch (cmd) {
	case FLOW_BLOCK_BIND:
		list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
		break;
	case FLOW_BLOCK_UNBIND:
		list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
			list_del(&block_cb->list);
			flow_block_cb_free(block_cb);
		}
		break;
	default:
		WARN_ON_ONCE(1);
		err = -EOPNOTSUPP;
	}

	return err;
}
static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
					     struct net *net,
					     enum flow_block_command cmd,
					     struct nf_flowtable *flowtable,
					     struct netlink_ext_ack *extack)
{
	memset(bo, 0, sizeof(*bo));
	bo->net		= net;
	bo->block	= &flowtable->flow_block;
	bo->command	= cmd;
	bo->binder_type	= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
	bo->extack	= extack;
	INIT_LIST_HEAD(&bo->cb_list);
}
static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
					  struct nf_flowtable *flowtable,
					  struct net_device *dev,
					  enum flow_block_command cmd,
					  struct netlink_ext_ack *extack)
{
	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);
	flow_indr_block_call(dev, bo, cmd, TC_SETUP_FT);

	if (list_empty(&bo->cb_list))
		return -EOPNOTSUPP;

	return 0;
}
static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
				     struct nf_flowtable *flowtable,
				     struct net_device *dev,
				     enum flow_block_command cmd,
				     struct netlink_ext_ack *extack)
{
	int err;

	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);
	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
	if (err < 0)
		return err;

	return 0;
}
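
/*
 * Bind or unbind the flowtable to a device's flow block: devices with
 * ndo_setup_tc are driven directly, everything else goes through the
 * indirect block infrastructure (e.g. tunnel devices).
 */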
int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
				struct net_device *dev,
				enum flow_block_command cmd)
{
	struct netlink_ext_ack extack = {};
	struct flow_block_offload bo;
	int err;

	if (!nf_flowtable_hw_offload(flowtable))
		return 0;

	if (dev->netdev_ops->ndo_setup_tc)
		err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
						&extack);
	else
		err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
						     &extack);
	if (err < 0)
		return err;

	return nf_flow_table_block_setup(flowtable, &bo, cmd);
}
EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
static void nf_flow_table_indr_block_ing_cmd(struct net_device *dev,
					     struct nf_flowtable *flowtable,
					     flow_indr_block_bind_cb_t *cb,
					     void *cb_priv,
					     enum flow_block_command cmd)
{
	struct netlink_ext_ack extack = {};
	struct flow_block_offload bo;

	if (!flowtable)
		return;

	nf_flow_table_block_offload_init(&bo, dev_net(dev), cmd, flowtable,
					 &extack);

	cb(dev, cb_priv, TC_SETUP_FT, &bo);

	nf_flow_table_block_setup(flowtable, &bo, cmd);
}
static void nf_flow_table_indr_block_cb_cmd(struct nf_flowtable *flowtable,
					    struct net_device *dev,
					    flow_indr_block_bind_cb_t *cb,
					    void *cb_priv,
					    enum flow_block_command cmd)
{
	if (!(flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD))
		return;

	nf_flow_table_indr_block_ing_cmd(dev, flowtable, cb, cb_priv, cmd);
}
static void nf_flow_table_indr_block_cb(struct net_device *dev,
					flow_indr_block_bind_cb_t *cb,
					void *cb_priv,
					enum flow_block_command cmd)
{
	struct net *net = dev_net(dev);
	struct nft_flowtable *nft_ft;
	struct nft_table *table;
	struct nft_hook *hook;

	mutex_lock(&net->nft.commit_mutex);
	list_for_each_entry(table, &net->nft.tables, list) {
		list_for_each_entry(nft_ft, &table->flowtables, list) {
			list_for_each_entry(hook, &nft_ft->hook_list, list) {
				if (hook->ops.dev != dev)
					continue;

				nf_flow_table_indr_block_cb_cmd(&nft_ft->data,
								dev, cb,
								cb_priv, cmd);
			}
		}
	}
	mutex_unlock(&net->nft.commit_mutex);
}
static struct flow_indr_block_entry block_ing_entry = {
	.cb	= nf_flow_table_indr_block_cb,
	.list	= LIST_HEAD_INIT(block_ing_entry.list),
};
int nf_flow_table_offload_init(void)
{
	nf_flow_offload_wq = alloc_workqueue("nf_flow_table_offload",
					     WQ_UNBOUND, 0);
	if (!nf_flow_offload_wq)
		return -ENOMEM;

	flow_indr_add_block_cb(&block_ing_entry);

	return 0;
}
void nf_flow_table_offload_exit(void)
{
	flow_indr_del_block_cb(&block_ing_entry);
	destroy_workqueue(nf_flow_offload_wq);
}