#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>
18 static int nf_flow_state_check(struct flow_offload
*flow
, int proto
,
19 struct sk_buff
*skb
, unsigned int thoff
)
23 if (proto
!= IPPROTO_TCP
)
26 if (!pskb_may_pull(skb
, thoff
+ sizeof(*tcph
)))
29 tcph
= (void *)(skb_network_header(skb
) + thoff
);
30 if (unlikely(tcph
->fin
|| tcph
->rst
)) {
31 flow_offload_teardown(flow
);
38 static int nf_flow_nat_ip_tcp(struct sk_buff
*skb
, unsigned int thoff
,
39 __be32 addr
, __be32 new_addr
)
43 if (!pskb_may_pull(skb
, thoff
+ sizeof(*tcph
)) ||
44 skb_try_make_writable(skb
, thoff
+ sizeof(*tcph
)))
47 tcph
= (void *)(skb_network_header(skb
) + thoff
);
48 inet_proto_csum_replace4(&tcph
->check
, skb
, addr
, new_addr
, true);
53 static int nf_flow_nat_ip_udp(struct sk_buff
*skb
, unsigned int thoff
,
54 __be32 addr
, __be32 new_addr
)
58 if (!pskb_may_pull(skb
, thoff
+ sizeof(*udph
)) ||
59 skb_try_make_writable(skb
, thoff
+ sizeof(*udph
)))
62 udph
= (void *)(skb_network_header(skb
) + thoff
);
63 if (udph
->check
|| skb
->ip_summed
== CHECKSUM_PARTIAL
) {
64 inet_proto_csum_replace4(&udph
->check
, skb
, addr
,
67 udph
->check
= CSUM_MANGLED_0
;
73 static int nf_flow_nat_ip_l4proto(struct sk_buff
*skb
, struct iphdr
*iph
,
74 unsigned int thoff
, __be32 addr
,
77 switch (iph
->protocol
) {
79 if (nf_flow_nat_ip_tcp(skb
, thoff
, addr
, new_addr
) < 0)
83 if (nf_flow_nat_ip_udp(skb
, thoff
, addr
, new_addr
) < 0)
91 static int nf_flow_snat_ip(const struct flow_offload
*flow
, struct sk_buff
*skb
,
92 struct iphdr
*iph
, unsigned int thoff
,
93 enum flow_offload_tuple_dir dir
)
95 __be32 addr
, new_addr
;
98 case FLOW_OFFLOAD_DIR_ORIGINAL
:
100 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.dst_v4
.s_addr
;
101 iph
->saddr
= new_addr
;
103 case FLOW_OFFLOAD_DIR_REPLY
:
105 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.src_v4
.s_addr
;
106 iph
->daddr
= new_addr
;
111 csum_replace4(&iph
->check
, addr
, new_addr
);
113 return nf_flow_nat_ip_l4proto(skb
, iph
, thoff
, addr
, new_addr
);
116 static int nf_flow_dnat_ip(const struct flow_offload
*flow
, struct sk_buff
*skb
,
117 struct iphdr
*iph
, unsigned int thoff
,
118 enum flow_offload_tuple_dir dir
)
120 __be32 addr
, new_addr
;
123 case FLOW_OFFLOAD_DIR_ORIGINAL
:
125 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.src_v4
.s_addr
;
126 iph
->daddr
= new_addr
;
128 case FLOW_OFFLOAD_DIR_REPLY
:
130 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.dst_v4
.s_addr
;
131 iph
->saddr
= new_addr
;
136 csum_replace4(&iph
->check
, addr
, new_addr
);
138 return nf_flow_nat_ip_l4proto(skb
, iph
, thoff
, addr
, new_addr
);
141 static int nf_flow_nat_ip(const struct flow_offload
*flow
, struct sk_buff
*skb
,
142 unsigned int thoff
, enum flow_offload_tuple_dir dir
)
144 struct iphdr
*iph
= ip_hdr(skb
);
146 if (flow
->flags
& FLOW_OFFLOAD_SNAT
&&
147 (nf_flow_snat_port(flow
, skb
, thoff
, iph
->protocol
, dir
) < 0 ||
148 nf_flow_snat_ip(flow
, skb
, iph
, thoff
, dir
) < 0))
150 if (flow
->flags
& FLOW_OFFLOAD_DNAT
&&
151 (nf_flow_dnat_port(flow
, skb
, thoff
, iph
->protocol
, dir
) < 0 ||
152 nf_flow_dnat_ip(flow
, skb
, iph
, thoff
, dir
) < 0))
/* An IPv4 header longer than the fixed 20 bytes carries options, which
 * the fast path does not handle. thoff is the transport header offset
 * (ihl * 4).
 */
static bool ip_has_options(unsigned int thoff)
{
	return thoff != sizeof(struct iphdr);
}
163 static int nf_flow_tuple_ip(struct sk_buff
*skb
, const struct net_device
*dev
,
164 struct flow_offload_tuple
*tuple
)
166 struct flow_ports
*ports
;
170 if (!pskb_may_pull(skb
, sizeof(*iph
)))
174 thoff
= iph
->ihl
* 4;
176 if (ip_is_fragment(iph
) ||
177 unlikely(ip_has_options(thoff
)))
180 if (iph
->protocol
!= IPPROTO_TCP
&&
181 iph
->protocol
!= IPPROTO_UDP
)
184 thoff
= iph
->ihl
* 4;
185 if (!pskb_may_pull(skb
, thoff
+ sizeof(*ports
)))
188 ports
= (struct flow_ports
*)(skb_network_header(skb
) + thoff
);
190 tuple
->src_v4
.s_addr
= iph
->saddr
;
191 tuple
->dst_v4
.s_addr
= iph
->daddr
;
192 tuple
->src_port
= ports
->source
;
193 tuple
->dst_port
= ports
->dest
;
194 tuple
->l3proto
= AF_INET
;
195 tuple
->l4proto
= iph
->protocol
;
196 tuple
->iifidx
= dev
->ifindex
;
201 /* Based on ip_exceeds_mtu(). */
202 static bool nf_flow_exceeds_mtu(const struct sk_buff
*skb
, unsigned int mtu
)
207 if (skb_is_gso(skb
) && skb_gso_validate_network_len(skb
, mtu
))
214 nf_flow_offload_ip_hook(void *priv
, struct sk_buff
*skb
,
215 const struct nf_hook_state
*state
)
217 struct flow_offload_tuple_rhash
*tuplehash
;
218 struct nf_flowtable
*flow_table
= priv
;
219 struct flow_offload_tuple tuple
= {};
220 enum flow_offload_tuple_dir dir
;
221 struct flow_offload
*flow
;
222 struct net_device
*outdev
;
228 if (skb
->protocol
!= htons(ETH_P_IP
))
231 if (nf_flow_tuple_ip(skb
, state
->in
, &tuple
) < 0)
234 tuplehash
= flow_offload_lookup(flow_table
, &tuple
);
235 if (tuplehash
== NULL
)
238 outdev
= dev_get_by_index_rcu(state
->net
, tuplehash
->tuple
.oifidx
);
242 dir
= tuplehash
->tuple
.dir
;
243 flow
= container_of(tuplehash
, struct flow_offload
, tuplehash
[dir
]);
244 rt
= (struct rtable
*)flow
->tuplehash
[dir
].tuple
.dst_cache
;
246 if (unlikely(nf_flow_exceeds_mtu(skb
, flow
->tuplehash
[dir
].tuple
.mtu
)) &&
247 (ip_hdr(skb
)->frag_off
& htons(IP_DF
)) != 0)
250 if (skb_try_make_writable(skb
, sizeof(*iph
)))
253 thoff
= ip_hdr(skb
)->ihl
* 4;
254 if (nf_flow_state_check(flow
, ip_hdr(skb
)->protocol
, skb
, thoff
))
257 if (nf_flow_nat_ip(flow
, skb
, thoff
, dir
) < 0)
260 flow
->timeout
= (u32
)jiffies
+ NF_FLOW_TIMEOUT
;
262 ip_decrease_ttl(iph
);
265 nexthop
= rt_nexthop(rt
, flow
->tuplehash
[!dir
].tuple
.src_v4
.s_addr
);
266 skb_dst_set_noref(skb
, &rt
->dst
);
267 neigh_xmit(NEIGH_ARP_TABLE
, outdev
, &nexthop
, skb
);
271 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook
);
273 static int nf_flow_nat_ipv6_tcp(struct sk_buff
*skb
, unsigned int thoff
,
274 struct in6_addr
*addr
,
275 struct in6_addr
*new_addr
)
279 if (!pskb_may_pull(skb
, thoff
+ sizeof(*tcph
)) ||
280 skb_try_make_writable(skb
, thoff
+ sizeof(*tcph
)))
283 tcph
= (void *)(skb_network_header(skb
) + thoff
);
284 inet_proto_csum_replace16(&tcph
->check
, skb
, addr
->s6_addr32
,
285 new_addr
->s6_addr32
, true);
290 static int nf_flow_nat_ipv6_udp(struct sk_buff
*skb
, unsigned int thoff
,
291 struct in6_addr
*addr
,
292 struct in6_addr
*new_addr
)
296 if (!pskb_may_pull(skb
, thoff
+ sizeof(*udph
)) ||
297 skb_try_make_writable(skb
, thoff
+ sizeof(*udph
)))
300 udph
= (void *)(skb_network_header(skb
) + thoff
);
301 if (udph
->check
|| skb
->ip_summed
== CHECKSUM_PARTIAL
) {
302 inet_proto_csum_replace16(&udph
->check
, skb
, addr
->s6_addr32
,
303 new_addr
->s6_addr32
, true);
305 udph
->check
= CSUM_MANGLED_0
;
311 static int nf_flow_nat_ipv6_l4proto(struct sk_buff
*skb
, struct ipv6hdr
*ip6h
,
312 unsigned int thoff
, struct in6_addr
*addr
,
313 struct in6_addr
*new_addr
)
315 switch (ip6h
->nexthdr
) {
317 if (nf_flow_nat_ipv6_tcp(skb
, thoff
, addr
, new_addr
) < 0)
321 if (nf_flow_nat_ipv6_udp(skb
, thoff
, addr
, new_addr
) < 0)
329 static int nf_flow_snat_ipv6(const struct flow_offload
*flow
,
330 struct sk_buff
*skb
, struct ipv6hdr
*ip6h
,
332 enum flow_offload_tuple_dir dir
)
334 struct in6_addr addr
, new_addr
;
337 case FLOW_OFFLOAD_DIR_ORIGINAL
:
339 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.dst_v6
;
340 ip6h
->saddr
= new_addr
;
342 case FLOW_OFFLOAD_DIR_REPLY
:
344 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.src_v6
;
345 ip6h
->daddr
= new_addr
;
351 return nf_flow_nat_ipv6_l4proto(skb
, ip6h
, thoff
, &addr
, &new_addr
);
354 static int nf_flow_dnat_ipv6(const struct flow_offload
*flow
,
355 struct sk_buff
*skb
, struct ipv6hdr
*ip6h
,
357 enum flow_offload_tuple_dir dir
)
359 struct in6_addr addr
, new_addr
;
362 case FLOW_OFFLOAD_DIR_ORIGINAL
:
364 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_REPLY
].tuple
.src_v6
;
365 ip6h
->daddr
= new_addr
;
367 case FLOW_OFFLOAD_DIR_REPLY
:
369 new_addr
= flow
->tuplehash
[FLOW_OFFLOAD_DIR_ORIGINAL
].tuple
.dst_v6
;
370 ip6h
->saddr
= new_addr
;
376 return nf_flow_nat_ipv6_l4proto(skb
, ip6h
, thoff
, &addr
, &new_addr
);
379 static int nf_flow_nat_ipv6(const struct flow_offload
*flow
,
381 enum flow_offload_tuple_dir dir
)
383 struct ipv6hdr
*ip6h
= ipv6_hdr(skb
);
384 unsigned int thoff
= sizeof(*ip6h
);
386 if (flow
->flags
& FLOW_OFFLOAD_SNAT
&&
387 (nf_flow_snat_port(flow
, skb
, thoff
, ip6h
->nexthdr
, dir
) < 0 ||
388 nf_flow_snat_ipv6(flow
, skb
, ip6h
, thoff
, dir
) < 0))
390 if (flow
->flags
& FLOW_OFFLOAD_DNAT
&&
391 (nf_flow_dnat_port(flow
, skb
, thoff
, ip6h
->nexthdr
, dir
) < 0 ||
392 nf_flow_dnat_ipv6(flow
, skb
, ip6h
, thoff
, dir
) < 0))
398 static int nf_flow_tuple_ipv6(struct sk_buff
*skb
, const struct net_device
*dev
,
399 struct flow_offload_tuple
*tuple
)
401 struct flow_ports
*ports
;
402 struct ipv6hdr
*ip6h
;
405 if (!pskb_may_pull(skb
, sizeof(*ip6h
)))
408 ip6h
= ipv6_hdr(skb
);
410 if (ip6h
->nexthdr
!= IPPROTO_TCP
&&
411 ip6h
->nexthdr
!= IPPROTO_UDP
)
414 thoff
= sizeof(*ip6h
);
415 if (!pskb_may_pull(skb
, thoff
+ sizeof(*ports
)))
418 ports
= (struct flow_ports
*)(skb_network_header(skb
) + thoff
);
420 tuple
->src_v6
= ip6h
->saddr
;
421 tuple
->dst_v6
= ip6h
->daddr
;
422 tuple
->src_port
= ports
->source
;
423 tuple
->dst_port
= ports
->dest
;
424 tuple
->l3proto
= AF_INET6
;
425 tuple
->l4proto
= ip6h
->nexthdr
;
426 tuple
->iifidx
= dev
->ifindex
;
432 nf_flow_offload_ipv6_hook(void *priv
, struct sk_buff
*skb
,
433 const struct nf_hook_state
*state
)
435 struct flow_offload_tuple_rhash
*tuplehash
;
436 struct nf_flowtable
*flow_table
= priv
;
437 struct flow_offload_tuple tuple
= {};
438 enum flow_offload_tuple_dir dir
;
439 struct flow_offload
*flow
;
440 struct net_device
*outdev
;
441 struct in6_addr
*nexthop
;
442 struct ipv6hdr
*ip6h
;
445 if (skb
->protocol
!= htons(ETH_P_IPV6
))
448 if (nf_flow_tuple_ipv6(skb
, state
->in
, &tuple
) < 0)
451 tuplehash
= flow_offload_lookup(flow_table
, &tuple
);
452 if (tuplehash
== NULL
)
455 outdev
= dev_get_by_index_rcu(state
->net
, tuplehash
->tuple
.oifidx
);
459 dir
= tuplehash
->tuple
.dir
;
460 flow
= container_of(tuplehash
, struct flow_offload
, tuplehash
[dir
]);
461 rt
= (struct rt6_info
*)flow
->tuplehash
[dir
].tuple
.dst_cache
;
463 if (unlikely(nf_flow_exceeds_mtu(skb
, flow
->tuplehash
[dir
].tuple
.mtu
)))
466 if (nf_flow_state_check(flow
, ipv6_hdr(skb
)->nexthdr
, skb
,
470 if (skb_try_make_writable(skb
, sizeof(*ip6h
)))
473 if (nf_flow_nat_ipv6(flow
, skb
, dir
) < 0)
476 flow
->timeout
= (u32
)jiffies
+ NF_FLOW_TIMEOUT
;
477 ip6h
= ipv6_hdr(skb
);
481 nexthop
= rt6_nexthop(rt
, &flow
->tuplehash
[!dir
].tuple
.src_v6
);
482 skb_dst_set_noref(skb
, &rt
->dst
);
483 neigh_xmit(NEIGH_ND_TABLE
, outdev
, nexthop
, skb
);
487 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook
);