git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob
875f19d610f668efb60f1067abcd070ecc957c8d
[thirdparty/kernel/stable-queue.git] /
1 From 83ee66e525b133e3c0b7a6beb0a373cbd7264fa3 Mon Sep 17 00:00:00 2001
2 From: Sasha Levin <sashal@kernel.org>
3 Date: Thu, 29 Oct 2020 03:56:06 +0100
4 Subject: netfilter: use actual socket sk rather than skb sk when routing
5 harder
6
7 From: Jason A. Donenfeld <Jason@zx2c4.com>
8
9 [ Upstream commit 46d6c5ae953cc0be38efd0e469284df7c4328cf8 ]
10
11 If netfilter changes the packet mark when mangling, the packet is
12 rerouted using the route_me_harder set of functions. Prior to this
13 commit, there's one big difference between route_me_harder and the
14 ordinary initial routing functions, described in the comment above
15 __ip_queue_xmit():
16
17 /* Note: skb->sk can be different from sk, in case of tunnels */
18 int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
19
20 That function goes on to correctly make use of sk->sk_bound_dev_if,
21 rather than skb->sk->sk_bound_dev_if. And indeed the comment is true: a
22 tunnel will receive a packet in ndo_start_xmit with an initial skb->sk.
23 It will make some transformations to that packet, and then it will send
24 the encapsulated packet out of a *new* socket. That new socket will
25 basically always have a different sk_bound_dev_if (otherwise there'd be
26 a routing loop). So for the purposes of routing the encapsulated packet,
27 the routing information as it pertains to the socket should come from
28 that socket's sk, rather than the packet's original skb->sk. For that
29 reason __ip_queue_xmit() and related functions all do the right thing.
30
31 One might argue that all tunnels should just call skb_orphan(skb) before
32 transmitting the encapsulated packet into the new socket. But tunnels do
33 *not* do this -- and this is wisely avoided in skb_scrub_packet() too --
34 because features like TSQ rely on skb->destructor() being called when
35 that buffer space is truly available again. Calling skb_orphan(skb) too
36 early would result in buffers filling up unnecessarily and accounting
37 info being all wrong. Instead, additional routing must take into account
38 the new sk, just as __ip_queue_xmit() notes.
39
40 So, this commit addresses the problem by fishing the correct sk out of
41 state->sk -- it's already set properly in the call to nf_hook() in
42 __ip_local_out(), which receives the sk as part of its normal
43 functionality. So we make sure to plumb state->sk through the various
44 route_me_harder functions, and then make correct use of it following the
45 example of __ip_queue_xmit().
46
47 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
48 Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
49 Reviewed-by: Florian Westphal <fw@strlen.de>
50 Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
51 Signed-off-by: Sasha Levin <sashal@kernel.org>
52 ---
53 include/linux/netfilter_ipv4.h | 2 +-
54 include/linux/netfilter_ipv6.h | 10 +++++-----
55 net/ipv4/netfilter.c | 8 +++++---
56 net/ipv4/netfilter/iptable_mangle.c | 2 +-
57 net/ipv4/netfilter/nf_reject_ipv4.c | 2 +-
58 net/ipv6/netfilter.c | 6 +++---
59 net/ipv6/netfilter/ip6table_mangle.c | 2 +-
60 net/netfilter/ipvs/ip_vs_core.c | 4 ++--
61 net/netfilter/nf_nat_proto.c | 4 ++--
62 net/netfilter/nf_synproxy_core.c | 2 +-
63 net/netfilter/nft_chain_route.c | 4 ++--
64 net/netfilter/utils.c | 4 ++--
65 12 files changed, 26 insertions(+), 24 deletions(-)
66
67 diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
68 index 082e2c41b7ff9..5b70ca868bb19 100644
69 --- a/include/linux/netfilter_ipv4.h
70 +++ b/include/linux/netfilter_ipv4.h
71 @@ -16,7 +16,7 @@ struct ip_rt_info {
72 u_int32_t mark;
73 };
74
75 -int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type);
76 +int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned addr_type);
77
78 struct nf_queue_entry;
79
80 diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
81 index 9b67394471e1c..48314ade1506f 100644
82 --- a/include/linux/netfilter_ipv6.h
83 +++ b/include/linux/netfilter_ipv6.h
84 @@ -42,7 +42,7 @@ struct nf_ipv6_ops {
85 #if IS_MODULE(CONFIG_IPV6)
86 int (*chk_addr)(struct net *net, const struct in6_addr *addr,
87 const struct net_device *dev, int strict);
88 - int (*route_me_harder)(struct net *net, struct sk_buff *skb);
89 + int (*route_me_harder)(struct net *net, struct sock *sk, struct sk_buff *skb);
90 int (*dev_get_saddr)(struct net *net, const struct net_device *dev,
91 const struct in6_addr *daddr, unsigned int srcprefs,
92 struct in6_addr *saddr);
93 @@ -143,9 +143,9 @@ static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk,
94 #endif
95 }
96
97 -int ip6_route_me_harder(struct net *net, struct sk_buff *skb);
98 +int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb);
99
100 -static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb)
101 +static inline int nf_ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb)
102 {
103 #if IS_MODULE(CONFIG_IPV6)
104 const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
105 @@ -153,9 +153,9 @@ static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb)
106 if (!v6_ops)
107 return -EHOSTUNREACH;
108
109 - return v6_ops->route_me_harder(net, skb);
110 + return v6_ops->route_me_harder(net, sk, skb);
111 #elif IS_BUILTIN(CONFIG_IPV6)
112 - return ip6_route_me_harder(net, skb);
113 + return ip6_route_me_harder(net, sk, skb);
114 #else
115 return -EHOSTUNREACH;
116 #endif
117 diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
118 index a058213b77a78..7c841037c5334 100644
119 --- a/net/ipv4/netfilter.c
120 +++ b/net/ipv4/netfilter.c
121 @@ -17,17 +17,19 @@
122 #include <net/netfilter/nf_queue.h>
123
124 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
125 -int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_type)
126 +int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type)
127 {
128 const struct iphdr *iph = ip_hdr(skb);
129 struct rtable *rt;
130 struct flowi4 fl4 = {};
131 __be32 saddr = iph->saddr;
132 - const struct sock *sk = skb_to_full_sk(skb);
133 - __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0;
134 + __u8 flags;
135 struct net_device *dev = skb_dst(skb)->dev;
136 unsigned int hh_len;
137
138 + sk = sk_to_full_sk(sk);
139 + flags = sk ? inet_sk_flowi_flags(sk) : 0;
140 +
141 if (addr_type == RTN_UNSPEC)
142 addr_type = inet_addr_type_dev_table(net, dev, saddr);
143 if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
144 diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
145 index f703a717ab1d2..8330795892730 100644
146 --- a/net/ipv4/netfilter/iptable_mangle.c
147 +++ b/net/ipv4/netfilter/iptable_mangle.c
148 @@ -62,7 +62,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
149 iph->daddr != daddr ||
150 skb->mark != mark ||
151 iph->tos != tos) {
152 - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
153 + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
154 if (err < 0)
155 ret = NF_DROP_ERR(err);
156 }
157 diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
158 index 9dcfa4e461b65..93b07739807b2 100644
159 --- a/net/ipv4/netfilter/nf_reject_ipv4.c
160 +++ b/net/ipv4/netfilter/nf_reject_ipv4.c
161 @@ -145,7 +145,7 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
162 ip4_dst_hoplimit(skb_dst(nskb)));
163 nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
164
165 - if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
166 + if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC))
167 goto free_nskb;
168
169 niph = ip_hdr(nskb);
170 diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
171 index 6d0e942d082d4..ab9a279dd6d47 100644
172 --- a/net/ipv6/netfilter.c
173 +++ b/net/ipv6/netfilter.c
174 @@ -20,10 +20,10 @@
175 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
176 #include "../bridge/br_private.h"
177
178 -int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
179 +int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff *skb)
180 {
181 const struct ipv6hdr *iph = ipv6_hdr(skb);
182 - struct sock *sk = sk_to_full_sk(skb->sk);
183 + struct sock *sk = sk_to_full_sk(sk_partial);
184 unsigned int hh_len;
185 struct dst_entry *dst;
186 int strict = (ipv6_addr_type(&iph->daddr) &
187 @@ -84,7 +84,7 @@ static int nf_ip6_reroute(struct sk_buff *skb,
188 if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
189 !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
190 skb->mark != rt_info->mark)
191 - return ip6_route_me_harder(entry->state.net, skb);
192 + return ip6_route_me_harder(entry->state.net, entry->state.sk, skb);
193 }
194 return 0;
195 }
196 diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
197 index 1a2748611e003..cee74803d7a1c 100644
198 --- a/net/ipv6/netfilter/ip6table_mangle.c
199 +++ b/net/ipv6/netfilter/ip6table_mangle.c
200 @@ -57,7 +57,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
201 skb->mark != mark ||
202 ipv6_hdr(skb)->hop_limit != hop_limit ||
203 flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
204 - err = ip6_route_me_harder(state->net, skb);
205 + err = ip6_route_me_harder(state->net, state->sk, skb);
206 if (err < 0)
207 ret = NF_DROP_ERR(err);
208 }
209 diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
210 index e3668a6e54e47..570d8ef6fb8b6 100644
211 --- a/net/netfilter/ipvs/ip_vs_core.c
212 +++ b/net/netfilter/ipvs/ip_vs_core.c
213 @@ -742,12 +742,12 @@ static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af,
214 struct dst_entry *dst = skb_dst(skb);
215
216 if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) &&
217 - ip6_route_me_harder(ipvs->net, skb) != 0)
218 + ip6_route_me_harder(ipvs->net, skb->sk, skb) != 0)
219 return 1;
220 } else
221 #endif
222 if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
223 - ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0)
224 + ip_route_me_harder(ipvs->net, skb->sk, skb, RTN_LOCAL) != 0)
225 return 1;
226
227 return 0;
228 diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
229 index 59151dc07fdc1..e87b6bd6b3cdb 100644
230 --- a/net/netfilter/nf_nat_proto.c
231 +++ b/net/netfilter/nf_nat_proto.c
232 @@ -715,7 +715,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
233
234 if (ct->tuplehash[dir].tuple.dst.u3.ip !=
235 ct->tuplehash[!dir].tuple.src.u3.ip) {
236 - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
237 + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
238 if (err < 0)
239 ret = NF_DROP_ERR(err);
240 }
241 @@ -953,7 +953,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
242
243 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
244 &ct->tuplehash[!dir].tuple.src.u3)) {
245 - err = nf_ip6_route_me_harder(state->net, skb);
246 + err = nf_ip6_route_me_harder(state->net, state->sk, skb);
247 if (err < 0)
248 ret = NF_DROP_ERR(err);
249 }
250 diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
251 index 9cca35d229273..d7d34a62d3bf5 100644
252 --- a/net/netfilter/nf_synproxy_core.c
253 +++ b/net/netfilter/nf_synproxy_core.c
254 @@ -446,7 +446,7 @@ synproxy_send_tcp(struct net *net,
255
256 skb_dst_set_noref(nskb, skb_dst(skb));
257 nskb->protocol = htons(ETH_P_IP);
258 - if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
259 + if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC))
260 goto free_nskb;
261
262 if (nfct) {
263 diff --git a/net/netfilter/nft_chain_route.c b/net/netfilter/nft_chain_route.c
264 index 8826bbe71136c..edd02cda57fca 100644
265 --- a/net/netfilter/nft_chain_route.c
266 +++ b/net/netfilter/nft_chain_route.c
267 @@ -42,7 +42,7 @@ static unsigned int nf_route_table_hook4(void *priv,
268 iph->daddr != daddr ||
269 skb->mark != mark ||
270 iph->tos != tos) {
271 - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
272 + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
273 if (err < 0)
274 ret = NF_DROP_ERR(err);
275 }
276 @@ -92,7 +92,7 @@ static unsigned int nf_route_table_hook6(void *priv,
277 skb->mark != mark ||
278 ipv6_hdr(skb)->hop_limit != hop_limit ||
279 flowlabel != *((u32 *)ipv6_hdr(skb)))) {
280 - err = nf_ip6_route_me_harder(state->net, skb);
281 + err = nf_ip6_route_me_harder(state->net, state->sk, skb);
282 if (err < 0)
283 ret = NF_DROP_ERR(err);
284 }
285 diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
286 index cedf47ab3c6f9..2182d361e273f 100644
287 --- a/net/netfilter/utils.c
288 +++ b/net/netfilter/utils.c
289 @@ -191,8 +191,8 @@ static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry
290 skb->mark == rt_info->mark &&
291 iph->daddr == rt_info->daddr &&
292 iph->saddr == rt_info->saddr))
293 - return ip_route_me_harder(entry->state.net, skb,
294 - RTN_UNSPEC);
295 + return ip_route_me_harder(entry->state.net, entry->state.sk,
296 + skb, RTN_UNSPEC);
297 }
298 #endif
299 return 0;
300 --
301 2.27.0
302