git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob

   1 From 31116e2cc4380003461e8e9f6703e157b7bc79d4 Mon Sep 17 00:00:00 2001
   2 From: Sasha Levin <sashal@kernel.org>
   3 Date: Thu, 29 Oct 2020 03:56:06 +0100
   4 Subject: netfilter: use actual socket sk rather than skb sk when routing
   5  harder
   6
   7 From: Jason A. Donenfeld <Jason@zx2c4.com>
   8
   9 [ Upstream commit 46d6c5ae953cc0be38efd0e469284df7c4328cf8 ]
  10
  11 If netfilter changes the packet mark when mangling, the packet is
  12 rerouted using the route_me_harder set of functions. Prior to this
  13 commit, there's one big difference between route_me_harder and the
  14 ordinary initial routing functions, described in the comment above
  15 __ip_queue_xmit():
  16
  17    /* Note: skb->sk can be different from sk, in case of tunnels */
  18    int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
  19
  20 That function goes on to correctly make use of sk->sk_bound_dev_if,
  21 rather than skb->sk->sk_bound_dev_if. And indeed the comment is true: a
  22 tunnel will receive a packet in ndo_start_xmit with an initial skb->sk.
  23 It will make some transformations to that packet, and then it will send
  24 the encapsulated packet out of a *new* socket. That new socket will
  25 basically always have a different sk_bound_dev_if (otherwise there'd be
  26 a routing loop). So for the purposes of routing the encapsulated packet,
  27 the routing information as it pertains to the socket should come from
  28 that socket's sk, rather than the packet's original skb->sk. For that
  29 reason __ip_queue_xmit() and related functions all do the right thing.
  30
  31 One might argue that all tunnels should just call skb_orphan(skb) before
  32 transmitting the encapsulated packet into the new socket. But tunnels do
  33 *not* do this -- and this is wisely avoided in skb_scrub_packet() too --
  34 because features like TSQ rely on skb->destructor() being called when
  35 that buffer space is truly available again. Calling skb_orphan(skb) too
  36 early would result in buffers filling up unnecessarily and accounting
  37 info being all wrong. Instead, additional routing must take into account
  38 the new sk, just as __ip_queue_xmit() notes.
  39
  40 So, this commit addresses the problem by fishing the correct sk out of
  41 state->sk -- it's already set properly in the call to nf_hook() in
  42 __ip_local_out(), which receives the sk as part of its normal
  43 functionality. So we make sure to plumb state->sk through the various
  44 route_me_harder functions, and then make correct use of it following the
  45 example of __ip_queue_xmit().
  46
  47 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
  48 Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
  49 Reviewed-by: Florian Westphal <fw@strlen.de>
  50 Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  51 Signed-off-by: Sasha Levin <sashal@kernel.org>
  52 ---
  53  include/linux/netfilter_ipv4.h       |  2 +-
  54  include/linux/netfilter_ipv6.h       | 10 +++++-----
  55  net/ipv4/netfilter.c                 |  8 +++++---
  56  net/ipv4/netfilter/iptable_mangle.c  |  2 +-
  57  net/ipv4/netfilter/nf_reject_ipv4.c  |  2 +-
  58  net/ipv6/netfilter.c                 |  6 +++---
  59  net/ipv6/netfilter/ip6table_mangle.c |  2 +-
  60  net/netfilter/ipvs/ip_vs_core.c      |  4 ++--
  61  net/netfilter/nf_nat_proto.c         |  4 ++--
  62  net/netfilter/nf_synproxy_core.c     |  2 +-
  63  net/netfilter/nft_chain_route.c      |  4 ++--
  64  net/netfilter/utils.c                |  4 ++--
  65  12 files changed, 26 insertions(+), 24 deletions(-)
  66
  67 diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
  68 index 082e2c41b7ff9..5b70ca868bb19 100644
  69 --- a/include/linux/netfilter_ipv4.h
  70 +++ b/include/linux/netfilter_ipv4.h
  71 @@ -16,7 +16,7 @@ struct ip_rt_info {
  72         u_int32_t mark;
  73  };
  74
  75 -int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type);
  76 +int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned addr_type);
  77
  78  struct nf_queue_entry;
  79
  80 diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
  81 index 9b67394471e1c..48314ade1506f 100644
  82 --- a/include/linux/netfilter_ipv6.h
  83 +++ b/include/linux/netfilter_ipv6.h
  84 @@ -42,7 +42,7 @@ struct nf_ipv6_ops {
  85  #if IS_MODULE(CONFIG_IPV6)
  86         int (*chk_addr)(struct net *net, const struct in6_addr *addr,
  87                         const struct net_device *dev, int strict);
  88 -       int (*route_me_harder)(struct net *net, struct sk_buff *skb);
  89 +       int (*route_me_harder)(struct net *net, struct sock *sk, struct sk_buff *skb);
  90         int (*dev_get_saddr)(struct net *net, const struct net_device *dev,
  91                        const struct in6_addr *daddr, unsigned int srcprefs,
  92                        struct in6_addr *saddr);
  93 @@ -143,9 +143,9 @@ static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk,
  94  #endif
  95  }
  96
  97 -int ip6_route_me_harder(struct net *net, struct sk_buff *skb);
  98 +int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb);
  99
 100 -static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb)
 101 +static inline int nf_ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb)
 102  {
 103  #if IS_MODULE(CONFIG_IPV6)
 104         const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
 105 @@ -153,9 +153,9 @@ static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb)
 106         if (!v6_ops)
 107                 return -EHOSTUNREACH;
 108
 109 -       return v6_ops->route_me_harder(net, skb);
 110 +       return v6_ops->route_me_harder(net, sk, skb);
 111  #elif IS_BUILTIN(CONFIG_IPV6)
 112 -       return ip6_route_me_harder(net, skb);
 113 +       return ip6_route_me_harder(net, sk, skb);
 114  #else
 115         return -EHOSTUNREACH;
 116  #endif
 117 diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
 118 index a058213b77a78..7c841037c5334 100644
 119 --- a/net/ipv4/netfilter.c
 120 +++ b/net/ipv4/netfilter.c
 121 @@ -17,17 +17,19 @@
 122  #include <net/netfilter/nf_queue.h>
 123
 124  /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
 125 -int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_type)
 126 +int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type)
 127  {
 128         const struct iphdr *iph = ip_hdr(skb);
 129         struct rtable *rt;
 130         struct flowi4 fl4 = {};
 131         __be32 saddr = iph->saddr;
 132 -       const struct sock *sk = skb_to_full_sk(skb);
 133 -       __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0;
 134 +       __u8 flags;
 135         struct net_device *dev = skb_dst(skb)->dev;
 136         unsigned int hh_len;
 137
 138 +       sk = sk_to_full_sk(sk);
 139 +       flags = sk ? inet_sk_flowi_flags(sk) : 0;
 140 +
 141         if (addr_type == RTN_UNSPEC)
 142                 addr_type = inet_addr_type_dev_table(net, dev, saddr);
 143         if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
 144 diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
 145 index bb9266ea37858..ae45bcdd335ea 100644
 146 --- a/net/ipv4/netfilter/iptable_mangle.c
 147 +++ b/net/ipv4/netfilter/iptable_mangle.c
 148 @@ -62,7 +62,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
 149                     iph->daddr != daddr ||
 150                     skb->mark != mark ||
 151                     iph->tos != tos) {
 152 -                       err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
 153 +                       err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
 154                         if (err < 0)
 155                                 ret = NF_DROP_ERR(err);
 156                 }
 157 diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
 158 index 2361fdac2c438..57817313a85c1 100644
 159 --- a/net/ipv4/netfilter/nf_reject_ipv4.c
 160 +++ b/net/ipv4/netfilter/nf_reject_ipv4.c
 161 @@ -127,7 +127,7 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
 162                                    ip4_dst_hoplimit(skb_dst(nskb)));
 163         nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
 164
 165 -       if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
 166 +       if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC))
 167                 goto free_nskb;
 168
 169         niph = ip_hdr(nskb);
 170 diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
 171 index 6d0e942d082d4..ab9a279dd6d47 100644
 172 --- a/net/ipv6/netfilter.c
 173 +++ b/net/ipv6/netfilter.c
 174 @@ -20,10 +20,10 @@
 175  #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 176  #include "../bridge/br_private.h"
 177
 178 -int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
 179 +int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff *skb)
 180  {
 181         const struct ipv6hdr *iph = ipv6_hdr(skb);
 182 -       struct sock *sk = sk_to_full_sk(skb->sk);
 183 +       struct sock *sk = sk_to_full_sk(sk_partial);
 184         unsigned int hh_len;
 185         struct dst_entry *dst;
 186         int strict = (ipv6_addr_type(&iph->daddr) &
 187 @@ -84,7 +84,7 @@ static int nf_ip6_reroute(struct sk_buff *skb,
 188                 if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
 189                     !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
 190                     skb->mark != rt_info->mark)
 191 -                       return ip6_route_me_harder(entry->state.net, skb);
 192 +                       return ip6_route_me_harder(entry->state.net, entry->state.sk, skb);
 193         }
 194         return 0;
 195  }
 196 diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
 197 index 070afb97fa2ba..401e8dcb2c84b 100644
 198 --- a/net/ipv6/netfilter/ip6table_mangle.c
 199 +++ b/net/ipv6/netfilter/ip6table_mangle.c
 200 @@ -57,7 +57,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
 201              skb->mark != mark ||
 202              ipv6_hdr(skb)->hop_limit != hop_limit ||
 203              flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
 204 -               err = ip6_route_me_harder(state->net, skb);
 205 +               err = ip6_route_me_harder(state->net, state->sk, skb);
 206                 if (err < 0)
 207                         ret = NF_DROP_ERR(err);
 208         }
 209 diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
 210 index 64a05906cc0e6..89aa1fc334b19 100644
 211 --- a/net/netfilter/ipvs/ip_vs_core.c
 212 +++ b/net/netfilter/ipvs/ip_vs_core.c
 213 @@ -748,12 +748,12 @@ static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af,
 214                 struct dst_entry *dst = skb_dst(skb);
 215
 216                 if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) &&
 217 -                   ip6_route_me_harder(ipvs->net, skb) != 0)
 218 +                   ip6_route_me_harder(ipvs->net, skb->sk, skb) != 0)
 219                         return 1;
 220         } else
 221  #endif
 222                 if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
 223 -                   ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0)
 224 +                   ip_route_me_harder(ipvs->net, skb->sk, skb, RTN_LOCAL) != 0)
 225                         return 1;
 226
 227         return 0;
 228 diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
 229 index 59151dc07fdc1..e87b6bd6b3cdb 100644
 230 --- a/net/netfilter/nf_nat_proto.c
 231 +++ b/net/netfilter/nf_nat_proto.c
 232 @@ -715,7 +715,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
 233
 234                 if (ct->tuplehash[dir].tuple.dst.u3.ip !=
 235                     ct->tuplehash[!dir].tuple.src.u3.ip) {
 236 -                       err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
 237 +                       err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
 238                         if (err < 0)
 239                                 ret = NF_DROP_ERR(err);
 240                 }
 241 @@ -953,7 +953,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
 242
 243                 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
 244                                       &ct->tuplehash[!dir].tuple.src.u3)) {
 245 -                       err = nf_ip6_route_me_harder(state->net, skb);
 246 +                       err = nf_ip6_route_me_harder(state->net, state->sk, skb);
 247                         if (err < 0)
 248                                 ret = NF_DROP_ERR(err);
 249                 }
 250 diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
 251 index b9cbe1e2453e8..4bb4cfde28b47 100644
 252 --- a/net/netfilter/nf_synproxy_core.c
 253 +++ b/net/netfilter/nf_synproxy_core.c
 254 @@ -446,7 +446,7 @@ synproxy_send_tcp(struct net *net,
 255
 256         skb_dst_set_noref(nskb, skb_dst(skb));
 257         nskb->protocol = htons(ETH_P_IP);
 258 -       if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
 259 +       if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC))
 260                 goto free_nskb;
 261
 262         if (nfct) {
 263 diff --git a/net/netfilter/nft_chain_route.c b/net/netfilter/nft_chain_route.c
 264 index 8826bbe71136c..edd02cda57fca 100644
 265 --- a/net/netfilter/nft_chain_route.c
 266 +++ b/net/netfilter/nft_chain_route.c
 267 @@ -42,7 +42,7 @@ static unsigned int nf_route_table_hook4(void *priv,
 268                     iph->daddr != daddr ||
 269                     skb->mark != mark ||
 270                     iph->tos != tos) {
 271 -                       err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
 272 +                       err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
 273                         if (err < 0)
 274                                 ret = NF_DROP_ERR(err);
 275                 }
 276 @@ -92,7 +92,7 @@ static unsigned int nf_route_table_hook6(void *priv,
 277              skb->mark != mark ||
 278              ipv6_hdr(skb)->hop_limit != hop_limit ||
 279              flowlabel != *((u32 *)ipv6_hdr(skb)))) {
 280 -               err = nf_ip6_route_me_harder(state->net, skb);
 281 +               err = nf_ip6_route_me_harder(state->net, state->sk, skb);
 282                 if (err < 0)
 283                         ret = NF_DROP_ERR(err);
 284         }
 285 diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
 286 index 51b454d8fa9c9..924195861faf7 100644
 287 --- a/net/netfilter/utils.c
 288 +++ b/net/netfilter/utils.c
 289 @@ -191,8 +191,8 @@ static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry
 290                       skb->mark == rt_info->mark &&
 291                       iph->daddr == rt_info->daddr &&
 292                       iph->saddr == rt_info->saddr))
 293 -                       return ip_route_me_harder(entry->state.net, skb,
 294 -                                                 RTN_UNSPEC);
 295 +                       return ip_route_me_harder(entry->state.net, entry->state.sk,
 296 +                                                 skb, RTN_UNSPEC);
 297         }
 298  #endif
 299         return 0;
 300 --
 301 2.27.0
 302