git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/4.20.17/ipv6-route-purge-exception-on-removal.patch
Linux 4.20.17
[thirdparty/kernel/stable-queue.git] / releases / 4.20.17 / ipv6-route-purge-exception-on-removal.patch
1 From foo@baz Thu Mar 14 23:20:15 PDT 2019
2 From: Paolo Abeni <pabeni@redhat.com>
3 Date: Wed, 20 Feb 2019 18:18:12 +0100
4 Subject: ipv6: route: purge exception on removal
5
6 From: Paolo Abeni <pabeni@redhat.com>
7
8 [ Upstream commit f5b51fe804ec2a6edce0f8f6b11ea57283f5857b ]
9
10 When a netdevice is unregistered, we flush the relevant exception
11 via rt6_sync_down_dev() -> fib6_ifdown() -> fib6_del() -> fib6_del_route().
12
13 Finally, we end up calling rt6_remove_exception(), where we release
14 the relevant dst, while we keep the references to the related fib6_info and
15 dev. Such references should be released later, when the dst is
16 destroyed.
17
18 There are a number of caches that can keep the exception around for an
19 unlimited amount of time - namely dst_cache, possibly even socket cache.
20 As a result, device unregistration may hang, as demonstrated by this script:
21
22 ip netns add cl
23 ip netns add rt
24 ip netns add srv
25 ip netns exec rt sysctl -w net.ipv6.conf.all.forwarding=1
26
27 ip link add name cl_veth type veth peer name cl_rt_veth
28 ip link set dev cl_veth netns cl
29 ip -n cl link set dev cl_veth up
30 ip -n cl addr add dev cl_veth 2001::2/64
31 ip -n cl route add default via 2001::1
32
33 ip -n cl link add tunv6 type ip6tnl mode ip6ip6 local 2001::2 remote 2002::1 hoplimit 64 dev cl_veth
34 ip -n cl link set tunv6 up
35 ip -n cl addr add 2013::2/64 dev tunv6
36
37 ip link set dev cl_rt_veth netns rt
38 ip -n rt link set dev cl_rt_veth up
39 ip -n rt addr add dev cl_rt_veth 2001::1/64
40
41 ip link add name rt_srv_veth type veth peer name srv_veth
42 ip link set dev srv_veth netns srv
43 ip -n srv link set dev srv_veth up
44 ip -n srv addr add dev srv_veth 2002::1/64
45 ip -n srv route add default via 2002::2
46
47 ip -n srv link add tunv6 type ip6tnl mode ip6ip6 local 2002::1 remote 2001::2 hoplimit 64 dev srv_veth
48 ip -n srv link set tunv6 up
49 ip -n srv addr add 2013::1/64 dev tunv6
50
51 ip link set dev rt_srv_veth netns rt
52 ip -n rt link set dev rt_srv_veth up
53 ip -n rt addr add dev rt_srv_veth 2002::2/64
54
55 ip netns exec srv netserver & sleep 0.1
56 ip netns exec cl ping6 -c 4 2013::1
57 ip netns exec cl netperf -H 2013::1 -t TCP_STREAM -l 3 & sleep 1
58 ip -n rt link set dev rt_srv_veth mtu 1400
59 wait %2
60
61 ip -n cl link del cl_veth
62
63 This commit addresses the issue by purging all the references held by the
64 exception at removal time, as we currently do for e.g. ipv6 pcpu dst entries.
65
66 v1 -> v2:
67 - re-order the code to avoid accessing dst and net after dst_dev_put()
68
69 Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes")
70 Signed-off-by: Paolo Abeni <pabeni@redhat.com>
71 Reviewed-by: David Ahern <dsahern@gmail.com>
72 Signed-off-by: David S. Miller <davem@davemloft.net>
73 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
74 ---
75 net/ipv6/route.c | 13 ++++++++++++-
76 1 file changed, 12 insertions(+), 1 deletion(-)
77
78 --- a/net/ipv6/route.c
79 +++ b/net/ipv6/route.c
80 @@ -1272,18 +1272,29 @@ static DEFINE_SPINLOCK(rt6_exception_loc
81 static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
82 struct rt6_exception *rt6_ex)
83 {
84 + struct fib6_info *from;
85 struct net *net;
86
87 if (!bucket || !rt6_ex)
88 return;
89
90 net = dev_net(rt6_ex->rt6i->dst.dev);
91 + net->ipv6.rt6_stats->fib_rt_cache--;
92 +
93 + /* purge completely the exception to allow releasing the held resources:
94 + * some [sk] cache may keep the dst around for unlimited time
95 + */
96 + from = rcu_dereference_protected(rt6_ex->rt6i->from,
97 + lockdep_is_held(&rt6_exception_lock));
98 + rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
99 + fib6_info_release(from);
100 + dst_dev_put(&rt6_ex->rt6i->dst);
101 +
102 hlist_del_rcu(&rt6_ex->hlist);
103 dst_release(&rt6_ex->rt6i->dst);
104 kfree_rcu(rt6_ex, rcu);
105 WARN_ON_ONCE(!bucket->depth);
106 bucket->depth--;
107 - net->ipv6.rt6_stats->fib_rt_cache--;
108 }
109
110 /* Remove oldest rt6_ex in bucket and free the memory