git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - queue-4.19/ipv6-fix-src-addr-routing-with-the-exception-table.patch
f5e21f910bae7735bad40e73ee1cfc73be149f45
[thirdparty/kernel/stable-queue.git] / queue-4.19 / ipv6-fix-src-addr-routing-with-the-exception-table.patch
1 From foo@baz Wed 22 May 2019 08:37:51 AM CEST
2 From: Wei Wang <weiwan@google.com>
3 Date: Thu, 16 May 2019 13:30:54 -0700
4 Subject: ipv6: fix src addr routing with the exception table
5
6 From: Wei Wang <weiwan@google.com>
7
8 [ Upstream commit 510e2ceda031eed97a7a0f9aad65d271a58b460d ]
9
10 When inserting route cache into the exception table, the key is
11 generated with both src_addr and dest_addr with src addr routing.
12 However, current logic always assumes the src_addr used to generate the
13 key is a /128 host address. This is not true in the following scenarios:
14 1. When the route is a gateway route or does not have next hop.
15 (rt6_is_gw_or_nonexthop() == true)
16 2. When calling ip6_rt_cache_alloc(), saddr is passed in as NULL.
17 This means, when looking for a route cache in the exception table, we
18 have to do the lookup twice: first time with the passed in /128 host
19 address, second time with the src_addr stored in fib6_info.
20
21 This solves the pmtu discovery issue reported by Mikael Magnusson where
22 a route cache with a lower mtu info is created for a gateway route with
23 src addr. However, the lookup code is not able to find this route cache.
24
25 Fixes: 2b760fcf5cfb ("ipv6: hook up exception table to store dst cache")
26 Reported-by: Mikael Magnusson <mikael.kernel@lists.m7n.se>
27 Bisected-by: David Ahern <dsahern@gmail.com>
28 Signed-off-by: Wei Wang <weiwan@google.com>
29 Cc: Martin Lau <kafai@fb.com>
30 Cc: Eric Dumazet <edumazet@google.com>
31 Acked-by: Martin KaFai Lau <kafai@fb.com>
32 Signed-off-by: David S. Miller <davem@davemloft.net>
33 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
34 ---
35 net/ipv6/route.c | 51 +++++++++++++++++++++++++++------------------------
36 1 file changed, 27 insertions(+), 24 deletions(-)
37
38 --- a/net/ipv6/route.c
39 +++ b/net/ipv6/route.c
40 @@ -110,8 +110,8 @@ static int rt6_fill_node(struct net *net
41 int iif, int type, u32 portid, u32 seq,
42 unsigned int flags);
43 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
44 - struct in6_addr *daddr,
45 - struct in6_addr *saddr);
46 + const struct in6_addr *daddr,
47 + const struct in6_addr *saddr);
48
49 #ifdef CONFIG_IPV6_ROUTE_INFO
50 static struct fib6_info *rt6_add_route_info(struct net *net,
51 @@ -1542,31 +1542,44 @@ out:
52 * Caller has to hold rcu_read_lock()
53 */
54 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
55 - struct in6_addr *daddr,
56 - struct in6_addr *saddr)
57 + const struct in6_addr *daddr,
58 + const struct in6_addr *saddr)
59 {
60 + const struct in6_addr *src_key = NULL;
61 struct rt6_exception_bucket *bucket;
62 - struct in6_addr *src_key = NULL;
63 struct rt6_exception *rt6_ex;
64 struct rt6_info *res = NULL;
65
66 - bucket = rcu_dereference(rt->rt6i_exception_bucket);
67 -
68 #ifdef CONFIG_IPV6_SUBTREES
69 /* rt6i_src.plen != 0 indicates rt is in subtree
70 * and exception table is indexed by a hash of
71 * both rt6i_dst and rt6i_src.
72 - * Otherwise, the exception table is indexed by
73 - * a hash of only rt6i_dst.
74 + * However, the src addr used to create the hash
75 + * might not be exactly the passed in saddr which
76 + * is a /128 addr from the flow.
77 + * So we need to use rt->fib6_src to redo lookup
78 + * if the passed in saddr does not find anything.
79 + * (See the logic in ip6_rt_cache_alloc() on how
80 + * rt->rt6i_src is updated.)
81 */
82 if (rt->fib6_src.plen)
83 src_key = saddr;
84 +find_ex:
85 #endif
86 + bucket = rcu_dereference(rt->rt6i_exception_bucket);
87 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
88
89 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
90 res = rt6_ex->rt6i;
91
92 +#ifdef CONFIG_IPV6_SUBTREES
93 + /* Use fib6_src as src_key and redo lookup */
94 + if (!res && src_key && src_key != &rt->fib6_src.addr) {
95 + src_key = &rt->fib6_src.addr;
96 + goto find_ex;
97 + }
98 +#endif
99 +
100 return res;
101 }
102
103 @@ -2650,10 +2663,8 @@ out:
104 u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
105 struct in6_addr *saddr)
106 {
107 - struct rt6_exception_bucket *bucket;
108 - struct rt6_exception *rt6_ex;
109 - struct in6_addr *src_key;
110 struct inet6_dev *idev;
111 + struct rt6_info *rt;
112 u32 mtu = 0;
113
114 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
115 @@ -2662,18 +2673,10 @@ u32 ip6_mtu_from_fib6(struct fib6_info *
116 goto out;
117 }
118
119 - src_key = NULL;
120 -#ifdef CONFIG_IPV6_SUBTREES
121 - if (f6i->fib6_src.plen)
122 - src_key = saddr;
123 -#endif
124 -
125 - bucket = rcu_dereference(f6i->rt6i_exception_bucket);
126 - rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
127 - if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
128 - mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
129 -
130 - if (likely(!mtu)) {
131 + rt = rt6_find_cached_rt(f6i, daddr, saddr);
132 + if (unlikely(rt)) {
133 + mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
134 + } else {
135 struct net_device *dev = fib6_info_nh_dev(f6i);
136
137 mtu = IPV6_MIN_MTU;