1 From foo@baz Wed 22 May 2019 08:37:51 AM CEST
2 From: Wei Wang <weiwan@google.com>
3 Date: Thu, 16 May 2019 13:30:54 -0700
4 Subject: ipv6: fix src addr routing with the exception table
6 From: Wei Wang <weiwan@google.com>
8 [ Upstream commit 510e2ceda031eed97a7a0f9aad65d271a58b460d ]
10 When inserting route cache into the exception table, the key is
11 generated with both src_addr and dest_addr with src addr routing.
12 However, current logic always assumes the src_addr used to generate the
13 key is a /128 host address. This is not true in the following scenarios:
14 1. When the route is a gateway route or does not have next hop.
15 (rt6_is_gw_or_nonexthop() == true)
16 2. When calling ip6_rt_cache_alloc(), saddr is passed in as NULL.
17 This means, when looking for a route cache in the exception table, we
18 have to do the lookup twice: first time with the passed in /128 host
19 address, second time with the src_addr stored in fib6_info.
21 This solves the pmtu discovery issue reported by Mikael Magnusson where
22 a route cache with a lower mtu info is created for a gateway route with
23 src addr. However, the lookup code is not able to find this route cache.
25 Fixes: 2b760fcf5cfb ("ipv6: hook up exception table to store dst cache")
26 Reported-by: Mikael Magnusson <mikael.kernel@lists.m7n.se>
27 Bisected-by: David Ahern <dsahern@gmail.com>
28 Signed-off-by: Wei Wang <weiwan@google.com>
29 Cc: Martin Lau <kafai@fb.com>
30 Cc: Eric Dumazet <edumazet@google.com>
31 Acked-by: Martin KaFai Lau <kafai@fb.com>
32 Signed-off-by: David S. Miller <davem@davemloft.net>
33 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
35 net/ipv6/route.c | 51 +++++++++++++++++++++++++++------------------------
36 1 file changed, 27 insertions(+), 24 deletions(-)
38 --- a/net/ipv6/route.c
39 +++ b/net/ipv6/route.c
40 @@ -110,8 +110,8 @@ static int rt6_fill_node(struct net *net
41 int iif, int type, u32 portid, u32 seq,
43 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
44 - struct in6_addr *daddr,
45 - struct in6_addr *saddr);
46 + const struct in6_addr *daddr,
47 + const struct in6_addr *saddr);
49 #ifdef CONFIG_IPV6_ROUTE_INFO
50 static struct fib6_info *rt6_add_route_info(struct net *net,
51 @@ -1542,31 +1542,44 @@ out:
52 * Caller has to hold rcu_read_lock()
54 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
55 - struct in6_addr *daddr,
56 - struct in6_addr *saddr)
57 + const struct in6_addr *daddr,
58 + const struct in6_addr *saddr)
60 + const struct in6_addr *src_key = NULL;
61 struct rt6_exception_bucket *bucket;
62 - struct in6_addr *src_key = NULL;
63 struct rt6_exception *rt6_ex;
64 struct rt6_info *res = NULL;
66 - bucket = rcu_dereference(rt->rt6i_exception_bucket);
68 #ifdef CONFIG_IPV6_SUBTREES
69 /* rt6i_src.plen != 0 indicates rt is in subtree
70 * and exception table is indexed by a hash of
71 * both rt6i_dst and rt6i_src.
72 - * Otherwise, the exception table is indexed by
73 - * a hash of only rt6i_dst.
74 + * However, the src addr used to create the hash
75 + * might not be exactly the passed in saddr which
76 + * is a /128 addr from the flow.
77 + * So we need to use rt->fib6_src to redo lookup
78 + * if the passed in saddr does not find anything.
79 + * (See the logic in ip6_rt_cache_alloc() on how
80 + * rt->rt6i_src is updated.)
82 if (rt->fib6_src.plen)
86 + bucket = rcu_dereference(rt->rt6i_exception_bucket);
87 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
89 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
92 +#ifdef CONFIG_IPV6_SUBTREES
93 + /* Use fib6_src as src_key and redo lookup */
94 + if (!res && src_key && src_key != &rt->fib6_src.addr) {
95 + src_key = &rt->fib6_src.addr;
103 @@ -2650,10 +2663,8 @@ out:
104 u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
105 struct in6_addr *saddr)
107 - struct rt6_exception_bucket *bucket;
108 - struct rt6_exception *rt6_ex;
109 - struct in6_addr *src_key;
110 struct inet6_dev *idev;
111 + struct rt6_info *rt;
114 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
115 @@ -2662,18 +2673,10 @@ u32 ip6_mtu_from_fib6(struct fib6_info *
120 -#ifdef CONFIG_IPV6_SUBTREES
121 - if (f6i->fib6_src.plen)
125 - bucket = rcu_dereference(f6i->rt6i_exception_bucket);
126 - rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
127 - if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
128 - mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
130 - if (likely(!mtu)) {
131 + rt = rt6_find_cached_rt(f6i, daddr, saddr);
132 + if (unlikely(rt)) {
133 + mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
135 struct net_device *dev = fib6_info_nh_dev(f6i);