]>
Commit | Line | Data |
---|---|---|
ff3875fa GKH |
1 | From foo@baz Sun Dec 31 11:12:48 CET 2017 |
2 | From: Shaohua Li <shli@fb.com> | |
3 | Date: Wed, 20 Dec 2017 12:10:21 -0800 | |
4 | Subject: net: reevalulate autoflowlabel setting after sysctl setting | |
5 | ||
6 | From: Shaohua Li <shli@fb.com> | |
7 | ||
8 | ||
9 | [ Upstream commit 513674b5a2c9c7a67501506419da5c3c77ac6f08 ] | |
10 | ||
11 | sysctl.ipv6.auto_flowlabels is 1 by default. In our hosts, we set it to 2. | |
12 | If sockopt doesn't set autoflowlabel, outgoing packets from the hosts are | |
13 | supposed to not include flowlabel. This is true for normal packets, but | |
14 | not for reset packets. | |
15 | ||
16 | The reason is ipv6_pinfo.autoflowlabel is set in sock creation. Later if | |
17 | we change sysctl.ipv6.auto_flowlabels, the ipv6_pinfo.autoflowlabel isn't | |
18 | changed, so the sock will keep the old behavior in terms of auto | |
19 | flowlabel. Reset packet is suffering from this problem, because reset | |
20 | packet is sent from a special control socket, which is created at boot | |
21 | time. Since sysctl.ipv6.auto_flowlabels is 1 by default, the control | |
22 | socket will always have its ipv6_pinfo.autoflowlabel set, even after | |
23 | user set sysctl.ipv6.auto_flowlabels to 2, so reset packets will always | |
24 | have flowlabel. Normal sock created before sysctl setting suffers from | |
25 | the same issue. We can't even turn off autoflowlabel unless we kill all | |
26 | socks in the hosts. | |
27 | ||
28 | To fix this, if IPV6_AUTOFLOWLABEL sockopt is used, we use the | |
29 | autoflowlabel setting from user, otherwise we always call | |
30 | ip6_default_np_autolabel() which has the new settings of sysctl. | |
31 | ||
32 | Note, this changes behavior a little bit. Before commit 42240901f7c4 | |
33 | (ipv6: Implement different admin modes for automatic flow labels), the | |
34 | autoflowlabel behavior of a sock isn't sticky, e.g., if sysctl changes, | |
35 | existing connections will change autoflowlabel behavior. After that | |
36 | commit, autoflowlabel behavior is sticky in the whole life of the sock. | |
37 | With this patch, the behavior isn't sticky again. | |
38 | ||
39 | Cc: Martin KaFai Lau <kafai@fb.com> | |
40 | Cc: Eric Dumazet <eric.dumazet@gmail.com> | |
41 | Cc: Tom Herbert <tom@quantonium.net> | |
42 | Signed-off-by: Shaohua Li <shli@fb.com> | |
43 | Signed-off-by: David S. Miller <davem@davemloft.net> | |
44 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
45 | --- | |
46 | include/linux/ipv6.h | 3 ++- | |
47 | net/ipv6/af_inet6.c | 1 - | |
48 | net/ipv6/ip6_output.c | 12 ++++++++++-- | |
49 | net/ipv6/ipv6_sockglue.c | 1 + | |
50 | 4 files changed, 13 insertions(+), 4 deletions(-) | |
51 | ||
52 | --- a/include/linux/ipv6.h | |
53 | +++ b/include/linux/ipv6.h | |
54 | @@ -272,7 +272,8 @@ struct ipv6_pinfo { | |
55 | * 100: prefer care-of address | |
56 | */ | |
57 | dontfrag:1, | |
58 | - autoflowlabel:1; | |
59 | + autoflowlabel:1, | |
60 | + autoflowlabel_set:1; | |
61 | __u8 min_hopcount; | |
62 | __u8 tclass; | |
63 | __be32 rcv_flowinfo; | |
64 | --- a/net/ipv6/af_inet6.c | |
65 | +++ b/net/ipv6/af_inet6.c | |
66 | @@ -210,7 +210,6 @@ lookup_protocol: | |
67 | np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; | |
68 | np->mc_loop = 1; | |
69 | np->pmtudisc = IPV6_PMTUDISC_WANT; | |
70 | - np->autoflowlabel = ip6_default_np_autolabel(net); | |
71 | np->repflow = net->ipv6.sysctl.flowlabel_reflect; | |
72 | sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; | |
73 | ||
74 | --- a/net/ipv6/ip6_output.c | |
75 | +++ b/net/ipv6/ip6_output.c | |
76 | @@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct s | |
77 | !(IP6CB(skb)->flags & IP6SKB_REROUTED)); | |
78 | } | |
79 | ||
80 | +static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) | |
81 | +{ | |
82 | + if (!np->autoflowlabel_set) | |
83 | + return ip6_default_np_autolabel(net); | |
84 | + else | |
85 | + return np->autoflowlabel; | |
86 | +} | |
87 | + | |
88 | /* | |
89 | * xmit an sk_buff (used by TCP, SCTP and DCCP) | |
90 | * Note : socket lock is not held for SYNACK packets, but might be modified | |
91 | @@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, stru | |
92 | hlimit = ip6_dst_hoplimit(dst); | |
93 | ||
94 | ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, | |
95 | - np->autoflowlabel, fl6)); | |
96 | + ip6_autoflowlabel(net, np), fl6)); | |
97 | ||
98 | hdr->payload_len = htons(seg_len); | |
99 | hdr->nexthdr = proto; | |
100 | @@ -1626,7 +1634,7 @@ struct sk_buff *__ip6_make_skb(struct so | |
101 | ||
102 | ip6_flow_hdr(hdr, v6_cork->tclass, | |
103 | ip6_make_flowlabel(net, skb, fl6->flowlabel, | |
104 | - np->autoflowlabel, fl6)); | |
105 | + ip6_autoflowlabel(net, np), fl6)); | |
106 | hdr->hop_limit = v6_cork->hop_limit; | |
107 | hdr->nexthdr = proto; | |
108 | hdr->saddr = fl6->saddr; | |
109 | --- a/net/ipv6/ipv6_sockglue.c | |
110 | +++ b/net/ipv6/ipv6_sockglue.c | |
111 | @@ -878,6 +878,7 @@ pref_skip_coa: | |
112 | break; | |
113 | case IPV6_AUTOFLOWLABEL: | |
114 | np->autoflowlabel = valbool; | |
115 | + np->autoflowlabel_set = 1; | |
116 | retv = 0; | |
117 | break; | |
118 | case IPV6_RECVFRAGSIZE: |