]>
Commit | Line | Data |
---|---|---|
bd6035bb GKH |
1 | From foo@baz Mon 17 Jun 2019 06:55:37 PM CEST |
2 | From: Eric Dumazet <edumazet@google.com> | |
3 | Date: Thu, 6 Jun 2019 09:15:31 -0700 | |
4 | Subject: tcp: add tcp_min_snd_mss sysctl | |
5 | ||
6 | From: Eric Dumazet <edumazet@google.com> | |
7 | ||
8 | commit 5f3e2bf008c2221478101ee72f5cb4654b9fc363 upstream. | |
9 | ||
10 | Some TCP peers announce a very small MSS option in their SYN and/or | |
11 | SYN/ACK messages. | |
12 | ||
13 | This forces the stack to send packets with a very high network/cpu | |
14 | overhead. | |
15 | ||
16 | Linux has enforced a minimal value of 48. Since this value includes | |
17 | the size of TCP options, and the options can consume up to 40 bytes, | |
18 | each segment may carry as little as 8 bytes of payload. | |
19 | ||
20 | In some cases, it can be useful to increase the minimal value | |
21 | to a saner value. | |
22 | ||
23 | We still leave the default at 48 (TCP_MIN_SND_MSS), for compatibility | |
24 | reasons. | |
25 | ||
26 | Note that TCP_MAXSEG socket option enforces a minimal value | |
27 | of (TCP_MIN_MSS). David Miller increased this minimal value | |
28 | in commit c39508d6f118 ("tcp: Make TCP_MAXSEG minimum more correct.") | |
29 | from 64 to 88. | |
30 | ||
31 | We might in the future merge TCP_MIN_SND_MSS and TCP_MIN_MSS. | |
32 | ||
33 | CVE-2019-11479 -- tcp mss hardcoded to 48 | |
34 | ||
35 | Signed-off-by: Eric Dumazet <edumazet@google.com> | |
36 | Suggested-by: Jonathan Looney <jtl@netflix.com> | |
37 | Acked-by: Neal Cardwell <ncardwell@google.com> | |
38 | Cc: Yuchung Cheng <ycheng@google.com> | |
39 | Cc: Tyler Hicks <tyhicks@canonical.com> | |
40 | Cc: Bruce Curtis <brucec@netflix.com> | |
41 | Cc: Jonathan Lemon <jonathan.lemon@gmail.com> | |
42 | Signed-off-by: David S. Miller <davem@davemloft.net> | |
43 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
44 | --- | |
45 | Documentation/networking/ip-sysctl.txt | 8 ++++++++ | |
46 | include/net/netns/ipv4.h | 1 + | |
47 | net/ipv4/sysctl_net_ipv4.c | 11 +++++++++++ | |
48 | net/ipv4/tcp_ipv4.c | 1 + | |
49 | net/ipv4/tcp_output.c | 3 +-- | |
50 | 5 files changed, 22 insertions(+), 2 deletions(-) | |
51 | ||
52 | --- a/Documentation/networking/ip-sysctl.txt | |
53 | +++ b/Documentation/networking/ip-sysctl.txt | |
54 | @@ -250,6 +250,14 @@ tcp_base_mss - INTEGER | |
55 | Path MTU discovery (MTU probing). If MTU probing is enabled, | |
56 | this is the initial MSS used by the connection. | |
57 | ||
58 | +tcp_min_snd_mss - INTEGER | |
59 | + TCP SYN and SYNACK messages usually advertise an ADVMSS option, | |
60 | + as described in RFC 1122 and RFC 6691. | |
61 | + If this ADVMSS option is smaller than tcp_min_snd_mss, | |
62 | + it is silently raised to tcp_min_snd_mss. | |
63 | + | |
64 | + Default : 48 (at least 8 bytes of payload per segment) | |
65 | + | |
66 | tcp_congestion_control - STRING | |
67 | Set the congestion control algorithm to be used for new | |
68 | connections. The algorithm "reno" is always available, but | |
69 | --- a/include/net/netns/ipv4.h | |
70 | +++ b/include/net/netns/ipv4.h | |
71 | @@ -114,6 +114,7 @@ struct netns_ipv4 { | |
72 | #endif | |
73 | int sysctl_tcp_mtu_probing; | |
74 | int sysctl_tcp_base_mss; | |
75 | + int sysctl_tcp_min_snd_mss; | |
76 | int sysctl_tcp_probe_threshold; | |
77 | u32 sysctl_tcp_probe_interval; | |
78 | ||
79 | --- a/net/ipv4/sysctl_net_ipv4.c | |
80 | +++ b/net/ipv4/sysctl_net_ipv4.c | |
81 | @@ -39,6 +39,8 @@ static int ip_local_port_range_min[] = { | |
82 | static int ip_local_port_range_max[] = { 65535, 65535 }; | |
83 | static int tcp_adv_win_scale_min = -31; | |
84 | static int tcp_adv_win_scale_max = 31; | |
85 | +static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS; | |
86 | +static int tcp_min_snd_mss_max = 65535; | |
87 | static int ip_privileged_port_min; | |
88 | static int ip_privileged_port_max = 65535; | |
89 | static int ip_ttl_min = 1; | |
90 | @@ -738,6 +740,15 @@ static struct ctl_table ipv4_net_table[] | |
91 | .proc_handler = proc_dointvec, | |
92 | }, | |
93 | { | |
94 | + .procname = "tcp_min_snd_mss", | |
95 | + .data = &init_net.ipv4.sysctl_tcp_min_snd_mss, | |
96 | + .maxlen = sizeof(int), | |
97 | + .mode = 0644, | |
98 | + .proc_handler = proc_dointvec_minmax, | |
99 | + .extra1 = &tcp_min_snd_mss_min, | |
100 | + .extra2 = &tcp_min_snd_mss_max, | |
101 | + }, | |
102 | + { | |
103 | .procname = "tcp_probe_threshold", | |
104 | .data = &init_net.ipv4.sysctl_tcp_probe_threshold, | |
105 | .maxlen = sizeof(int), | |
106 | --- a/net/ipv4/tcp_ipv4.c | |
107 | +++ b/net/ipv4/tcp_ipv4.c | |
108 | @@ -2527,6 +2527,7 @@ static int __net_init tcp_sk_init(struct | |
109 | net->ipv4.sysctl_tcp_ecn_fallback = 1; | |
110 | ||
111 | net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; | |
112 | + net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS; | |
113 | net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; | |
114 | net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; | |
115 | ||
116 | --- a/net/ipv4/tcp_output.c | |
117 | +++ b/net/ipv4/tcp_output.c | |
118 | @@ -1462,8 +1462,7 @@ static inline int __tcp_mtu_to_mss(struc | |
119 | mss_now -= icsk->icsk_ext_hdr_len; | |
120 | ||
121 | /* Then reserve room for full set of TCP options and 8 bytes of data */ | |
122 | - if (mss_now < TCP_MIN_SND_MSS) | |
123 | - mss_now = TCP_MIN_SND_MSS; | |
124 | + mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss); | |
125 | return mss_now; | |
126 | } | |
127 |