From: Greg Kroah-Hartman
Date: Mon, 17 Jun 2019 17:48:14 +0000 (+0200)
Subject: 4.14-stable patches
X-Git-Tag: v5.1.11~3
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3dc1e02107df92b95d21354cb1128f9a8c365dd0;p=thirdparty%2Fkernel%2Fstable-queue.git

4.14-stable patches

added patches:
	tcp-add-tcp_min_snd_mss-sysctl.patch
	tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch
	tcp-limit-payload-size-of-sacked-skbs.patch
	tcp-reduce-tcp_fastretrans_alert-verbosity.patch
	tcp-tcp_fragment-should-apply-sane-memory-limits.patch
---

diff --git a/queue-4.14/tcp-add-tcp_min_snd_mss-sysctl.patch b/queue-4.14/tcp-add-tcp_min_snd_mss-sysctl.patch
new file mode 100644
index 00000000000..f94965bd7dc
--- /dev/null
+++ b/queue-4.14/tcp-add-tcp_min_snd_mss-sysctl.patch
@@ -0,0 +1,127 @@
+From foo@baz Mon 17 Jun 2019 06:56:41 PM CEST
+From: Eric Dumazet
+Date: Sat, 15 Jun 2019 17:44:24 -0700
+Subject: tcp: add tcp_min_snd_mss sysctl
+
+From: Eric Dumazet
+
+commit 5f3e2bf008c2221478101ee72f5cb4654b9fc363 upstream.
+
+Some TCP peers announce a very small MSS option in their SYN and/or
+SYN/ACK messages.
+
+This forces the stack to send packets with a very high network/cpu
+overhead.
+
+Linux has enforced a minimal value of 48. Since this value includes
+the size of TCP options, and the options can consume up to 40
+bytes, this means that each segment can include only 8 bytes of payload.
+
+In some cases, it can be useful to increase the minimal value
+to a saner value.
+
+We still keep the default at 48 (TCP_MIN_SND_MSS), for compatibility
+reasons.
+
+Note that the TCP_MAXSEG socket option enforces a minimal value
+of (TCP_MIN_MSS). David Miller increased this minimal value
+in commit c39508d6f118 ("tcp: Make TCP_MAXSEG minimum more correct.")
+from 64 to 88.
+
+We might in the future merge TCP_MIN_SND_MSS and TCP_MIN_MSS.
+
+CVE-2019-11479 -- tcp mss hardcoded to 48
+
+Signed-off-by: Eric Dumazet
+Suggested-by: Jonathan Looney
+Acked-by: Neal Cardwell
+Cc: Yuchung Cheng
+Cc: Tyler Hicks
+Cc: Bruce Curtis
+Cc: Jonathan Lemon
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ Documentation/networking/ip-sysctl.txt |    8 ++++++++
+ include/net/netns/ipv4.h               |    1 +
+ net/ipv4/sysctl_net_ipv4.c             |   11 +++++++++++
+ net/ipv4/tcp_ipv4.c                    |    1 +
+ net/ipv4/tcp_output.c                  |    3 +--
+ 5 files changed, 22 insertions(+), 2 deletions(-)
+
+--- a/Documentation/networking/ip-sysctl.txt
++++ b/Documentation/networking/ip-sysctl.txt
+@@ -241,6 +241,14 @@ tcp_base_mss - INTEGER
+ 	Path MTU discovery (MTU probing). If MTU probing is enabled,
+ 	this is the initial MSS used by the connection.
+ 
++tcp_min_snd_mss - INTEGER
++	TCP SYN and SYNACK messages usually advertise an ADVMSS option,
++	as described in RFC 1122 and RFC 6691.
++	If this ADVMSS option is smaller than tcp_min_snd_mss,
++	it is silently capped to tcp_min_snd_mss.
++
++	Default : 48 (at least 8 bytes of payload per segment)
++
+ tcp_congestion_control - STRING
+ 	Set the congestion control algorithm to be used for new
+ 	connections. The algorithm "reno" is always available, but
+--- a/include/net/netns/ipv4.h
++++ b/include/net/netns/ipv4.h
+@@ -107,6 +107,7 @@ struct netns_ipv4 {
+ #endif
+ 	int sysctl_tcp_mtu_probing;
+ 	int sysctl_tcp_base_mss;
++	int sysctl_tcp_min_snd_mss;
+ 	int sysctl_tcp_probe_threshold;
+ 	u32 sysctl_tcp_probe_interval;
+ 
+--- a/net/ipv4/sysctl_net_ipv4.c
++++ b/net/ipv4/sysctl_net_ipv4.c
+@@ -37,6 +37,8 @@ static int ip_local_port_range_min[] = {
+ static int ip_local_port_range_max[] = { 65535, 65535 };
+ static int tcp_adv_win_scale_min = -31;
+ static int tcp_adv_win_scale_max = 31;
++static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
++static int tcp_min_snd_mss_max = 65535;
+ static int ip_privileged_port_min;
+ static int ip_privileged_port_max = 65535;
+ static int ip_ttl_min = 1;
+@@ -944,6 +946,15 @@ static struct ctl_table ipv4_net_table[]
+ 		.proc_handler	= proc_dointvec,
+ 	},
+ 	{
++		.procname	= "tcp_min_snd_mss",
++		.data		= &init_net.ipv4.sysctl_tcp_min_snd_mss,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= proc_dointvec_minmax,
++		.extra1		= &tcp_min_snd_mss_min,
++		.extra2		= &tcp_min_snd_mss_max,
++	},
++	{
+ 		.procname	= "tcp_probe_threshold",
+ 		.data		= &init_net.ipv4.sysctl_tcp_probe_threshold,
+ 		.maxlen		= sizeof(int),
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -2477,6 +2477,7 @@ static int __net_init tcp_sk_init(struct
+ 	net->ipv4.sysctl_tcp_ecn_fallback = 1;
+ 
+ 	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
++	net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
+ 	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
+ 	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
+ 
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1447,8 +1447,7 @@ static inline int __tcp_mtu_to_mss(struc
+ 	mss_now -= icsk->icsk_ext_hdr_len;
+ 
+ 	/* Then reserve room for full set of TCP options and 8 bytes of data */
+-	if (mss_now < TCP_MIN_SND_MSS)
+-		mss_now = TCP_MIN_SND_MSS;
++	mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
+ 	return mss_now;
+ }
+ 
diff --git a/queue-4.14/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch b/queue-4.14/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch
new file mode 100644
index 00000000000..5336e9e83c2
--- /dev/null
+++ b/queue-4.14/tcp-enforce-tcp_min_snd_mss-in-tcp_mtu_probing.patch
@@ -0,0 +1,40 @@
+From foo@baz Mon 17 Jun 2019 06:56:41 PM CEST
+From: Eric Dumazet
+Date: Sat, 15 Jun 2019 17:47:27 -0700
+Subject: tcp: enforce tcp_min_snd_mss in tcp_mtu_probing()
+
+From: Eric Dumazet
+
+commit 967c05aee439e6e5d7d805e195b3a20ef5c433d6 upstream.
+
+If MTU probing is enabled, tcp_mtu_probing() could very well end up
+with a too small MSS.
+
+Use the new sysctl tcp_min_snd_mss to make sure MSS search
+is performed in an acceptable range.
+
+CVE-2019-11479 -- tcp mss hardcoded to 48
+
+Signed-off-by: Eric Dumazet
+Reported-by: Jonathan Lemon
+Cc: Jonathan Looney
+Acked-by: Neal Cardwell
+Cc: Yuchung Cheng
+Cc: Tyler Hicks
+Cc: Bruce Curtis
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_timer.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -141,6 +141,7 @@ static void tcp_mtu_probing(struct inet_
+ 		mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
+ 		mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
+ 		mss = max(mss, 68 - tp->tcp_header_len);
++		mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
+ 		icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
+ 		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
+ 	}
diff --git a/queue-4.14/tcp-limit-payload-size-of-sacked-skbs.patch b/queue-4.14/tcp-limit-payload-size-of-sacked-skbs.patch
new file mode 100644
index 00000000000..64300c7ab4b
--- /dev/null
+++ b/queue-4.14/tcp-limit-payload-size-of-sacked-skbs.patch
@@ -0,0 +1,182 @@
+From foo@baz Mon 17 Jun 2019 06:56:41 PM CEST
+From: Eric Dumazet
+Date: Sat, 15 Jun 2019 17:31:03 -0700
+Subject: tcp: limit payload size of sacked skbs
+
+From: Eric Dumazet
+
+commit 3b4929f65b0d8249f19a50245cd88ed1a2f78cff upstream.
+
+Jonathan Looney reported that TCP can trigger the following crash
+in tcp_shifted_skb() :
+
+	BUG_ON(tcp_skb_pcount(skb) < pcount);
+
+This can happen if the remote peer has advertised the smallest
+MSS that Linux TCP accepts : 48
+
+An skb can hold 17 fragments, and each fragment can hold 32KB
+on x86, or 64KB on PowerPC.
+
+This means that the 16bit width of TCP_SKB_CB(skb)->tcp_gso_segs
+can overflow.
+
+Note that tcp_sendmsg() builds skbs with less than 64KB
+of payload, so this problem needs SACK to be enabled.
+SACK blocks allow TCP to coalesce multiple skbs in the retransmit
+queue, thus filling the 17 fragments to maximal capacity.
+
+CVE-2019-11477 -- u16 overflow of TCP_SKB_CB(skb)->tcp_gso_segs
+
+Backport notes, provided by Joao Martins
+
+v4.15 and later, since commit 737ff314563 ("tcp: use sequence distance to
+detect reordering"), switched from packet-based FACK tracking to
+sequence-based tracking.
+
+v4.14 and older still have the old logic, and hence
+tcp_skb_shift_data() needs to retain its original logic and keep
+@fack_count in sync. In other words, we keep the increment of pcount with
+tcp_skb_pcount(skb) and later use that to update fack_count. To make it
+more explicit we track the new skb that gets incremented to pcount in
+@next_pcount, and we get to avoid the constant invocation of
+tcp_skb_pcount(skb) altogether.
+
+Fixes: 832d11c5cd07 ("tcp: Try to restore large SKBs while SACK processing")
+Signed-off-by: Eric Dumazet
+Reported-by: Jonathan Looney
+Acked-by: Neal Cardwell
+Reviewed-by: Tyler Hicks
+Cc: Yuchung Cheng
+Cc: Bruce Curtis
+Cc: Jonathan Lemon
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/tcp.h   |    4 ++++
+ include/net/tcp.h     |    2 ++
+ net/ipv4/tcp.c        |    1 +
+ net/ipv4/tcp_input.c  |   28 ++++++++++++++++++++++------
+ net/ipv4/tcp_output.c |    6 +++---
+ 5 files changed, 32 insertions(+), 9 deletions(-)
+
+--- a/include/linux/tcp.h
++++ b/include/linux/tcp.h
+@@ -450,4 +450,8 @@ static inline u16 tcp_mss_clamp(const st
+ 
+ 	return (user_mss && user_mss < mss) ? user_mss : mss;
+ }
++
++int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
++		  int shiftlen);
++
+ #endif /* _LINUX_TCP_H */
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -57,6 +57,8 @@ void tcp_time_wait(struct sock *sk, int
+ 
+ #define MAX_TCP_HEADER	(128 + MAX_HEADER)
+ #define MAX_TCP_OPTION_SPACE 40
++#define TCP_MIN_SND_MSS		48
++#define TCP_MIN_GSO_SIZE	(TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
+ 
+ /*
+  * Never offer a window over 32767 without using window scaling. Some
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3480,6 +3480,7 @@ void __init tcp_init(void)
+ 	unsigned long limit;
+ 	unsigned int i;
+ 
++	BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
+ 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
+ 		     FIELD_SIZEOF(struct sk_buff, cb));
+ 
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1329,7 +1329,7 @@ static bool tcp_shifted_skb(struct sock
+ 	TCP_SKB_CB(skb)->seq += shifted;
+ 
+ 	tcp_skb_pcount_add(prev, pcount);
+-	BUG_ON(tcp_skb_pcount(skb) < pcount);
++	WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
+ 	tcp_skb_pcount_add(skb, -pcount);
+ 
+ 	/* When we're adding to gso_segs == 1, gso_size will be zero,
+@@ -1396,6 +1396,21 @@ static int skb_can_shift(const struct sk
+ 	return !skb_headlen(skb) && skb_is_nonlinear(skb);
+ }
+ 
++int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
++		  int pcount, int shiftlen)
++{
++	/* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
++	 * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
++	 * to make sure not storing more than 65535 * 8 bytes per skb,
++	 * even if current MSS is bigger.
++	 */
++	if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
++		return 0;
++	if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
++		return 0;
++	return skb_shift(to, from, shiftlen);
++}
++
+ /* Try collapsing SACK blocks spanning across multiple skbs to a single
+  * skb.
+  */
+@@ -1407,6 +1422,7 @@ static struct sk_buff *tcp_shift_skb_dat
+ 	struct tcp_sock *tp = tcp_sk(sk);
+ 	struct sk_buff *prev;
+ 	int mss;
++	int next_pcount;
+ 	int pcount = 0;
+ 	int len;
+ 	int in_sack;
+@@ -1504,7 +1520,7 @@ static struct sk_buff *tcp_shift_skb_dat
+ 	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
+ 		goto fallback;
+ 
+-	if (!skb_shift(prev, skb, len))
++	if (!tcp_skb_shift(prev, skb, pcount, len))
+ 		goto fallback;
+ 	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
+ 		goto out;
+@@ -1523,11 +1539,11 @@ static struct sk_buff *tcp_shift_skb_dat
+ 		goto out;
+ 
+ 	len = skb->len;
+-	if (skb_shift(prev, skb, len)) {
+-		pcount += tcp_skb_pcount(skb);
+-		tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
++	next_pcount = tcp_skb_pcount(skb);
++	if (tcp_skb_shift(prev, skb, next_pcount, len)) {
++		pcount += next_pcount;
++		tcp_shifted_skb(sk, skb, state, next_pcount, len, mss, 0);
+ 	}
+-
+ out:
+ 	state->fack_count += pcount;
+ 	return prev;
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1442,8 +1442,8 @@ static inline int __tcp_mtu_to_mss(struc
+ 	mss_now -= icsk->icsk_ext_hdr_len;
+ 
+ 	/* Then reserve room for full set of TCP options and 8 bytes of data */
+-	if (mss_now < 48)
+-		mss_now = 48;
++	if (mss_now < TCP_MIN_SND_MSS)
++		mss_now = TCP_MIN_SND_MSS;
+ 	return mss_now;
+ }
+ 
+@@ -2724,7 +2724,7 @@ static bool tcp_collapse_retrans(struct
+ 		if (next_skb_size <= skb_availroom(skb))
+ 			skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
+ 				      next_skb_size);
+-		else if (!skb_shift(skb, next_skb, next_skb_size))
++		else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size))
+ 			return false;
+ 	}
+ 	tcp_highest_sack_replace(sk, next_skb, skb);
diff --git a/queue-4.14/tcp-reduce-tcp_fastretrans_alert-verbosity.patch b/queue-4.14/tcp-reduce-tcp_fastretrans_alert-verbosity.patch
new file mode 100644
index 00000000000..97c62d939b2
--- /dev/null
+++ b/queue-4.14/tcp-reduce-tcp_fastretrans_alert-verbosity.patch
@@ -0,0 +1,34 @@
+From foo@baz Mon 17 Jun 2019 06:56:41 PM CEST
+From: Eric Dumazet
+Date: Thu, 5 Oct 2017 22:21:25 -0700
+Subject: tcp: reduce tcp_fastretrans_alert() verbosity
+
+From: Eric Dumazet
+
+commit 8ba6ddaaf86c4c6814774e4e4ef158b732bd9f9f upstream.
+
+With upcoming rb-tree implementation, the checks will trigger
+more often, and this is expected.
+
+Signed-off-by: Eric Dumazet
+Signed-off-by: David S. Miller
+Cc: Amit Shah
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_input.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2810,9 +2810,9 @@ static void tcp_fastretrans_alert(struct
+ 	bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
+ 				    (tcp_fackets_out(tp) > tp->reordering));
+ 
+-	if (WARN_ON(!tp->packets_out && tp->sacked_out))
++	if (!tp->packets_out && tp->sacked_out)
+ 		tp->sacked_out = 0;
+-	if (WARN_ON(!tp->sacked_out && tp->fackets_out))
++	if (!tp->sacked_out && tp->fackets_out)
+ 		tp->fackets_out = 0;
+ 
+ 	/* Now state machine starts.
diff --git a/queue-4.14/tcp-tcp_fragment-should-apply-sane-memory-limits.patch b/queue-4.14/tcp-tcp_fragment-should-apply-sane-memory-limits.patch
new file mode 100644
index 00000000000..b6b41e2f876
--- /dev/null
+++ b/queue-4.14/tcp-tcp_fragment-should-apply-sane-memory-limits.patch
@@ -0,0 +1,75 @@
+From foo@baz Mon 17 Jun 2019 06:56:41 PM CEST
+From: Eric Dumazet
+Date: Sat, 15 Jun 2019 17:40:56 -0700
+Subject: tcp: tcp_fragment() should apply sane memory limits
+
+From: Eric Dumazet
+
+commit f070ef2ac66716357066b683fb0baf55f8191a2e upstream.
+
+Jonathan Looney reported that a malicious peer can force a sender
+to fragment its retransmit queue into tiny skbs, inflating memory
+usage and/or overflowing 32bit counters.
+
+TCP allows an application to queue up to sk_sndbuf bytes,
+so we need to give some allowance for non-malicious splitting
+of the retransmit queue.
+
+A new SNMP counter is added to monitor how many times TCP
+refused to split an skb because the allowance was exceeded.
+
+Note that this counter might increase when applications
+use the SO_SNDBUF socket option to lower sk_sndbuf.
+
+CVE-2019-11478 : tcp_fragment, prevent fragmenting a packet when the
+	socket is already using more than half the allowed space
+
+Signed-off-by: Eric Dumazet
+Reported-by: Jonathan Looney
+Acked-by: Neal Cardwell
+Acked-by: Yuchung Cheng
+Reviewed-by: Tyler Hicks
+Cc: Bruce Curtis
+Cc: Jonathan Lemon
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/uapi/linux/snmp.h |    1 +
+ net/ipv4/proc.c           |    1 +
+ net/ipv4/tcp_output.c     |    5 +++++
+ 3 files changed, 7 insertions(+)
+
+--- a/include/uapi/linux/snmp.h
++++ b/include/uapi/linux/snmp.h
+@@ -278,6 +278,7 @@ enum
+ 	LINUX_MIB_TCPKEEPALIVE,			/* TCPKeepAlive */
+ 	LINUX_MIB_TCPMTUPFAIL,			/* TCPMTUPFail */
+ 	LINUX_MIB_TCPMTUPSUCCESS,		/* TCPMTUPSuccess */
++	LINUX_MIB_TCPWQUEUETOOBIG,		/* TCPWqueueTooBig */
+ 	__LINUX_MIB_MAX
+ };
+ 
+--- a/net/ipv4/proc.c
++++ b/net/ipv4/proc.c
+@@ -299,6 +299,7 @@ static const struct snmp_mib snmp4_net_l
+ 	SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE),
+ 	SNMP_MIB_ITEM("TCPMTUPFail", LINUX_MIB_TCPMTUPFAIL),
+ 	SNMP_MIB_ITEM("TCPMTUPSuccess", LINUX_MIB_TCPMTUPSUCCESS),
++	SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG),
+ 	SNMP_MIB_SENTINEL
+ };
+ 
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1274,6 +1274,11 @@ int tcp_fragment(struct sock *sk, struct
+ 	if (nsize < 0)
+ 		nsize = 0;
+ 
++	if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
++		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
++		return -ENOMEM;
++	}
++
+ 	if (skb_unclone(skb, gfp))
+ 		return -ENOMEM;
+