From foo@baz Mon 17 Jun 2019 06:58:02 PM CEST
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 15 Jun 2019 17:31:03 -0700
Subject: tcp: limit payload size of sacked skbs

From: Eric Dumazet <edumazet@google.com>

commit 3b4929f65b0d8249f19a50245cd88ed1a2f78cff upstream.

Jonathan Looney reported that TCP can trigger the following crash
in tcp_shifted_skb():

	BUG_ON(tcp_skb_pcount(skb) < pcount);

This can happen if the remote peer has advertised the smallest
MSS that Linux TCP accepts: 48.

An skb can hold 17 fragments, and each fragment can hold 32KB
on x86, or 64KB on PowerPC.

This means that the 16-bit width of TCP_SKB_CB(skb)->tcp_gso_segs
can overflow.

Note that tcp_sendmsg() builds skbs with less than 64KB
of payload, so this problem needs SACK to be enabled.
SACK blocks allow TCP to coalesce multiple skbs in the retransmit
queue, thus filling the 17 fragments to maximal capacity.

CVE-2019-11477 -- u16 overflow of TCP_SKB_CB(skb)->tcp_gso_segs
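
To make the arithmetic concrete, here is a minimal user-space sketch
(illustrative only, not kernel code) using the numbers above: with a
48-byte MSS and 40 bytes of option space, each segment carries only
8 bytes of payload, so a fully packed 17-fragment skb on x86 needs
more segments than a u16 can count.

	#include <stdio.h>

	int main(void)
	{
		unsigned int mss      = 48;            /* smallest MSS Linux TCP accepts */
		unsigned int options  = 40;            /* MAX_TCP_OPTION_SPACE */
		unsigned int payload  = mss - options; /* 8 bytes of data per segment */
		unsigned int skb_len  = 17 * 32768;    /* 17 fragments of 32KB each (x86) */
		unsigned int segments = skb_len / payload;

		/* Prints 69632, which does not fit the 16-bit tcp_gso_segs field. */
		printf("segments per skb: %u (u16 max: 65535)\n", segments);
		return 0;
	}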

Backport notes, provided by Joao Martins <joao.m.martins@oracle.com>

v4.15, or anything since commit 737ff314563 ("tcp: use sequence distance to
detect reordering"), switched from packet-based FACK tracking to
sequence-based tracking.

v4.14 and older still have the old logic, hence tcp_shift_skb_data()
needs to retain its original behaviour and keep @fack_count in sync.
In other words, we keep incrementing pcount by tcp_skb_pcount(skb) and
later use it to update fack_count. To make this explicit, the pcount of
the skb being shifted is stored in @next_pcount, which avoids the
repeated invocations of tcp_skb_pcount(skb) altogether.
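
As a quick orientation for the tcp_skb_shift() guard added in the hunks
below, the cap it enforces can be spelled out with the backported
constants (again an illustrative user-space sketch, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		unsigned int min_gso_size = 48 - 40;              /* TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE */
		unsigned int max_payload  = 65535 * min_gso_size; /* 524280 bytes */

		/* tcp_skb_shift() refuses a shift once either bound would be
		 * exceeded: ~512KB of payload, or 65535 segments, per skb.
		 */
		printf("per-skb caps: %u bytes, 65535 segments\n", max_payload);
		return 0;
	}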

Fixes: 832d11c5cd07 ("tcp: Try to restore large SKBs while SACK processing")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jonathan Looney <jtl@netflix.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Bruce Curtis <brucec@netflix.com>
Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tcp.h   |    3 +++
 include/net/tcp.h     |    2 ++
 net/ipv4/tcp.c        |    1 +
 net/ipv4/tcp_input.c  |   28 ++++++++++++++++++++++------
 net/ipv4/tcp_output.c |    4 ++--
 5 files changed, 30 insertions(+), 8 deletions(-)

--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -419,4 +419,7 @@ static inline void tcp_saved_syn_free(st
 	tp->saved_syn = NULL;
 }
 
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
+		  int shiftlen);
+
 #endif /* _LINUX_TCP_H */
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -54,6 +54,8 @@ void tcp_time_wait(struct sock *sk, int
 
 #define MAX_TCP_HEADER	(128 + MAX_HEADER)
 #define MAX_TCP_OPTION_SPACE 40
+#define TCP_MIN_SND_MSS		48
+#define TCP_MIN_GSO_SIZE	(TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
 
 /*
  * Never offer a window over 32767 without using window scaling. Some
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3144,6 +3144,7 @@ void __init tcp_init(void)
 	int max_rshare, max_wshare, cnt;
 	unsigned int i;
 
+	BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
 	sock_skb_cb_check_size(sizeof(struct tcp_skb_cb));
 
 	percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1275,7 +1275,7 @@ static bool tcp_shifted_skb(struct sock
 	TCP_SKB_CB(skb)->seq += shifted;
 
 	tcp_skb_pcount_add(prev, pcount);
-	BUG_ON(tcp_skb_pcount(skb) < pcount);
+	WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
 	tcp_skb_pcount_add(skb, -pcount);
 
 	/* When we're adding to gso_segs == 1, gso_size will be zero,
@@ -1337,6 +1337,21 @@ static int skb_can_shift(const struct sk
 	return !skb_headlen(skb) && skb_is_nonlinear(skb);
 }
 
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
+		  int pcount, int shiftlen)
+{
+	/* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
+	 * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
+	 * to make sure not storing more than 65535 * 8 bytes per skb,
+	 * even if current MSS is bigger.
+	 */
+	if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
+		return 0;
+	if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
+		return 0;
+	return skb_shift(to, from, shiftlen);
+}
+
 /* Try collapsing SACK blocks spanning across multiple skbs to a single
  * skb.
  */
@@ -1348,6 +1363,7 @@ static struct sk_buff *tcp_shift_skb_dat
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *prev;
 	int mss;
+	int next_pcount;
 	int pcount = 0;
 	int len;
 	int in_sack;
@@ -1442,7 +1458,7 @@ static struct sk_buff *tcp_shift_skb_dat
 	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
 		goto fallback;
 
-	if (!skb_shift(prev, skb, len))
+	if (!tcp_skb_shift(prev, skb, pcount, len))
 		goto fallback;
 	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
 		goto out;
@@ -1461,11 +1477,11 @@ static struct sk_buff *tcp_shift_skb_dat
 		goto out;
 
 	len = skb->len;
-	if (skb_shift(prev, skb, len)) {
-		pcount += tcp_skb_pcount(skb);
-		tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+	next_pcount = tcp_skb_pcount(skb);
+	if (tcp_skb_shift(prev, skb, next_pcount, len)) {
+		pcount += next_pcount;
+		tcp_shifted_skb(sk, skb, state, next_pcount, len, mss, 0);
 	}
-
 out:
 	state->fack_count += pcount;
 	return prev;
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1327,8 +1327,8 @@ static inline int __tcp_mtu_to_mss(struc
 	mss_now -= icsk->icsk_ext_hdr_len;
 
 	/* Then reserve room for full set of TCP options and 8 bytes of data */
-	if (mss_now < 48)
-		mss_now = 48;
+	if (mss_now < TCP_MIN_SND_MSS)
+		mss_now = TCP_MIN_SND_MSS;
 	return mss_now;
 }
 