Commit 52e8d254 (GKH)
1 | From foo@baz Mon 17 Jun 2019 06:57:32 PM CEST |
2 | From: Eric Dumazet <edumazet@google.com> | |
3 | Date: Sat, 15 Jun 2019 17:31:03 -0700 | |
4 | Subject: tcp: limit payload size of sacked skbs | |
5 | ||
6 | From: Eric Dumazet <edumazet@google.com> | |
7 | ||
8 | commit 3b4929f65b0d8249f19a50245cd88ed1a2f78cff upstream. | |
9 | ||
10 | Jonathan Looney reported that TCP can trigger the following crash | |
11 | in tcp_shifted_skb() : | |
12 | ||
13 | BUG_ON(tcp_skb_pcount(skb) < pcount); | |
14 | ||
15 | This can happen if the remote peer has advertised the smallest | |
16 | MSS that linux TCP accepts : 48 | |
17 | ||
18 | An skb can hold 17 fragments, and each fragment can hold 32KB | |
19 | on x86, or 64KB on PowerPC. | |
20 | ||
21 | This means that the 16bit width of TCP_SKB_CB(skb)->tcp_gso_segs | |
22 | can overflow. | |
23 | ||
24 | Note that tcp_sendmsg() builds skbs with less than 64KB | |
25 | of payload, so this problem needs SACK to be enabled. | |
26 | SACK blocks allow TCP to coalesce multiple skbs in the retransmit | |
27 | queue, thus filling the 17 fragments to maximal capacity. | |
28 | ||
29 | CVE-2019-11477 -- u16 overflow of TCP_SKB_CB(skb)->tcp_gso_segs | |
30 | ||
31 | Backport notes, provided by Joao Martins <joao.m.martins@oracle.com> | |
32 | ||
33 | v4.15 or since commit 737ff314563 ("tcp: use sequence distance to | |
34 | detect reordering") had switched from the packet-based FACK tracking and | |
35 | switched to sequence-based. | |
36 | ||
37 | v4.14 and older still have the old logic and hence on | |
38 | tcp_shift_skb_data() needs to retain its original logic and have | |
39 | @fack_count in sync. In other words, we keep the increment of pcount with | |
40 | tcp_skb_pcount(skb) to later use that to update fack_count. To make it | |
41 | more explicit we track the new skb that gets incremented to pcount in | |
42 | @next_pcount, and we get to avoid the constant invocation of | |
43 | tcp_skb_pcount(skb) altogether. | |
44 | ||
45 | Fixes: 832d11c5cd07 ("tcp: Try to restore large SKBs while SACK processing") | |
46 | Signed-off-by: Eric Dumazet <edumazet@google.com> | |
47 | Reported-by: Jonathan Looney <jtl@netflix.com> | |
48 | Acked-by: Neal Cardwell <ncardwell@google.com> | |
49 | Reviewed-by: Tyler Hicks <tyhicks@canonical.com> | |
50 | Cc: Yuchung Cheng <ycheng@google.com> | |
51 | Cc: Bruce Curtis <brucec@netflix.com> | |
52 | Cc: Jonathan Lemon <jonathan.lemon@gmail.com> | |
53 | Signed-off-by: David S. Miller <davem@davemloft.net> | |
54 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
55 | --- | |
56 | include/linux/tcp.h | 3 +++ | |
57 | include/net/tcp.h | 2 ++ | |
58 | net/ipv4/tcp.c | 1 + | |
59 | net/ipv4/tcp_input.c | 28 ++++++++++++++++++++++------ | |
60 | net/ipv4/tcp_output.c | 4 ++-- | |
61 | 5 files changed, 30 insertions(+), 8 deletions(-) | |
62 | ||
63 | --- a/include/linux/tcp.h | |
64 | +++ b/include/linux/tcp.h | |
65 | @@ -433,4 +433,7 @@ static inline void tcp_saved_syn_free(st | |
66 | tp->saved_syn = NULL; | |
67 | } | |
68 | ||
69 | +int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, | |
70 | + int shiftlen); | |
71 | + | |
72 | #endif /* _LINUX_TCP_H */ | |
73 | --- a/include/net/tcp.h | |
74 | +++ b/include/net/tcp.h | |
75 | @@ -53,6 +53,8 @@ void tcp_time_wait(struct sock *sk, int | |
76 | ||
77 | #define MAX_TCP_HEADER (128 + MAX_HEADER) | |
78 | #define MAX_TCP_OPTION_SPACE 40 | |
79 | +#define TCP_MIN_SND_MSS 48 | |
80 | +#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE) | |
81 | ||
82 | /* | |
83 | * Never offer a window over 32767 without using window scaling. Some | |
84 | --- a/net/ipv4/tcp.c | |
85 | +++ b/net/ipv4/tcp.c | |
86 | @@ -3307,6 +3307,7 @@ void __init tcp_init(void) | |
87 | unsigned long limit; | |
88 | unsigned int i; | |
89 | ||
90 | + BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE); | |
91 | BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > | |
92 | FIELD_SIZEOF(struct sk_buff, cb)); | |
93 | ||
94 | --- a/net/ipv4/tcp_input.c | |
95 | +++ b/net/ipv4/tcp_input.c | |
96 | @@ -1320,7 +1320,7 @@ static bool tcp_shifted_skb(struct sock | |
97 | TCP_SKB_CB(skb)->seq += shifted; | |
98 | ||
99 | tcp_skb_pcount_add(prev, pcount); | |
100 | - BUG_ON(tcp_skb_pcount(skb) < pcount); | |
101 | + WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); | |
102 | tcp_skb_pcount_add(skb, -pcount); | |
103 | ||
104 | /* When we're adding to gso_segs == 1, gso_size will be zero, | |
105 | @@ -1387,6 +1387,21 @@ static int skb_can_shift(const struct sk | |
106 | return !skb_headlen(skb) && skb_is_nonlinear(skb); | |
107 | } | |
108 | ||
109 | +int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, | |
110 | + int pcount, int shiftlen) | |
111 | +{ | |
112 | + /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE) | |
113 | + * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need | |
114 | + * to make sure not storing more than 65535 * 8 bytes per skb, | |
115 | + * even if current MSS is bigger. | |
116 | + */ | |
117 | + if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE)) | |
118 | + return 0; | |
119 | + if (unlikely(tcp_skb_pcount(to) + pcount > 65535)) | |
120 | + return 0; | |
121 | + return skb_shift(to, from, shiftlen); | |
122 | +} | |
123 | + | |
124 | /* Try collapsing SACK blocks spanning across multiple skbs to a single | |
125 | * skb. | |
126 | */ | |
127 | @@ -1398,6 +1413,7 @@ static struct sk_buff *tcp_shift_skb_dat | |
128 | struct tcp_sock *tp = tcp_sk(sk); | |
129 | struct sk_buff *prev; | |
130 | int mss; | |
131 | + int next_pcount; | |
132 | int pcount = 0; | |
133 | int len; | |
134 | int in_sack; | |
135 | @@ -1495,7 +1511,7 @@ static struct sk_buff *tcp_shift_skb_dat | |
136 | if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una)) | |
137 | goto fallback; | |
138 | ||
139 | - if (!skb_shift(prev, skb, len)) | |
140 | + if (!tcp_skb_shift(prev, skb, pcount, len)) | |
141 | goto fallback; | |
142 | if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) | |
143 | goto out; | |
144 | @@ -1514,11 +1530,11 @@ static struct sk_buff *tcp_shift_skb_dat | |
145 | goto out; | |
146 | ||
147 | len = skb->len; | |
148 | - if (skb_shift(prev, skb, len)) { | |
149 | - pcount += tcp_skb_pcount(skb); | |
150 | - tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0); | |
151 | + next_pcount = tcp_skb_pcount(skb); | |
152 | + if (tcp_skb_shift(prev, skb, next_pcount, len)) { | |
153 | + pcount += next_pcount; | |
154 | + tcp_shifted_skb(sk, skb, state, next_pcount, len, mss, 0); | |
155 | } | |
156 | - | |
157 | out: | |
158 | state->fack_count += pcount; | |
159 | return prev; | |
160 | --- a/net/ipv4/tcp_output.c | |
161 | +++ b/net/ipv4/tcp_output.c | |
162 | @@ -1355,8 +1355,8 @@ static inline int __tcp_mtu_to_mss(struc | |
163 | mss_now -= icsk->icsk_ext_hdr_len; | |
164 | ||
165 | /* Then reserve room for full set of TCP options and 8 bytes of data */ | |
166 | - if (mss_now < 48) | |
167 | - mss_now = 48; | |
168 | + if (mss_now < TCP_MIN_SND_MSS) | |
169 | + mss_now = TCP_MIN_SND_MSS; | |
170 | return mss_now; | |
171 | } | |
172 |