]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blame - releases/4.14.114/net-ip6-defrag-use-rbtrees-for-ipv6-defrag.patch
Linux 4.14.114
[thirdparty/kernel/stable-queue.git] / releases / 4.14.114 / net-ip6-defrag-use-rbtrees-for-ipv6-defrag.patch
CommitLineData
82b2f3ba
SL
1From 3cce1561230cd3939fe4662c2a899a322b51f76a Mon Sep 17 00:00:00 2001
2From: Peter Oskolkov <posk@google.com>
3Date: Tue, 23 Apr 2019 10:48:24 -0700
4Subject: net: IP6 defrag: use rbtrees for IPv6 defrag
5
6[ Upstream commit d4289fcc9b16b89619ee1c54f829e05e56de8b9a ]
7
8Currently, IPv6 defragmentation code drops non-last fragments that
9are smaller than 1280 bytes: see
10commit 0ed4229b08c1 ("ipv6: defrag: drop non-last frags smaller than min mtu")
11
12This behavior is not specified in IPv6 RFCs and appears to break
13compatibility with some IPv6 implementations, as reported here:
14https://www.spinics.net/lists/netdev/msg543846.html
15
16This patch re-uses common IP defragmentation queueing and reassembly
17code in IPv6, removing the 1280 byte restriction.
18
19v2: change handling of overlaps to match that of upstream.
20
21Signed-off-by: Peter Oskolkov <posk@google.com>
22Reported-by: Tom Herbert <tom@herbertland.com>
23Cc: Eric Dumazet <edumazet@google.com>
24Cc: Florian Westphal <fw@strlen.de>
25Signed-off-by: David S. Miller <davem@davemloft.net>
26Signed-off-by: Sasha Levin <sashal@kernel.org>
27---
28 include/net/ipv6_frag.h | 11 +-
29 net/ipv6/reassembly.c | 249 +++++++++++-----------------------------
30 2 files changed, 77 insertions(+), 183 deletions(-)
31
32diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
33index 6ced1e6899b6..28aa9b30aece 100644
34--- a/include/net/ipv6_frag.h
35+++ b/include/net/ipv6_frag.h
36@@ -82,8 +82,15 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
37 __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
38
39 /* Don't send error if the first segment did not arrive. */
40- head = fq->q.fragments;
41- if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
42+ if (!(fq->q.flags & INET_FRAG_FIRST_IN))
43+ goto out;
44+
45+ /* sk_buff::dev and sk_buff::rbnode are unionized. So we
46+ * pull the head out of the tree in order to be able to
47+ * deal with head->dev.
48+ */
49+ head = inet_frag_pull_head(&fq->q);
50+ if (!head)
51 goto out;
52
53 head->dev = dev;
54diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
55index e5ab3b7813d6..fe797b29ca89 100644
56--- a/net/ipv6/reassembly.c
57+++ b/net/ipv6/reassembly.c
58@@ -62,13 +62,6 @@
59
60 static const char ip6_frag_cache_name[] = "ip6-frags";
61
62-struct ip6frag_skb_cb {
63- struct inet6_skb_parm h;
64- int offset;
65-};
66-
67-#define FRAG6_CB(skb) ((struct ip6frag_skb_cb *)((skb)->cb))
68-
69 static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
70 {
71 return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
72@@ -76,8 +69,8 @@ static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
73
74 static struct inet_frags ip6_frags;
75
76-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
77- struct net_device *dev);
78+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
79+ struct sk_buff *prev_tail, struct net_device *dev);
80
81 static void ip6_frag_expire(struct timer_list *t)
82 {
83@@ -118,21 +111,26 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
84 struct frag_hdr *fhdr, int nhoff,
85 u32 *prob_offset)
86 {
87- struct sk_buff *prev, *next;
88- struct net_device *dev;
89- int offset, end, fragsize;
90 struct net *net = dev_net(skb_dst(skb)->dev);
91+ int offset, end, fragsize;
92+ struct sk_buff *prev_tail;
93+ struct net_device *dev;
94+ int err = -ENOENT;
95 u8 ecn;
96
97 if (fq->q.flags & INET_FRAG_COMPLETE)
98 goto err;
99
100+ err = -EINVAL;
101 offset = ntohs(fhdr->frag_off) & ~0x7;
102 end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
103 ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
104
105 if ((unsigned int)end > IPV6_MAXPLEN) {
106 *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
107+ /* note that if prob_offset is set, the skb is freed elsewhere,
108+ * we do not free it here.
109+ */
110 return -1;
111 }
112
113@@ -152,7 +150,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
114 */
115 if (end < fq->q.len ||
116 ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
117- goto err;
118+ goto discard_fq;
119 fq->q.flags |= INET_FRAG_LAST_IN;
120 fq->q.len = end;
121 } else {
122@@ -169,70 +167,36 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
123 if (end > fq->q.len) {
124 /* Some bits beyond end -> corruption. */
125 if (fq->q.flags & INET_FRAG_LAST_IN)
126- goto err;
127+ goto discard_fq;
128 fq->q.len = end;
129 }
130 }
131
132 if (end == offset)
133- goto err;
134+ goto discard_fq;
135
136+ err = -ENOMEM;
137 /* Point into the IP datagram 'data' part. */
138 if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
139- goto err;
140-
141- if (pskb_trim_rcsum(skb, end - offset))
142- goto err;
143-
144- /* Find out which fragments are in front and at the back of us
145- * in the chain of fragments so far. We must know where to put
146- * this fragment, right?
147- */
148- prev = fq->q.fragments_tail;
149- if (!prev || FRAG6_CB(prev)->offset < offset) {
150- next = NULL;
151- goto found;
152- }
153- prev = NULL;
154- for (next = fq->q.fragments; next != NULL; next = next->next) {
155- if (FRAG6_CB(next)->offset >= offset)
156- break; /* bingo! */
157- prev = next;
158- }
159-
160-found:
161- /* RFC5722, Section 4, amended by Errata ID : 3089
162- * When reassembling an IPv6 datagram, if
163- * one or more its constituent fragments is determined to be an
164- * overlapping fragment, the entire datagram (and any constituent
165- * fragments) MUST be silently discarded.
166- */
167-
168- /* Check for overlap with preceding fragment. */
169- if (prev &&
170- (FRAG6_CB(prev)->offset + prev->len) > offset)
171 goto discard_fq;
172
173- /* Look for overlap with succeeding segment. */
174- if (next && FRAG6_CB(next)->offset < end)
175+ err = pskb_trim_rcsum(skb, end - offset);
176+ if (err)
177 goto discard_fq;
178
179- FRAG6_CB(skb)->offset = offset;
180+ /* Note : skb->rbnode and skb->dev share the same location. */
181+ dev = skb->dev;
182+ /* Makes sure compiler wont do silly aliasing games */
183+ barrier();
184
185- /* Insert this fragment in the chain of fragments. */
186- skb->next = next;
187- if (!next)
188- fq->q.fragments_tail = skb;
189- if (prev)
190- prev->next = skb;
191- else
192- fq->q.fragments = skb;
193+ prev_tail = fq->q.fragments_tail;
194+ err = inet_frag_queue_insert(&fq->q, skb, offset, end);
195+ if (err)
196+ goto insert_error;
197
198- dev = skb->dev;
199- if (dev) {
200+ if (dev)
201 fq->iif = dev->ifindex;
202- skb->dev = NULL;
203- }
204+
205 fq->q.stamp = skb->tstamp;
206 fq->q.meat += skb->len;
207 fq->ecn |= ecn;
208@@ -252,44 +216,48 @@ found:
209
210 if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
211 fq->q.meat == fq->q.len) {
212- int res;
213 unsigned long orefdst = skb->_skb_refdst;
214
215 skb->_skb_refdst = 0UL;
216- res = ip6_frag_reasm(fq, prev, dev);
217+ err = ip6_frag_reasm(fq, skb, prev_tail, dev);
218 skb->_skb_refdst = orefdst;
219- return res;
220+ return err;
221 }
222
223 skb_dst_drop(skb);
224- return -1;
225+ return -EINPROGRESS;
226
227+insert_error:
228+ if (err == IPFRAG_DUP) {
229+ kfree_skb(skb);
230+ return -EINVAL;
231+ }
232+ err = -EINVAL;
233+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
234+ IPSTATS_MIB_REASM_OVERLAPS);
235 discard_fq:
236 inet_frag_kill(&fq->q);
237-err:
238 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
239 IPSTATS_MIB_REASMFAILS);
240+err:
241 kfree_skb(skb);
242- return -1;
243+ return err;
244 }
245
246 /*
247 * Check if this packet is complete.
248- * Returns NULL on failure by any reason, and pointer
249- * to current nexthdr field in reassembled frame.
250 *
251 * It is called with locked fq, and caller must check that
252 * queue is eligible for reassembly i.e. it is not COMPLETE,
253 * the last and the first frames arrived and all the bits are here.
254 */
255-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
256- struct net_device *dev)
257+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
258+ struct sk_buff *prev_tail, struct net_device *dev)
259 {
260 struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
261- struct sk_buff *fp, *head = fq->q.fragments;
262- int payload_len, delta;
263 unsigned int nhoff;
264- int sum_truesize;
265+ void *reasm_data;
266+ int payload_len;
267 u8 ecn;
268
269 inet_frag_kill(&fq->q);
270@@ -298,120 +266,40 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
271 if (unlikely(ecn == 0xff))
272 goto out_fail;
273
274- /* Make the one we just received the head. */
275- if (prev) {
276- head = prev->next;
277- fp = skb_clone(head, GFP_ATOMIC);
278-
279- if (!fp)
280- goto out_oom;
281-
282- fp->next = head->next;
283- if (!fp->next)
284- fq->q.fragments_tail = fp;
285- prev->next = fp;
286-
287- skb_morph(head, fq->q.fragments);
288- head->next = fq->q.fragments->next;
289-
290- consume_skb(fq->q.fragments);
291- fq->q.fragments = head;
292- }
293-
294- WARN_ON(head == NULL);
295- WARN_ON(FRAG6_CB(head)->offset != 0);
296+ reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
297+ if (!reasm_data)
298+ goto out_oom;
299
300- /* Unfragmented part is taken from the first segment. */
301- payload_len = ((head->data - skb_network_header(head)) -
302+ payload_len = ((skb->data - skb_network_header(skb)) -
303 sizeof(struct ipv6hdr) + fq->q.len -
304 sizeof(struct frag_hdr));
305 if (payload_len > IPV6_MAXPLEN)
306 goto out_oversize;
307
308- delta = - head->truesize;
309-
310- /* Head of list must not be cloned. */
311- if (skb_unclone(head, GFP_ATOMIC))
312- goto out_oom;
313-
314- delta += head->truesize;
315- if (delta)
316- add_frag_mem_limit(fq->q.net, delta);
317-
318- /* If the first fragment is fragmented itself, we split
319- * it to two chunks: the first with data and paged part
320- * and the second, holding only fragments. */
321- if (skb_has_frag_list(head)) {
322- struct sk_buff *clone;
323- int i, plen = 0;
324-
325- clone = alloc_skb(0, GFP_ATOMIC);
326- if (!clone)
327- goto out_oom;
328- clone->next = head->next;
329- head->next = clone;
330- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
331- skb_frag_list_init(head);
332- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
333- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
334- clone->len = clone->data_len = head->data_len - plen;
335- head->data_len -= clone->len;
336- head->len -= clone->len;
337- clone->csum = 0;
338- clone->ip_summed = head->ip_summed;
339- add_frag_mem_limit(fq->q.net, clone->truesize);
340- }
341-
342 /* We have to remove fragment header from datagram and to relocate
343 * header in order to calculate ICV correctly. */
344 nhoff = fq->nhoffset;
345- skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
346- memmove(head->head + sizeof(struct frag_hdr), head->head,
347- (head->data - head->head) - sizeof(struct frag_hdr));
348- if (skb_mac_header_was_set(head))
349- head->mac_header += sizeof(struct frag_hdr);
350- head->network_header += sizeof(struct frag_hdr);
351-
352- skb_reset_transport_header(head);
353- skb_push(head, head->data - skb_network_header(head));
354-
355- sum_truesize = head->truesize;
356- for (fp = head->next; fp;) {
357- bool headstolen;
358- int delta;
359- struct sk_buff *next = fp->next;
360-
361- sum_truesize += fp->truesize;
362- if (head->ip_summed != fp->ip_summed)
363- head->ip_summed = CHECKSUM_NONE;
364- else if (head->ip_summed == CHECKSUM_COMPLETE)
365- head->csum = csum_add(head->csum, fp->csum);
366-
367- if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
368- kfree_skb_partial(fp, headstolen);
369- } else {
370- if (!skb_shinfo(head)->frag_list)
371- skb_shinfo(head)->frag_list = fp;
372- head->data_len += fp->len;
373- head->len += fp->len;
374- head->truesize += fp->truesize;
375- }
376- fp = next;
377- }
378- sub_frag_mem_limit(fq->q.net, sum_truesize);
379+ skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0];
380+ memmove(skb->head + sizeof(struct frag_hdr), skb->head,
381+ (skb->data - skb->head) - sizeof(struct frag_hdr));
382+ if (skb_mac_header_was_set(skb))
383+ skb->mac_header += sizeof(struct frag_hdr);
384+ skb->network_header += sizeof(struct frag_hdr);
385+
386+ skb_reset_transport_header(skb);
387+
388+ inet_frag_reasm_finish(&fq->q, skb, reasm_data);
389
390- head->next = NULL;
391- head->dev = dev;
392- head->tstamp = fq->q.stamp;
393- ipv6_hdr(head)->payload_len = htons(payload_len);
394- ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
395- IP6CB(head)->nhoff = nhoff;
396- IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
397- IP6CB(head)->frag_max_size = fq->q.max_size;
398+ skb->dev = dev;
399+ ipv6_hdr(skb)->payload_len = htons(payload_len);
400+ ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
401+ IP6CB(skb)->nhoff = nhoff;
402+ IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
403+ IP6CB(skb)->frag_max_size = fq->q.max_size;
404
405 /* Yes, and fold redundant checksum back. 8) */
406- skb_postpush_rcsum(head, skb_network_header(head),
407- skb_network_header_len(head));
408+ skb_postpush_rcsum(skb, skb_network_header(skb),
409+ skb_network_header_len(skb));
410
411 rcu_read_lock();
412 __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
413@@ -419,6 +307,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
414 fq->q.fragments = NULL;
415 fq->q.rb_fragments = RB_ROOT;
416 fq->q.fragments_tail = NULL;
417+ fq->q.last_run_head = NULL;
418 return 1;
419
420 out_oversize:
421@@ -430,6 +319,7 @@ out_fail:
422 rcu_read_lock();
423 __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
424 rcu_read_unlock();
425+ inet_frag_kill(&fq->q);
426 return -1;
427 }
428
429@@ -468,10 +358,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
430 return 1;
431 }
432
433- if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
434- fhdr->frag_off & htons(IP6_MF))
435- goto fail_hdr;
436-
437 iif = skb->dev ? skb->dev->ifindex : 0;
438 fq = fq_find(net, fhdr->identification, hdr, iif);
439 if (fq) {
440@@ -489,6 +375,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
441 if (prob_offset) {
442 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
443 IPSTATS_MIB_INHDRERRORS);
444+ /* icmpv6_param_prob() calls kfree_skb(skb) */
445 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
446 }
447 return ret;
448--
4492.19.1
450