From 95847c69d0f559fd6701bcd60e8e8b889fee2499 Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Tue, 23 Apr 2019 10:48:22 -0700
Subject: net: IP defrag: encapsulate rbtree defrag code into callable
 functions

[ Upstream commit c23f35d19db3b36ffb9e04b08f1d91565d15f84f ]

This is a refactoring patch: without changing runtime behavior,
it moves rbtree-related code from IPv4-specific files/functions
into .h/.c defrag files shared with IPv6 defragmentation code.

v2: make handling of overlapping packets match upstream.

Signed-off-by: Peter Oskolkov <posk@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Cc: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 include/net/inet_frag.h  |  16 ++-
 net/ipv4/inet_fragment.c | 293 +++++++++++++++++++++++++++++++++++++
 net/ipv4/ip_fragment.c   | 302 +++++----------------------------------
 3 files changed, 342 insertions(+), 269 deletions(-)

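For readers following the refactor, the sketch below condenses how a reassembler is expected to drive the four helpers this patch exports. It is illustrative only: the wrapper name frag_queue_one() is made up for this note, the INET_FRAG_FIRST_IN/LAST_IN flag handling is reduced to a single last_in parameter, and locking, statistics and protocol-specific error reporting are omitted. The IPv4 caller changed further down (ip_frag_queue()/ip_frag_reasm()) follows this shape.

static int frag_queue_one(struct inet_frag_queue *q, struct sk_buff *skb,
                          int offset, int end, bool last_in)
{
        struct sk_buff *prev_tail = q->fragments_tail;
        void *reasm_data;
        int err;

        err = inet_frag_queue_insert(q, skb, offset, end);
        if (err == IPFRAG_DUP) {
                /* Duplicate: drop this skb but keep the queue. */
                kfree_skb(skb);
                return -EINVAL;
        }
        if (err == IPFRAG_OVERLAP) {
                /* Overlap: discard the whole queue, per RFC 5722. */
                inet_frag_kill(q);
                kfree_skb(skb);
                return -EINVAL;
        }

        q->meat += skb->len;
        if (!last_in || q->meat != q->len)
                return -EINPROGRESS;    /* more fragments expected */

        /* Last piece arrived: make skb the head, link the rest behind it. */
        reasm_data = inet_frag_reasm_prepare(q, skb, prev_tail);
        if (!reasm_data)
                return -ENOMEM;
        inet_frag_reasm_finish(q, skb, reasm_data);
        return 0;
}
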
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 335cf7851f12..008f64823c41 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -77,8 +77,8 @@ struct inet_frag_queue {
 struct timer_list timer;
 spinlock_t lock;
 refcount_t refcnt;
- struct sk_buff *fragments; /* Used in IPv6. */
- struct rb_root rb_fragments; /* Used in IPv4. */
+ struct sk_buff *fragments; /* used in 6lopwpan IPv6. */
+ struct rb_root rb_fragments; /* Used in IPv4/IPv6. */
 struct sk_buff *fragments_tail;
 struct sk_buff *last_run_head;
 ktime_t stamp;
@@ -153,4 +153,16 @@ static inline void add_frag_mem_limit(struct netns_frags *nf, long val)

 extern const u8 ip_frag_ecn_table[16];

+/* Return values of inet_frag_queue_insert() */
+#define IPFRAG_OK 0
+#define IPFRAG_DUP 1
+#define IPFRAG_OVERLAP 2
+int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
+ int offset, int end);
+void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
+ struct sk_buff *parent);
+void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
+ void *reasm_data);
+struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q);
+
 #endif
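The helpers added to net/ipv4/inet_fragment.c below keep fragments in an rb-tree whose nodes are "runs" of adjacent fragments, linked through skb->cb. As a reading aid, the invariants spelled out in the comment block of the next hunk can be written as a small check; this is illustrative only, frag_run_is_consistent() is not part of the patch, and FRAG_CB() is the accessor defined in that hunk.

static bool frag_run_is_consistent(struct sk_buff *run_head)
{
        struct sk_buff *skb = run_head;
        int total = 0;

        for (;;) {
                total += skb->len;
                if (!FRAG_CB(skb)->next_frag)   /* tail of the run */
                        break;
                skb = FRAG_CB(skb)->next_frag;
        }
        /* Only the run head carries the accumulated length of the run. */
        return total == FRAG_CB(run_head)->frag_run_len;
}
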
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 6ffee9d2b0e5..481cded81b2d 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -24,6 +24,62 @@
 #include <net/sock.h>
 #include <net/inet_frag.h>
 #include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+
+/* Use skb->cb to track consecutive/adjacent fragments coming at
+ * the end of the queue. Nodes in the rb-tree queue will
+ * contain "runs" of one or more adjacent fragments.
+ *
+ * Invariants:
+ * - next_frag is NULL at the tail of a "run";
+ * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
+ */
+struct ipfrag_skb_cb {
+ union {
+ struct inet_skb_parm h4;
+ struct inet6_skb_parm h6;
+ };
+ struct sk_buff *next_frag;
+ int frag_run_len;
+};
+
+#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
+
+static void fragcb_clear(struct sk_buff *skb)
+{
+ RB_CLEAR_NODE(&skb->rbnode);
+ FRAG_CB(skb)->next_frag = NULL;
+ FRAG_CB(skb)->frag_run_len = skb->len;
+}
+
+/* Append skb to the last "run". */
+static void fragrun_append_to_last(struct inet_frag_queue *q,
+ struct sk_buff *skb)
+{
+ fragcb_clear(skb);
+
+ FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
+ FRAG_CB(q->fragments_tail)->next_frag = skb;
+ q->fragments_tail = skb;
+}
+
+/* Create a new "run" with the skb. */
+static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
+ fragcb_clear(skb);
+
+ if (q->last_run_head)
+ rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
+ &q->last_run_head->rbnode.rb_right);
+ else
+ rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
+ rb_insert_color(&skb->rbnode, &q->rb_fragments);
+
+ q->fragments_tail = skb;
+ q->last_run_head = skb;
+}

 /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
 * Value : 0xff if frame should be dropped.
@@ -122,6 +178,28 @@ static void inet_frag_destroy_rcu(struct rcu_head *head)
 kmem_cache_free(f->frags_cachep, q);
 }

+unsigned int inet_frag_rbtree_purge(struct rb_root *root)
+{
+ struct rb_node *p = rb_first(root);
+ unsigned int sum = 0;
+
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
+ while (skb) {
+ struct sk_buff *next = FRAG_CB(skb)->next_frag;
+
+ sum += skb->truesize;
+ kfree_skb(skb);
+ skb = next;
+ }
+ }
+ return sum;
+}
+EXPORT_SYMBOL(inet_frag_rbtree_purge);
+
 void inet_frag_destroy(struct inet_frag_queue *q)
 {
 struct sk_buff *fp;
@@ -224,3 +302,218 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
 return fq;
 }
 EXPORT_SYMBOL(inet_frag_find);
+
+int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
+ int offset, int end)
+{
+ struct sk_buff *last = q->fragments_tail;
+
+ /* RFC5722, Section 4, amended by Errata ID : 3089
+ * When reassembling an IPv6 datagram, if
+ * one or more its constituent fragments is determined to be an
+ * overlapping fragment, the entire datagram (and any constituent
+ * fragments) MUST be silently discarded.
+ *
+ * Duplicates, however, should be ignored (i.e. skb dropped, but the
+ * queue/fragments kept for later reassembly).
+ */
+ if (!last)
+ fragrun_create(q, skb); /* First fragment. */
+ else if (last->ip_defrag_offset + last->len < end) {
+ /* This is the common case: skb goes to the end. */
+ /* Detect and discard overlaps. */
+ if (offset < last->ip_defrag_offset + last->len)
+ return IPFRAG_OVERLAP;
+ if (offset == last->ip_defrag_offset + last->len)
+ fragrun_append_to_last(q, skb);
+ else
+ fragrun_create(q, skb);
+ } else {
+ /* Binary search. Note that skb can become the first fragment,
+ * but not the last (covered above).
+ */
+ struct rb_node **rbn, *parent;
+
+ rbn = &q->rb_fragments.rb_node;
+ do {
+ struct sk_buff *curr;
+ int curr_run_end;
+
+ parent = *rbn;
+ curr = rb_to_skb(parent);
+ curr_run_end = curr->ip_defrag_offset +
+ FRAG_CB(curr)->frag_run_len;
+ if (end <= curr->ip_defrag_offset)
+ rbn = &parent->rb_left;
+ else if (offset >= curr_run_end)
+ rbn = &parent->rb_right;
+ else if (offset >= curr->ip_defrag_offset &&
+ end <= curr_run_end)
+ return IPFRAG_DUP;
+ else
+ return IPFRAG_OVERLAP;
+ } while (*rbn);
+ /* Here we have parent properly set, and rbn pointing to
+ * one of its NULL left/right children. Insert skb.
+ */
+ fragcb_clear(skb);
+ rb_link_node(&skb->rbnode, parent, rbn);
+ rb_insert_color(&skb->rbnode, &q->rb_fragments);
+ }
+
+ skb->ip_defrag_offset = offset;
+
+ return IPFRAG_OK;
+}
+EXPORT_SYMBOL(inet_frag_queue_insert);
+
+void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
+ struct sk_buff *parent)
+{
+ struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
+ struct sk_buff **nextp;
+ int delta;
+
+ if (head != skb) {
+ fp = skb_clone(skb, GFP_ATOMIC);
+ if (!fp)
+ return NULL;
+ FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
+ if (RB_EMPTY_NODE(&skb->rbnode))
+ FRAG_CB(parent)->next_frag = fp;
+ else
+ rb_replace_node(&skb->rbnode, &fp->rbnode,
+ &q->rb_fragments);
+ if (q->fragments_tail == skb)
+ q->fragments_tail = fp;
+ skb_morph(skb, head);
+ FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
+ rb_replace_node(&head->rbnode, &skb->rbnode,
+ &q->rb_fragments);
+ consume_skb(head);
+ head = skb;
+ }
+ WARN_ON(head->ip_defrag_offset != 0);
+
+ delta = -head->truesize;
+
+ /* Head of list must not be cloned. */
+ if (skb_unclone(head, GFP_ATOMIC))
+ return NULL;
+
+ delta += head->truesize;
+ if (delta)
+ add_frag_mem_limit(q->net, delta);
+
+ /* If the first fragment is fragmented itself, we split
+ * it to two chunks: the first with data and paged part
+ * and the second, holding only fragments.
+ */
+ if (skb_has_frag_list(head)) {
+ struct sk_buff *clone;
+ int i, plen = 0;
+
+ clone = alloc_skb(0, GFP_ATOMIC);
+ if (!clone)
+ return NULL;
+ skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+ skb_frag_list_init(head);
+ for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+ plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+ clone->data_len = head->data_len - plen;
+ clone->len = clone->data_len;
+ head->truesize += clone->truesize;
+ clone->csum = 0;
+ clone->ip_summed = head->ip_summed;
+ add_frag_mem_limit(q->net, clone->truesize);
+ skb_shinfo(head)->frag_list = clone;
+ nextp = &clone->next;
+ } else {
+ nextp = &skb_shinfo(head)->frag_list;
+ }
+
+ return nextp;
+}
+EXPORT_SYMBOL(inet_frag_reasm_prepare);
+
+void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
+ void *reasm_data)
+{
+ struct sk_buff **nextp = (struct sk_buff **)reasm_data;
+ struct rb_node *rbn;
+ struct sk_buff *fp;
+
+ skb_push(head, head->data - skb_network_header(head));
+
+ /* Traverse the tree in order, to build frag_list. */
+ fp = FRAG_CB(head)->next_frag;
+ rbn = rb_next(&head->rbnode);
+ rb_erase(&head->rbnode, &q->rb_fragments);
+ while (rbn || fp) {
+ /* fp points to the next sk_buff in the current run;
+ * rbn points to the next run.
+ */
+ /* Go through the current run. */
+ while (fp) {
+ *nextp = fp;
+ nextp = &fp->next;
+ fp->prev = NULL;
+ memset(&fp->rbnode, 0, sizeof(fp->rbnode));
+ fp->sk = NULL;
+ head->data_len += fp->len;
+ head->len += fp->len;
+ if (head->ip_summed != fp->ip_summed)
+ head->ip_summed = CHECKSUM_NONE;
+ else if (head->ip_summed == CHECKSUM_COMPLETE)
+ head->csum = csum_add(head->csum, fp->csum);
+ head->truesize += fp->truesize;
+ fp = FRAG_CB(fp)->next_frag;
+ }
+ /* Move to the next run. */
+ if (rbn) {
+ struct rb_node *rbnext = rb_next(rbn);
+
+ fp = rb_to_skb(rbn);
+ rb_erase(rbn, &q->rb_fragments);
+ rbn = rbnext;
+ }
+ }
+ sub_frag_mem_limit(q->net, head->truesize);
+
+ *nextp = NULL;
+ head->next = NULL;
+ head->prev = NULL;
+ head->tstamp = q->stamp;
+}
+EXPORT_SYMBOL(inet_frag_reasm_finish);
+
+struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
+{
+ struct sk_buff *head;
+
+ if (q->fragments) {
+ head = q->fragments;
+ q->fragments = head->next;
+ } else {
+ struct sk_buff *skb;
+
+ head = skb_rb_first(&q->rb_fragments);
+ if (!head)
+ return NULL;
+ skb = FRAG_CB(head)->next_frag;
+ if (skb)
+ rb_replace_node(&head->rbnode, &skb->rbnode,
+ &q->rb_fragments);
+ else
+ rb_erase(&head->rbnode, &q->rb_fragments);
+ memset(&head->rbnode, 0, sizeof(head->rbnode));
+ barrier();
+ }
+ if (head == q->fragments_tail)
+ q->fragments_tail = NULL;
+
+ sub_frag_mem_limit(q->net, head->truesize);
+
+ return head;
+}
+EXPORT_SYMBOL(inet_frag_pull_head);
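The ip_fragment.c hunk that follows swaps ip_expire()'s open-coded head extraction for inet_frag_pull_head(). Roughly, an expiry path built on the new helper looks like the sketch below; it is illustrative only, with locking, refcounting and the ICMP error generation elided, and frag_expire_sketch() is not a function in the patch.

static void frag_expire_sketch(struct inet_frag_queue *q)
{
        struct sk_buff *head;

        inet_frag_kill(q);              /* stop accepting further fragments */

        head = inet_frag_pull_head(q);  /* detach the first fragment, if any */
        if (!head)
                return;

        /* head now owns the data needed to report the timeout, e.g. via
         * icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0) on IPv4,
         * after resolving head->dev as ip_expire() does below.
         */
        kfree_skb(head);
}
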
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index d95b32af4a0e..5a1d39e32196 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -57,57 +57,6 @@
 */
 static const char ip_frag_cache_name[] = "ip4-frags";

-/* Use skb->cb to track consecutive/adjacent fragments coming at
- * the end of the queue. Nodes in the rb-tree queue will
- * contain "runs" of one or more adjacent fragments.
- *
- * Invariants:
- * - next_frag is NULL at the tail of a "run";
- * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
- */
-struct ipfrag_skb_cb {
- struct inet_skb_parm h;
- struct sk_buff *next_frag;
- int frag_run_len;
-};
-
-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
-
-static void ip4_frag_init_run(struct sk_buff *skb)
-{
- BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
-
- FRAG_CB(skb)->next_frag = NULL;
- FRAG_CB(skb)->frag_run_len = skb->len;
-}
-
-/* Append skb to the last "run". */
-static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
- struct sk_buff *skb)
-{
- RB_CLEAR_NODE(&skb->rbnode);
- FRAG_CB(skb)->next_frag = NULL;
-
- FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
- FRAG_CB(q->fragments_tail)->next_frag = skb;
- q->fragments_tail = skb;
-}
-
-/* Create a new "run" with the skb. */
-static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
-{
- if (q->last_run_head)
- rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
- &q->last_run_head->rbnode.rb_right);
- else
- rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
- rb_insert_color(&skb->rbnode, &q->rb_fragments);
-
- ip4_frag_init_run(skb);
- q->fragments_tail = skb;
- q->last_run_head = skb;
-}
-
 /* Describe an entry in the "incomplete datagrams" queue. */
 struct ipq {
 struct inet_frag_queue q;
@@ -212,27 +161,9 @@ static void ip_expire(struct timer_list *t)
 * pull the head out of the tree in order to be able to
 * deal with head->dev.
 */
- if (qp->q.fragments) {
- head = qp->q.fragments;
- qp->q.fragments = head->next;
- } else {
- head = skb_rb_first(&qp->q.rb_fragments);
- if (!head)
- goto out;
- if (FRAG_CB(head)->next_frag)
- rb_replace_node(&head->rbnode,
- &FRAG_CB(head)->next_frag->rbnode,
- &qp->q.rb_fragments);
- else
- rb_erase(&head->rbnode, &qp->q.rb_fragments);
- memset(&head->rbnode, 0, sizeof(head->rbnode));
- barrier();
- }
- if (head == qp->q.fragments_tail)
- qp->q.fragments_tail = NULL;
-
- sub_frag_mem_limit(qp->q.net, head->truesize);
-
+ head = inet_frag_pull_head(&qp->q);
+ if (!head)
+ goto out;
 head->dev = dev_get_by_index_rcu(net, qp->iif);
 if (!head->dev)
 goto out;
@@ -345,12 +276,10 @@ static int ip_frag_reinit(struct ipq *qp)
 static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
 struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
- struct rb_node **rbn, *parent;
- struct sk_buff *skb1, *prev_tail;
- int ihl, end, skb1_run_end;
+ int ihl, end, flags, offset;
+ struct sk_buff *prev_tail;
 struct net_device *dev;
 unsigned int fragsize;
- int flags, offset;
 int err = -ENOENT;
 u8 ecn;

@@ -382,7 +311,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 */
 if (end < qp->q.len ||
 ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
- goto err;
+ goto discard_qp;
 qp->q.flags |= INET_FRAG_LAST_IN;
 qp->q.len = end;
 } else {
@@ -394,82 +323,33 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 if (end > qp->q.len) {
 /* Some bits beyond end -> corruption. */
 if (qp->q.flags & INET_FRAG_LAST_IN)
- goto err;
+ goto discard_qp;
 qp->q.len = end;
 }
 }
 if (end == offset)
- goto err;
+ goto discard_qp;

 err = -ENOMEM;
 if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
- goto err;
+ goto discard_qp;

 err = pskb_trim_rcsum(skb, end - offset);
 if (err)
- goto err;
+ goto discard_qp;

 /* Note : skb->rbnode and skb->dev share the same location. */
 dev = skb->dev;
 /* Makes sure compiler wont do silly aliasing games */
 barrier();

- /* RFC5722, Section 4, amended by Errata ID : 3089
- * When reassembling an IPv6 datagram, if
- * one or more its constituent fragments is determined to be an
- * overlapping fragment, the entire datagram (and any constituent
- * fragments) MUST be silently discarded.
- *
- * We do the same here for IPv4 (and increment an snmp counter) but
- * we do not want to drop the whole queue in response to a duplicate
- * fragment.
- */
-
- err = -EINVAL;
- /* Find out where to put this fragment. */
 prev_tail = qp->q.fragments_tail;
- if (!prev_tail)
- ip4_frag_create_run(&qp->q, skb); /* First fragment. */
- else if (prev_tail->ip_defrag_offset + prev_tail->len < end) {
- /* This is the common case: skb goes to the end. */
- /* Detect and discard overlaps. */
- if (offset < prev_tail->ip_defrag_offset + prev_tail->len)
- goto discard_qp;
- if (offset == prev_tail->ip_defrag_offset + prev_tail->len)
- ip4_frag_append_to_last_run(&qp->q, skb);
- else
- ip4_frag_create_run(&qp->q, skb);
- } else {
- /* Binary search. Note that skb can become the first fragment,
- * but not the last (covered above).
- */
- rbn = &qp->q.rb_fragments.rb_node;
- do {
- parent = *rbn;
- skb1 = rb_to_skb(parent);
- skb1_run_end = skb1->ip_defrag_offset +
- FRAG_CB(skb1)->frag_run_len;
- if (end <= skb1->ip_defrag_offset)
- rbn = &parent->rb_left;
- else if (offset >= skb1_run_end)
- rbn = &parent->rb_right;
- else if (offset >= skb1->ip_defrag_offset &&
- end <= skb1_run_end)
- goto err; /* No new data, potential duplicate */
- else
- goto discard_qp; /* Found an overlap */
- } while (*rbn);
- /* Here we have parent properly set, and rbn pointing to
- * one of its NULL left/right children. Insert skb.
- */
- ip4_frag_init_run(skb);
- rb_link_node(&skb->rbnode, parent, rbn);
- rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
- }
+ err = inet_frag_queue_insert(&qp->q, skb, offset, end);
+ if (err)
+ goto insert_error;

 if (dev)
 qp->iif = dev->ifindex;
- skb->ip_defrag_offset = offset;

 qp->q.stamp = skb->tstamp;
 qp->q.meat += skb->len;
@@ -494,15 +374,24 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 skb->_skb_refdst = 0UL;
 err = ip_frag_reasm(qp, skb, prev_tail, dev);
 skb->_skb_refdst = orefdst;
+ if (err)
+ inet_frag_kill(&qp->q);
 return err;
 }

 skb_dst_drop(skb);
 return -EINPROGRESS;

+insert_error:
+ if (err == IPFRAG_DUP) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+ err = -EINVAL;
+ __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
 discard_qp:
 inet_frag_kill(&qp->q);
- __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
 err:
 kfree_skb(skb);
 return err;
@@ -514,13 +403,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 {
 struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
 struct iphdr *iph;
- struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments);
- struct sk_buff **nextp; /* To build frag_list. */
- struct rb_node *rbn;
- int len;
- int ihlen;
- int delta;
- int err;
+ void *reasm_data;
+ int len, err;
 u8 ecn;

 ipq_kill(qp);
@@ -530,117 +414,23 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 err = -EINVAL;
 goto out_fail;
 }
- /* Make the one we just received the head. */
- if (head != skb) {
- fp = skb_clone(skb, GFP_ATOMIC);
- if (!fp)
- goto out_nomem;
- FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
- if (RB_EMPTY_NODE(&skb->rbnode))
- FRAG_CB(prev_tail)->next_frag = fp;
- else
- rb_replace_node(&skb->rbnode, &fp->rbnode,
- &qp->q.rb_fragments);
- if (qp->q.fragments_tail == skb)
- qp->q.fragments_tail = fp;
- skb_morph(skb, head);
- FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
- rb_replace_node(&head->rbnode, &skb->rbnode,
- &qp->q.rb_fragments);
- consume_skb(head);
- head = skb;
- }

- WARN_ON(head->ip_defrag_offset != 0);
-
- /* Allocate a new buffer for the datagram. */
- ihlen = ip_hdrlen(head);
- len = ihlen + qp->q.len;
+ /* Make the one we just received the head. */
+ reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail);
+ if (!reasm_data)
+ goto out_nomem;

+ len = ip_hdrlen(skb) + qp->q.len;
 err = -E2BIG;
 if (len > 65535)
 goto out_oversize;

- delta = - head->truesize;
-
- /* Head of list must not be cloned. */
- if (skb_unclone(head, GFP_ATOMIC))
- goto out_nomem;
-
- delta += head->truesize;
- if (delta)
- add_frag_mem_limit(qp->q.net, delta);
-
- /* If the first fragment is fragmented itself, we split
- * it to two chunks: the first with data and paged part
- * and the second, holding only fragments. */
- if (skb_has_frag_list(head)) {
- struct sk_buff *clone;
- int i, plen = 0;
-
- clone = alloc_skb(0, GFP_ATOMIC);
- if (!clone)
- goto out_nomem;
- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
- skb_frag_list_init(head);
- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
- clone->len = clone->data_len = head->data_len - plen;
- head->truesize += clone->truesize;
- clone->csum = 0;
- clone->ip_summed = head->ip_summed;
- add_frag_mem_limit(qp->q.net, clone->truesize);
- skb_shinfo(head)->frag_list = clone;
- nextp = &clone->next;
- } else {
- nextp = &skb_shinfo(head)->frag_list;
- }
+ inet_frag_reasm_finish(&qp->q, skb, reasm_data);

- skb_push(head, head->data - skb_network_header(head));
+ skb->dev = dev;
+ IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);

- /* Traverse the tree in order, to build frag_list. */
- fp = FRAG_CB(head)->next_frag;
- rbn = rb_next(&head->rbnode);
- rb_erase(&head->rbnode, &qp->q.rb_fragments);
- while (rbn || fp) {
- /* fp points to the next sk_buff in the current run;
- * rbn points to the next run.
- */
- /* Go through the current run. */
- while (fp) {
- *nextp = fp;
- nextp = &fp->next;
- fp->prev = NULL;
- memset(&fp->rbnode, 0, sizeof(fp->rbnode));
- fp->sk = NULL;
- head->data_len += fp->len;
- head->len += fp->len;
- if (head->ip_summed != fp->ip_summed)
- head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_add(head->csum, fp->csum);
- head->truesize += fp->truesize;
- fp = FRAG_CB(fp)->next_frag;
- }
- /* Move to the next run. */
- if (rbn) {
- struct rb_node *rbnext = rb_next(rbn);
-
- fp = rb_to_skb(rbn);
- rb_erase(rbn, &qp->q.rb_fragments);
- rbn = rbnext;
- }
- }
- sub_frag_mem_limit(qp->q.net, head->truesize);
-
- *nextp = NULL;
- head->next = NULL;
- head->prev = NULL;
- head->dev = dev;
- head->tstamp = qp->q.stamp;
- IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
-
- iph = ip_hdr(head);
+ iph = ip_hdr(skb);
 iph->tot_len = htons(len);
 iph->tos |= ecn;

@@ -653,7 +443,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 * from one very small df-fragment and one large non-df frag.
 */
 if (qp->max_df_size == qp->q.max_size) {
- IPCB(head)->flags |= IPSKB_FRAG_PMTU;
+ IPCB(skb)->flags |= IPSKB_FRAG_PMTU;
 iph->frag_off = htons(IP_DF);
 } else {
 iph->frag_off = 0;
@@ -751,28 +541,6 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
 }
 EXPORT_SYMBOL(ip_check_defrag);

-unsigned int inet_frag_rbtree_purge(struct rb_root *root)
-{
- struct rb_node *p = rb_first(root);
- unsigned int sum = 0;
-
- while (p) {
- struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
-
- p = rb_next(p);
- rb_erase(&skb->rbnode, root);
- while (skb) {
- struct sk_buff *next = FRAG_CB(skb)->next_frag;
-
- sum += skb->truesize;
- kfree_skb(skb);
- skb = next;
- }
- }
- return sum;
-}
-EXPORT_SYMBOL(inet_frag_rbtree_purge);
-
 #ifdef CONFIG_SYSCTL
 static int dist_min;

-- 
2.19.1