1 From 95847c69d0f559fd6701bcd60e8e8b889fee2499 Mon Sep 17 00:00:00 2001
2 From: Peter Oskolkov <posk@google.com>
3 Date: Tue, 23 Apr 2019 10:48:22 -0700
4 Subject: net: IP defrag: encapsulate rbtree defrag code into callable
5 functions
6
7 [ Upstream commit c23f35d19db3b36ffb9e04b08f1d91565d15f84f ]
8
9 This is a refactoring patch: without changing runtime behavior,
10 it moves rbtree-related code from IPv4-specific files/functions
11 into .h/.c defrag files shared with IPv6 defragmentation code.
12
13 v2: make handling of overlapping packets match upstream.
14
15 Signed-off-by: Peter Oskolkov <posk@google.com>
16 Cc: Eric Dumazet <edumazet@google.com>
17 Cc: Florian Westphal <fw@strlen.de>
18 Cc: Tom Herbert <tom@herbertland.com>
19 Signed-off-by: David S. Miller <davem@davemloft.net>
20 Signed-off-by: Sasha Levin <sashal@kernel.org>
21 ---
22 include/net/inet_frag.h | 16 ++-
23 net/ipv4/inet_fragment.c | 293 +++++++++++++++++++++++++++++++++++++
24 net/ipv4/ip_fragment.c | 302 +++++----------------------------------
25 3 files changed, 342 insertions(+), 269 deletions(-)
26
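For reference, a rough caller-side sketch of how a per-protocol defrag path is expected to use the helpers introduced here, mirroring the ip_fragment.c changes below. The function name my_proto_frag_queue() and the elided bookkeeping are illustrative only, not part of this patch:

    #include <net/inet_frag.h>      /* inet_frag_queue_insert() and friends */

    /* Hypothetical per-protocol queueing step (illustrative only). */
    static int my_proto_frag_queue(struct inet_frag_queue *q,
                                   struct sk_buff *skb, int offset, int end)
    {
            struct sk_buff *prev_tail = q->fragments_tail;
            void *reasm_data;
            int err;

            err = inet_frag_queue_insert(q, skb, offset, end);
            if (err == IPFRAG_DUP) {
                    /* No new data: drop the duplicate, keep the queue. */
                    kfree_skb(skb);
                    return -EINVAL;
            }
            if (err == IPFRAG_OVERLAP) {
                    /* RFC 5722: an overlap invalidates the whole datagram. */
                    inet_frag_kill(q);
                    kfree_skb(skb);
                    return -EINVAL;
            }

            /* ... meat/flags bookkeeping as in ip_frag_queue() ... */

            /* Once first and last fragments are in and meat == len: */
            reasm_data = inet_frag_reasm_prepare(q, skb, prev_tail);
            if (!reasm_data)
                    return -ENOMEM;
            inet_frag_reasm_finish(q, skb, reasm_data);
            return 0;
    }

Keeping the insert/prepare/finish steps behind one shared API is what lets the IPv6 and nf_conntrack reassembly paths reuse the same overlap handling in the follow-up patches of this series.
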
27 diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
28 index 335cf7851f12..008f64823c41 100644
29 --- a/include/net/inet_frag.h
30 +++ b/include/net/inet_frag.h
31 @@ -77,8 +77,8 @@ struct inet_frag_queue {
32 struct timer_list timer;
33 spinlock_t lock;
34 refcount_t refcnt;
35 - struct sk_buff *fragments; /* Used in IPv6. */
36 - struct rb_root rb_fragments; /* Used in IPv4. */
37 + struct sk_buff *fragments; /* used in 6lopwpan IPv6. */
38 + struct rb_root rb_fragments; /* Used in IPv4/IPv6. */
39 struct sk_buff *fragments_tail;
40 struct sk_buff *last_run_head;
41 ktime_t stamp;
42 @@ -153,4 +153,16 @@ static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
43
44 extern const u8 ip_frag_ecn_table[16];
45
46 +/* Return values of inet_frag_queue_insert() */
47 +#define IPFRAG_OK 0
48 +#define IPFRAG_DUP 1
49 +#define IPFRAG_OVERLAP 2
50 +int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
51 + int offset, int end);
52 +void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
53 + struct sk_buff *parent);
54 +void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
55 + void *reasm_data);
56 +struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q);
57 +
58 #endif
59 diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
60 index 6ffee9d2b0e5..481cded81b2d 100644
61 --- a/net/ipv4/inet_fragment.c
62 +++ b/net/ipv4/inet_fragment.c
63 @@ -24,6 +24,62 @@
64 #include <net/sock.h>
65 #include <net/inet_frag.h>
66 #include <net/inet_ecn.h>
67 +#include <net/ip.h>
68 +#include <net/ipv6.h>
69 +
70 +/* Use skb->cb to track consecutive/adjacent fragments coming at
71 + * the end of the queue. Nodes in the rb-tree queue will
72 + * contain "runs" of one or more adjacent fragments.
73 + *
74 + * Invariants:
75 + * - next_frag is NULL at the tail of a "run";
76 + * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
77 + */
78 +struct ipfrag_skb_cb {
79 + union {
80 + struct inet_skb_parm h4;
81 + struct inet6_skb_parm h6;
82 + };
83 + struct sk_buff *next_frag;
84 + int frag_run_len;
85 +};
86 +
87 +#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
88 +
89 +static void fragcb_clear(struct sk_buff *skb)
90 +{
91 + RB_CLEAR_NODE(&skb->rbnode);
92 + FRAG_CB(skb)->next_frag = NULL;
93 + FRAG_CB(skb)->frag_run_len = skb->len;
94 +}
95 +
96 +/* Append skb to the last "run". */
97 +static void fragrun_append_to_last(struct inet_frag_queue *q,
98 + struct sk_buff *skb)
99 +{
100 + fragcb_clear(skb);
101 +
102 + FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
103 + FRAG_CB(q->fragments_tail)->next_frag = skb;
104 + q->fragments_tail = skb;
105 +}
106 +
107 +/* Create a new "run" with the skb. */
108 +static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb)
109 +{
110 + BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
111 + fragcb_clear(skb);
112 +
113 + if (q->last_run_head)
114 + rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
115 + &q->last_run_head->rbnode.rb_right);
116 + else
117 + rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
118 + rb_insert_color(&skb->rbnode, &q->rb_fragments);
119 +
120 + q->fragments_tail = skb;
121 + q->last_run_head = skb;
122 +}
123
124 /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
125 * Value : 0xff if frame should be dropped.
126 @@ -122,6 +178,28 @@ static void inet_frag_destroy_rcu(struct rcu_head *head)
127 kmem_cache_free(f->frags_cachep, q);
128 }
129
130 +unsigned int inet_frag_rbtree_purge(struct rb_root *root)
131 +{
132 + struct rb_node *p = rb_first(root);
133 + unsigned int sum = 0;
134 +
135 + while (p) {
136 + struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
137 +
138 + p = rb_next(p);
139 + rb_erase(&skb->rbnode, root);
140 + while (skb) {
141 + struct sk_buff *next = FRAG_CB(skb)->next_frag;
142 +
143 + sum += skb->truesize;
144 + kfree_skb(skb);
145 + skb = next;
146 + }
147 + }
148 + return sum;
149 +}
150 +EXPORT_SYMBOL(inet_frag_rbtree_purge);
151 +
152 void inet_frag_destroy(struct inet_frag_queue *q)
153 {
154 struct sk_buff *fp;
155 @@ -224,3 +302,218 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
156 return fq;
157 }
158 EXPORT_SYMBOL(inet_frag_find);
159 +
160 +int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
161 + int offset, int end)
162 +{
163 + struct sk_buff *last = q->fragments_tail;
164 +
165 + /* RFC5722, Section 4, amended by Errata ID : 3089
166 + * When reassembling an IPv6 datagram, if
167 + * one or more its constituent fragments is determined to be an
168 + * overlapping fragment, the entire datagram (and any constituent
169 + * fragments) MUST be silently discarded.
170 + *
171 + * Duplicates, however, should be ignored (i.e. skb dropped, but the
172 + * queue/fragments kept for later reassembly).
173 + */
174 + if (!last)
175 + fragrun_create(q, skb); /* First fragment. */
176 + else if (last->ip_defrag_offset + last->len < end) {
177 + /* This is the common case: skb goes to the end. */
178 + /* Detect and discard overlaps. */
179 + if (offset < last->ip_defrag_offset + last->len)
180 + return IPFRAG_OVERLAP;
181 + if (offset == last->ip_defrag_offset + last->len)
182 + fragrun_append_to_last(q, skb);
183 + else
184 + fragrun_create(q, skb);
185 + } else {
186 + /* Binary search. Note that skb can become the first fragment,
187 + * but not the last (covered above).
188 + */
189 + struct rb_node **rbn, *parent;
190 +
191 + rbn = &q->rb_fragments.rb_node;
192 + do {
193 + struct sk_buff *curr;
194 + int curr_run_end;
195 +
196 + parent = *rbn;
197 + curr = rb_to_skb(parent);
198 + curr_run_end = curr->ip_defrag_offset +
199 + FRAG_CB(curr)->frag_run_len;
200 + if (end <= curr->ip_defrag_offset)
201 + rbn = &parent->rb_left;
202 + else if (offset >= curr_run_end)
203 + rbn = &parent->rb_right;
204 + else if (offset >= curr->ip_defrag_offset &&
205 + end <= curr_run_end)
206 + return IPFRAG_DUP;
207 + else
208 + return IPFRAG_OVERLAP;
209 + } while (*rbn);
210 + /* Here we have parent properly set, and rbn pointing to
211 + * one of its NULL left/right children. Insert skb.
212 + */
213 + fragcb_clear(skb);
214 + rb_link_node(&skb->rbnode, parent, rbn);
215 + rb_insert_color(&skb->rbnode, &q->rb_fragments);
216 + }
217 +
218 + skb->ip_defrag_offset = offset;
219 +
220 + return IPFRAG_OK;
221 +}
222 +EXPORT_SYMBOL(inet_frag_queue_insert);
223 +
224 +void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
225 + struct sk_buff *parent)
226 +{
227 + struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
228 + struct sk_buff **nextp;
229 + int delta;
230 +
231 + if (head != skb) {
232 + fp = skb_clone(skb, GFP_ATOMIC);
233 + if (!fp)
234 + return NULL;
235 + FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
236 + if (RB_EMPTY_NODE(&skb->rbnode))
237 + FRAG_CB(parent)->next_frag = fp;
238 + else
239 + rb_replace_node(&skb->rbnode, &fp->rbnode,
240 + &q->rb_fragments);
241 + if (q->fragments_tail == skb)
242 + q->fragments_tail = fp;
243 + skb_morph(skb, head);
244 + FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
245 + rb_replace_node(&head->rbnode, &skb->rbnode,
246 + &q->rb_fragments);
247 + consume_skb(head);
248 + head = skb;
249 + }
250 + WARN_ON(head->ip_defrag_offset != 0);
251 +
252 + delta = -head->truesize;
253 +
254 + /* Head of list must not be cloned. */
255 + if (skb_unclone(head, GFP_ATOMIC))
256 + return NULL;
257 +
258 + delta += head->truesize;
259 + if (delta)
260 + add_frag_mem_limit(q->net, delta);
261 +
262 + /* If the first fragment is fragmented itself, we split
263 + * it to two chunks: the first with data and paged part
264 + * and the second, holding only fragments.
265 + */
266 + if (skb_has_frag_list(head)) {
267 + struct sk_buff *clone;
268 + int i, plen = 0;
269 +
270 + clone = alloc_skb(0, GFP_ATOMIC);
271 + if (!clone)
272 + return NULL;
273 + skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
274 + skb_frag_list_init(head);
275 + for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
276 + plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
277 + clone->data_len = head->data_len - plen;
278 + clone->len = clone->data_len;
279 + head->truesize += clone->truesize;
280 + clone->csum = 0;
281 + clone->ip_summed = head->ip_summed;
282 + add_frag_mem_limit(q->net, clone->truesize);
283 + skb_shinfo(head)->frag_list = clone;
284 + nextp = &clone->next;
285 + } else {
286 + nextp = &skb_shinfo(head)->frag_list;
287 + }
288 +
289 + return nextp;
290 +}
291 +EXPORT_SYMBOL(inet_frag_reasm_prepare);
292 +
293 +void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
294 + void *reasm_data)
295 +{
296 + struct sk_buff **nextp = (struct sk_buff **)reasm_data;
297 + struct rb_node *rbn;
298 + struct sk_buff *fp;
299 +
300 + skb_push(head, head->data - skb_network_header(head));
301 +
302 + /* Traverse the tree in order, to build frag_list. */
303 + fp = FRAG_CB(head)->next_frag;
304 + rbn = rb_next(&head->rbnode);
305 + rb_erase(&head->rbnode, &q->rb_fragments);
306 + while (rbn || fp) {
307 + /* fp points to the next sk_buff in the current run;
308 + * rbn points to the next run.
309 + */
310 + /* Go through the current run. */
311 + while (fp) {
312 + *nextp = fp;
313 + nextp = &fp->next;
314 + fp->prev = NULL;
315 + memset(&fp->rbnode, 0, sizeof(fp->rbnode));
316 + fp->sk = NULL;
317 + head->data_len += fp->len;
318 + head->len += fp->len;
319 + if (head->ip_summed != fp->ip_summed)
320 + head->ip_summed = CHECKSUM_NONE;
321 + else if (head->ip_summed == CHECKSUM_COMPLETE)
322 + head->csum = csum_add(head->csum, fp->csum);
323 + head->truesize += fp->truesize;
324 + fp = FRAG_CB(fp)->next_frag;
325 + }
326 + /* Move to the next run. */
327 + if (rbn) {
328 + struct rb_node *rbnext = rb_next(rbn);
329 +
330 + fp = rb_to_skb(rbn);
331 + rb_erase(rbn, &q->rb_fragments);
332 + rbn = rbnext;
333 + }
334 + }
335 + sub_frag_mem_limit(q->net, head->truesize);
336 +
337 + *nextp = NULL;
338 + head->next = NULL;
339 + head->prev = NULL;
340 + head->tstamp = q->stamp;
341 +}
342 +EXPORT_SYMBOL(inet_frag_reasm_finish);
343 +
344 +struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
345 +{
346 + struct sk_buff *head;
347 +
348 + if (q->fragments) {
349 + head = q->fragments;
350 + q->fragments = head->next;
351 + } else {
352 + struct sk_buff *skb;
353 +
354 + head = skb_rb_first(&q->rb_fragments);
355 + if (!head)
356 + return NULL;
357 + skb = FRAG_CB(head)->next_frag;
358 + if (skb)
359 + rb_replace_node(&head->rbnode, &skb->rbnode,
360 + &q->rb_fragments);
361 + else
362 + rb_erase(&head->rbnode, &q->rb_fragments);
363 + memset(&head->rbnode, 0, sizeof(head->rbnode));
364 + barrier();
365 + }
366 + if (head == q->fragments_tail)
367 + q->fragments_tail = NULL;
368 +
369 + sub_frag_mem_limit(q->net, head->truesize);
370 +
371 + return head;
372 +}
373 +EXPORT_SYMBOL(inet_frag_pull_head);
374 diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
375 index d95b32af4a0e..5a1d39e32196 100644
376 --- a/net/ipv4/ip_fragment.c
377 +++ b/net/ipv4/ip_fragment.c
378 @@ -57,57 +57,6 @@
379 */
380 static const char ip_frag_cache_name[] = "ip4-frags";
381
382 -/* Use skb->cb to track consecutive/adjacent fragments coming at
383 - * the end of the queue. Nodes in the rb-tree queue will
384 - * contain "runs" of one or more adjacent fragments.
385 - *
386 - * Invariants:
387 - * - next_frag is NULL at the tail of a "run";
388 - * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
389 - */
390 -struct ipfrag_skb_cb {
391 - struct inet_skb_parm h;
392 - struct sk_buff *next_frag;
393 - int frag_run_len;
394 -};
395 -
396 -#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
397 -
398 -static void ip4_frag_init_run(struct sk_buff *skb)
399 -{
400 - BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
401 -
402 - FRAG_CB(skb)->next_frag = NULL;
403 - FRAG_CB(skb)->frag_run_len = skb->len;
404 -}
405 -
406 -/* Append skb to the last "run". */
407 -static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
408 - struct sk_buff *skb)
409 -{
410 - RB_CLEAR_NODE(&skb->rbnode);
411 - FRAG_CB(skb)->next_frag = NULL;
412 -
413 - FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
414 - FRAG_CB(q->fragments_tail)->next_frag = skb;
415 - q->fragments_tail = skb;
416 -}
417 -
418 -/* Create a new "run" with the skb. */
419 -static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
420 -{
421 - if (q->last_run_head)
422 - rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
423 - &q->last_run_head->rbnode.rb_right);
424 - else
425 - rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
426 - rb_insert_color(&skb->rbnode, &q->rb_fragments);
427 -
428 - ip4_frag_init_run(skb);
429 - q->fragments_tail = skb;
430 - q->last_run_head = skb;
431 -}
432 -
433 /* Describe an entry in the "incomplete datagrams" queue. */
434 struct ipq {
435 struct inet_frag_queue q;
436 @@ -212,27 +161,9 @@ static void ip_expire(struct timer_list *t)
437 * pull the head out of the tree in order to be able to
438 * deal with head->dev.
439 */
440 - if (qp->q.fragments) {
441 - head = qp->q.fragments;
442 - qp->q.fragments = head->next;
443 - } else {
444 - head = skb_rb_first(&qp->q.rb_fragments);
445 - if (!head)
446 - goto out;
447 - if (FRAG_CB(head)->next_frag)
448 - rb_replace_node(&head->rbnode,
449 - &FRAG_CB(head)->next_frag->rbnode,
450 - &qp->q.rb_fragments);
451 - else
452 - rb_erase(&head->rbnode, &qp->q.rb_fragments);
453 - memset(&head->rbnode, 0, sizeof(head->rbnode));
454 - barrier();
455 - }
456 - if (head == qp->q.fragments_tail)
457 - qp->q.fragments_tail = NULL;
458 -
459 - sub_frag_mem_limit(qp->q.net, head->truesize);
460 -
461 + head = inet_frag_pull_head(&qp->q);
462 + if (!head)
463 + goto out;
464 head->dev = dev_get_by_index_rcu(net, qp->iif);
465 if (!head->dev)
466 goto out;
467 @@ -345,12 +276,10 @@ static int ip_frag_reinit(struct ipq *qp)
468 static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
469 {
470 struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
471 - struct rb_node **rbn, *parent;
472 - struct sk_buff *skb1, *prev_tail;
473 - int ihl, end, skb1_run_end;
474 + int ihl, end, flags, offset;
475 + struct sk_buff *prev_tail;
476 struct net_device *dev;
477 unsigned int fragsize;
478 - int flags, offset;
479 int err = -ENOENT;
480 u8 ecn;
481
482 @@ -382,7 +311,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
483 */
484 if (end < qp->q.len ||
485 ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
486 - goto err;
487 + goto discard_qp;
488 qp->q.flags |= INET_FRAG_LAST_IN;
489 qp->q.len = end;
490 } else {
491 @@ -394,82 +323,33 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
492 if (end > qp->q.len) {
493 /* Some bits beyond end -> corruption. */
494 if (qp->q.flags & INET_FRAG_LAST_IN)
495 - goto err;
496 + goto discard_qp;
497 qp->q.len = end;
498 }
499 }
500 if (end == offset)
501 - goto err;
502 + goto discard_qp;
503
504 err = -ENOMEM;
505 if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
506 - goto err;
507 + goto discard_qp;
508
509 err = pskb_trim_rcsum(skb, end - offset);
510 if (err)
511 - goto err;
512 + goto discard_qp;
513
514 /* Note : skb->rbnode and skb->dev share the same location. */
515 dev = skb->dev;
516 /* Makes sure compiler wont do silly aliasing games */
517 barrier();
518
519 - /* RFC5722, Section 4, amended by Errata ID : 3089
520 - * When reassembling an IPv6 datagram, if
521 - * one or more its constituent fragments is determined to be an
522 - * overlapping fragment, the entire datagram (and any constituent
523 - * fragments) MUST be silently discarded.
524 - *
525 - * We do the same here for IPv4 (and increment an snmp counter) but
526 - * we do not want to drop the whole queue in response to a duplicate
527 - * fragment.
528 - */
529 -
530 - err = -EINVAL;
531 - /* Find out where to put this fragment. */
532 prev_tail = qp->q.fragments_tail;
533 - if (!prev_tail)
534 - ip4_frag_create_run(&qp->q, skb); /* First fragment. */
535 - else if (prev_tail->ip_defrag_offset + prev_tail->len < end) {
536 - /* This is the common case: skb goes to the end. */
537 - /* Detect and discard overlaps. */
538 - if (offset < prev_tail->ip_defrag_offset + prev_tail->len)
539 - goto discard_qp;
540 - if (offset == prev_tail->ip_defrag_offset + prev_tail->len)
541 - ip4_frag_append_to_last_run(&qp->q, skb);
542 - else
543 - ip4_frag_create_run(&qp->q, skb);
544 - } else {
545 - /* Binary search. Note that skb can become the first fragment,
546 - * but not the last (covered above).
547 - */
548 - rbn = &qp->q.rb_fragments.rb_node;
549 - do {
550 - parent = *rbn;
551 - skb1 = rb_to_skb(parent);
552 - skb1_run_end = skb1->ip_defrag_offset +
553 - FRAG_CB(skb1)->frag_run_len;
554 - if (end <= skb1->ip_defrag_offset)
555 - rbn = &parent->rb_left;
556 - else if (offset >= skb1_run_end)
557 - rbn = &parent->rb_right;
558 - else if (offset >= skb1->ip_defrag_offset &&
559 - end <= skb1_run_end)
560 - goto err; /* No new data, potential duplicate */
561 - else
562 - goto discard_qp; /* Found an overlap */
563 - } while (*rbn);
564 - /* Here we have parent properly set, and rbn pointing to
565 - * one of its NULL left/right children. Insert skb.
566 - */
567 - ip4_frag_init_run(skb);
568 - rb_link_node(&skb->rbnode, parent, rbn);
569 - rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
570 - }
571 + err = inet_frag_queue_insert(&qp->q, skb, offset, end);
572 + if (err)
573 + goto insert_error;
574
575 if (dev)
576 qp->iif = dev->ifindex;
577 - skb->ip_defrag_offset = offset;
578
579 qp->q.stamp = skb->tstamp;
580 qp->q.meat += skb->len;
581 @@ -494,15 +374,24 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
582 skb->_skb_refdst = 0UL;
583 err = ip_frag_reasm(qp, skb, prev_tail, dev);
584 skb->_skb_refdst = orefdst;
585 + if (err)
586 + inet_frag_kill(&qp->q);
587 return err;
588 }
589
590 skb_dst_drop(skb);
591 return -EINPROGRESS;
592
593 +insert_error:
594 + if (err == IPFRAG_DUP) {
595 + kfree_skb(skb);
596 + return -EINVAL;
597 + }
598 + err = -EINVAL;
599 + __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
600 discard_qp:
601 inet_frag_kill(&qp->q);
602 - __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
603 + __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
604 err:
605 kfree_skb(skb);
606 return err;
607 @@ -514,13 +403,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
608 {
609 struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
610 struct iphdr *iph;
611 - struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments);
612 - struct sk_buff **nextp; /* To build frag_list. */
613 - struct rb_node *rbn;
614 - int len;
615 - int ihlen;
616 - int delta;
617 - int err;
618 + void *reasm_data;
619 + int len, err;
620 u8 ecn;
621
622 ipq_kill(qp);
623 @@ -530,117 +414,23 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
624 err = -EINVAL;
625 goto out_fail;
626 }
627 - /* Make the one we just received the head. */
628 - if (head != skb) {
629 - fp = skb_clone(skb, GFP_ATOMIC);
630 - if (!fp)
631 - goto out_nomem;
632 - FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
633 - if (RB_EMPTY_NODE(&skb->rbnode))
634 - FRAG_CB(prev_tail)->next_frag = fp;
635 - else
636 - rb_replace_node(&skb->rbnode, &fp->rbnode,
637 - &qp->q.rb_fragments);
638 - if (qp->q.fragments_tail == skb)
639 - qp->q.fragments_tail = fp;
640 - skb_morph(skb, head);
641 - FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
642 - rb_replace_node(&head->rbnode, &skb->rbnode,
643 - &qp->q.rb_fragments);
644 - consume_skb(head);
645 - head = skb;
646 - }
647
648 - WARN_ON(head->ip_defrag_offset != 0);
649 -
650 - /* Allocate a new buffer for the datagram. */
651 - ihlen = ip_hdrlen(head);
652 - len = ihlen + qp->q.len;
653 + /* Make the one we just received the head. */
654 + reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail);
655 + if (!reasm_data)
656 + goto out_nomem;
657
658 + len = ip_hdrlen(skb) + qp->q.len;
659 err = -E2BIG;
660 if (len > 65535)
661 goto out_oversize;
662
663 - delta = - head->truesize;
664 -
665 - /* Head of list must not be cloned. */
666 - if (skb_unclone(head, GFP_ATOMIC))
667 - goto out_nomem;
668 -
669 - delta += head->truesize;
670 - if (delta)
671 - add_frag_mem_limit(qp->q.net, delta);
672 -
673 - /* If the first fragment is fragmented itself, we split
674 - * it to two chunks: the first with data and paged part
675 - * and the second, holding only fragments. */
676 - if (skb_has_frag_list(head)) {
677 - struct sk_buff *clone;
678 - int i, plen = 0;
679 -
680 - clone = alloc_skb(0, GFP_ATOMIC);
681 - if (!clone)
682 - goto out_nomem;
683 - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
684 - skb_frag_list_init(head);
685 - for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
686 - plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
687 - clone->len = clone->data_len = head->data_len - plen;
688 - head->truesize += clone->truesize;
689 - clone->csum = 0;
690 - clone->ip_summed = head->ip_summed;
691 - add_frag_mem_limit(qp->q.net, clone->truesize);
692 - skb_shinfo(head)->frag_list = clone;
693 - nextp = &clone->next;
694 - } else {
695 - nextp = &skb_shinfo(head)->frag_list;
696 - }
697 + inet_frag_reasm_finish(&qp->q, skb, reasm_data);
698
699 - skb_push(head, head->data - skb_network_header(head));
700 + skb->dev = dev;
701 + IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
702
703 - /* Traverse the tree in order, to build frag_list. */
704 - fp = FRAG_CB(head)->next_frag;
705 - rbn = rb_next(&head->rbnode);
706 - rb_erase(&head->rbnode, &qp->q.rb_fragments);
707 - while (rbn || fp) {
708 - /* fp points to the next sk_buff in the current run;
709 - * rbn points to the next run.
710 - */
711 - /* Go through the current run. */
712 - while (fp) {
713 - *nextp = fp;
714 - nextp = &fp->next;
715 - fp->prev = NULL;
716 - memset(&fp->rbnode, 0, sizeof(fp->rbnode));
717 - fp->sk = NULL;
718 - head->data_len += fp->len;
719 - head->len += fp->len;
720 - if (head->ip_summed != fp->ip_summed)
721 - head->ip_summed = CHECKSUM_NONE;
722 - else if (head->ip_summed == CHECKSUM_COMPLETE)
723 - head->csum = csum_add(head->csum, fp->csum);
724 - head->truesize += fp->truesize;
725 - fp = FRAG_CB(fp)->next_frag;
726 - }
727 - /* Move to the next run. */
728 - if (rbn) {
729 - struct rb_node *rbnext = rb_next(rbn);
730 -
731 - fp = rb_to_skb(rbn);
732 - rb_erase(rbn, &qp->q.rb_fragments);
733 - rbn = rbnext;
734 - }
735 - }
736 - sub_frag_mem_limit(qp->q.net, head->truesize);
737 -
738 - *nextp = NULL;
739 - head->next = NULL;
740 - head->prev = NULL;
741 - head->dev = dev;
742 - head->tstamp = qp->q.stamp;
743 - IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
744 -
745 - iph = ip_hdr(head);
746 + iph = ip_hdr(skb);
747 iph->tot_len = htons(len);
748 iph->tos |= ecn;
749
750 @@ -653,7 +443,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
751 * from one very small df-fragment and one large non-df frag.
752 */
753 if (qp->max_df_size == qp->q.max_size) {
754 - IPCB(head)->flags |= IPSKB_FRAG_PMTU;
755 + IPCB(skb)->flags |= IPSKB_FRAG_PMTU;
756 iph->frag_off = htons(IP_DF);
757 } else {
758 iph->frag_off = 0;
759 @@ -751,28 +541,6 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
760 }
761 EXPORT_SYMBOL(ip_check_defrag);
762
763 -unsigned int inet_frag_rbtree_purge(struct rb_root *root)
764 -{
765 - struct rb_node *p = rb_first(root);
766 - unsigned int sum = 0;
767 -
768 - while (p) {
769 - struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
770 -
771 - p = rb_next(p);
772 - rb_erase(&skb->rbnode, root);
773 - while (skb) {
774 - struct sk_buff *next = FRAG_CB(skb)->next_frag;
775 -
776 - sum += skb->truesize;
777 - kfree_skb(skb);
778 - skb = next;
779 - }
780 - }
781 - return sum;
782 -}
783 -EXPORT_SYMBOL(inet_frag_rbtree_purge);
784 -
785 #ifdef CONFIG_SYSCTL
786 static int dist_min;
787
788 --
789 2.19.1
790