From 95847c69d0f559fd6701bcd60e8e8b889fee2499 Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Tue, 23 Apr 2019 10:48:22 -0700
Subject: net: IP defrag: encapsulate rbtree defrag code into callable
 functions

[ Upstream commit c23f35d19db3b36ffb9e04b08f1d91565d15f84f ]

This is a refactoring patch: without changing runtime behavior,
it moves rbtree-related code from IPv4-specific files/functions
into .h/.c defrag files shared with IPv6 defragmentation code.

v2: make handling of overlapping packets match upstream.

Signed-off-by: Peter Oskolkov <posk@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Cc: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 include/net/inet_frag.h  |  16 ++-
 net/ipv4/inet_fragment.c | 293 +++++++++++++++++++++++++++++++++++++
 net/ipv4/ip_fragment.c   | 302 +++++----------------------------------
 3 files changed, 342 insertions(+), 269 deletions(-)

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 335cf7851f12..008f64823c41 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -77,8 +77,8 @@ struct inet_frag_queue {
 	struct timer_list	timer;
 	spinlock_t		lock;
 	refcount_t		refcnt;
-	struct sk_buff		*fragments;  /* Used in IPv6. */
-	struct rb_root		rb_fragments; /* Used in IPv4. */
+	struct sk_buff		*fragments;  /* used in 6lopwpan IPv6. */
+	struct rb_root		rb_fragments; /* Used in IPv4/IPv6. */
 	struct sk_buff		*fragments_tail;
 	struct sk_buff		*last_run_head;
 	ktime_t			stamp;
@@ -153,4 +153,16 @@ static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
 
 extern const u8 ip_frag_ecn_table[16];
 
+/* Return values of inet_frag_queue_insert() */
+#define IPFRAG_OK	0
+#define IPFRAG_DUP	1
+#define IPFRAG_OVERLAP	2
+int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
+			   int offset, int end);
+void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
+			      struct sk_buff *parent);
+void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
+			    void *reasm_data);
+struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q);
+
 #endif
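
These four declarations are the entire shared API. As a minimal sketch of
the call sequence a protocol-specific defrag engine is expected to follow
(the function name and the offset/end parsing are illustrative only; the
real IPv4 caller, ip_frag_queue(), is reworked to exactly this shape later
in this patch):

    /* Hedged sketch: locking and the "all fragments arrived" check
     * (q->meat == q->len) that gates reassembly are elided.
     */
    static int example_frag_queue(struct inet_frag_queue *q,
                                  struct sk_buff *skb, int offset, int end)
    {
            struct sk_buff *prev_tail = q->fragments_tail;
            void *reasm_data;
            int err;

            err = inet_frag_queue_insert(q, skb, offset, end);
            if (err == IPFRAG_DUP) {
                    kfree_skb(skb);        /* duplicate: keep the queue */
                    return -EINVAL;
            }
            if (err == IPFRAG_OVERLAP) {
                    inet_frag_kill(q);     /* overlap: drop whole queue */
                    kfree_skb(skb);
                    return -EINVAL;
            }

            /* Once the datagram is complete, reassemble around the skb
             * that arrived last:
             */
            reasm_data = inet_frag_reasm_prepare(q, skb, prev_tail);
            if (!reasm_data)
                    return -ENOMEM;
            inet_frag_reasm_finish(q, skb, reasm_data);
            return 0;
    }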
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 6ffee9d2b0e5..481cded81b2d 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -24,6 +24,62 @@
 #include <net/sock.h>
 #include <net/inet_frag.h>
 #include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+
+/* Use skb->cb to track consecutive/adjacent fragments coming at
+ * the end of the queue. Nodes in the rb-tree queue will
+ * contain "runs" of one or more adjacent fragments.
+ *
+ * Invariants:
+ * - next_frag is NULL at the tail of a "run";
+ * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
+ */
+struct ipfrag_skb_cb {
+	union {
+		struct inet_skb_parm	h4;
+		struct inet6_skb_parm	h6;
+	};
+	struct sk_buff		*next_frag;
+	int			frag_run_len;
+};
+
+#define FRAG_CB(skb)		((struct ipfrag_skb_cb *)((skb)->cb))
+
+static void fragcb_clear(struct sk_buff *skb)
+{
+	RB_CLEAR_NODE(&skb->rbnode);
+	FRAG_CB(skb)->next_frag = NULL;
+	FRAG_CB(skb)->frag_run_len = skb->len;
+}
+
+/* Append skb to the last "run". */
+static void fragrun_append_to_last(struct inet_frag_queue *q,
+				   struct sk_buff *skb)
+{
+	fragcb_clear(skb);
+
+	FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
+	FRAG_CB(q->fragments_tail)->next_frag = skb;
+	q->fragments_tail = skb;
+}
+
+/* Create a new "run" with the skb. */
+static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
+	fragcb_clear(skb);
+
+	if (q->last_run_head)
+		rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
+			     &q->last_run_head->rbnode.rb_right);
+	else
+		rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
+	rb_insert_color(&skb->rbnode, &q->rb_fragments);
+
+	q->fragments_tail = skb;
+	q->last_run_head = skb;
+}
 
 /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
  * Value : 0xff if frame should be dropped.
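
To make the "run" invariants concrete, this is the queue state the helpers
above would produce for three 1200-byte fragments f0, f1 and f2 arriving
in order at offsets 0, 1200 and 2400 (an illustrative sketch, not part of
the patch):

    /* fragrun_create(q, f0);          - first fragment starts a run
     * fragrun_append_to_last(q, f1);  - adjacent: extends the run
     * fragrun_append_to_last(q, f2);  - adjacent: extends the run
     *
     * rb_fragments now holds a single node, f0 (one rb-tree node per
     * run); the run itself is the list f0 -> f1 -> f2 -> NULL chained
     * through FRAG_CB(...)->next_frag.
     *
     * FRAG_CB(f0)->frag_run_len == 3600  (the head sums the whole run)
     * FRAG_CB(f1)->frag_run_len == 1200  (only the head's value is used)
     * q->fragments_tail == f2, q->last_run_head == f0
     */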
@@ -122,6 +178,28 @@ static void inet_frag_destroy_rcu(struct rcu_head *head)
 	kmem_cache_free(f->frags_cachep, q);
 }
 
+unsigned int inet_frag_rbtree_purge(struct rb_root *root)
+{
+	struct rb_node *p = rb_first(root);
+	unsigned int sum = 0;
+
+	while (p) {
+		struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+
+		p = rb_next(p);
+		rb_erase(&skb->rbnode, root);
+		while (skb) {
+			struct sk_buff *next = FRAG_CB(skb)->next_frag;
+
+			sum += skb->truesize;
+			kfree_skb(skb);
+			skb = next;
+		}
+	}
+	return sum;
+}
+EXPORT_SYMBOL(inet_frag_rbtree_purge);
+
 void inet_frag_destroy(struct inet_frag_queue *q)
 {
 	struct sk_buff *fp;
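
inet_frag_rbtree_purge() moves here verbatim from ip_fragment.c (the old
copy is removed at the end of this patch). It walks both levels of the
structure: runs via the rb-tree, then the skbs of each run via next_frag.
A hedged sketch of the teardown pattern it supports, with the accounting
adjustment its return value is meant for:

    static void example_queue_purge(struct inet_frag_queue *q)
    {
            unsigned int sum;

            /* Free every queued fragment, then return the accumulated
             * truesize to the fragment memory accounting.
             */
            sum = inet_frag_rbtree_purge(&q->rb_fragments);
            sub_frag_mem_limit(q->net, sum);
    }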
@@ -224,3 +302,218 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
 	return fq;
 }
 EXPORT_SYMBOL(inet_frag_find);
+
+int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
+			   int offset, int end)
+{
+	struct sk_buff *last = q->fragments_tail;
+
+	/* RFC5722, Section 4, amended by Errata ID : 3089
+	 * When reassembling an IPv6 datagram, if
+	 * one or more its constituent fragments is determined to be an
+	 * overlapping fragment, the entire datagram (and any constituent
+	 * fragments) MUST be silently discarded.
+	 *
+	 * Duplicates, however, should be ignored (i.e. skb dropped, but the
+	 * queue/fragments kept for later reassembly).
+	 */
+	if (!last)
+		fragrun_create(q, skb);  /* First fragment. */
+	else if (last->ip_defrag_offset + last->len < end) {
+		/* This is the common case: skb goes to the end. */
+		/* Detect and discard overlaps. */
+		if (offset < last->ip_defrag_offset + last->len)
+			return IPFRAG_OVERLAP;
+		if (offset == last->ip_defrag_offset + last->len)
+			fragrun_append_to_last(q, skb);
+		else
+			fragrun_create(q, skb);
+	} else {
+		/* Binary search. Note that skb can become the first fragment,
+		 * but not the last (covered above).
+		 */
+		struct rb_node **rbn, *parent;
+
+		rbn = &q->rb_fragments.rb_node;
+		do {
+			struct sk_buff *curr;
+			int curr_run_end;
+
+			parent = *rbn;
+			curr = rb_to_skb(parent);
+			curr_run_end = curr->ip_defrag_offset +
+					FRAG_CB(curr)->frag_run_len;
+			if (end <= curr->ip_defrag_offset)
+				rbn = &parent->rb_left;
+			else if (offset >= curr_run_end)
+				rbn = &parent->rb_right;
+			else if (offset >= curr->ip_defrag_offset &&
+				 end <= curr_run_end)
+				return IPFRAG_DUP;
+			else
+				return IPFRAG_OVERLAP;
+		} while (*rbn);
+		/* Here we have parent properly set, and rbn pointing to
+		 * one of its NULL left/right children. Insert skb.
+		 */
+		fragcb_clear(skb);
+		rb_link_node(&skb->rbnode, parent, rbn);
+		rb_insert_color(&skb->rbnode, &q->rb_fragments);
+	}
+
+	skb->ip_defrag_offset = offset;
+
+	return IPFRAG_OK;
+}
+EXPORT_SYMBOL(inet_frag_queue_insert);
+
+void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
+			      struct sk_buff *parent)
+{
+	struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
+	struct sk_buff **nextp;
+	int delta;
+
+	if (head != skb) {
+		fp = skb_clone(skb, GFP_ATOMIC);
+		if (!fp)
+			return NULL;
+		FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
+		if (RB_EMPTY_NODE(&skb->rbnode))
+			FRAG_CB(parent)->next_frag = fp;
+		else
+			rb_replace_node(&skb->rbnode, &fp->rbnode,
+					&q->rb_fragments);
+		if (q->fragments_tail == skb)
+			q->fragments_tail = fp;
+		skb_morph(skb, head);
+		FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
+		rb_replace_node(&head->rbnode, &skb->rbnode,
+				&q->rb_fragments);
+		consume_skb(head);
+		head = skb;
+	}
+	WARN_ON(head->ip_defrag_offset != 0);
+
+	delta = -head->truesize;
+
+	/* Head of list must not be cloned. */
+	if (skb_unclone(head, GFP_ATOMIC))
+		return NULL;
+
+	delta += head->truesize;
+	if (delta)
+		add_frag_mem_limit(q->net, delta);
+
+	/* If the first fragment is fragmented itself, we split
+	 * it to two chunks: the first with data and paged part
+	 * and the second, holding only fragments.
+	 */
+	if (skb_has_frag_list(head)) {
+		struct sk_buff *clone;
+		int i, plen = 0;
+
+		clone = alloc_skb(0, GFP_ATOMIC);
+		if (!clone)
+			return NULL;
+		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+		skb_frag_list_init(head);
+		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+		clone->data_len = head->data_len - plen;
+		clone->len = clone->data_len;
+		head->truesize += clone->truesize;
+		clone->csum = 0;
+		clone->ip_summed = head->ip_summed;
+		add_frag_mem_limit(q->net, clone->truesize);
+		skb_shinfo(head)->frag_list = clone;
+		nextp = &clone->next;
+	} else {
+		nextp = &skb_shinfo(head)->frag_list;
+	}
+
+	return nextp;
+}
+EXPORT_SYMBOL(inet_frag_reasm_prepare);
+
+void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
+			    void *reasm_data)
+{
+	struct sk_buff **nextp = (struct sk_buff **)reasm_data;
+	struct rb_node *rbn;
+	struct sk_buff *fp;
+
+	skb_push(head, head->data - skb_network_header(head));
+
+	/* Traverse the tree in order, to build frag_list. */
+	fp = FRAG_CB(head)->next_frag;
+	rbn = rb_next(&head->rbnode);
+	rb_erase(&head->rbnode, &q->rb_fragments);
+	while (rbn || fp) {
+		/* fp points to the next sk_buff in the current run;
+		 * rbn points to the next run.
+		 */
+		/* Go through the current run. */
+		while (fp) {
+			*nextp = fp;
+			nextp = &fp->next;
+			fp->prev = NULL;
+			memset(&fp->rbnode, 0, sizeof(fp->rbnode));
+			fp->sk = NULL;
+			head->data_len += fp->len;
+			head->len += fp->len;
+			if (head->ip_summed != fp->ip_summed)
+				head->ip_summed = CHECKSUM_NONE;
+			else if (head->ip_summed == CHECKSUM_COMPLETE)
+				head->csum = csum_add(head->csum, fp->csum);
+			head->truesize += fp->truesize;
+			fp = FRAG_CB(fp)->next_frag;
+		}
+		/* Move to the next run. */
+		if (rbn) {
+			struct rb_node *rbnext = rb_next(rbn);
+
+			fp = rb_to_skb(rbn);
+			rb_erase(rbn, &q->rb_fragments);
+			rbn = rbnext;
+		}
+	}
+	sub_frag_mem_limit(q->net, head->truesize);
+
+	*nextp = NULL;
+	head->next = NULL;
+	head->prev = NULL;
+	head->tstamp = q->stamp;
+}
+EXPORT_SYMBOL(inet_frag_reasm_finish);
+
+struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
+{
+	struct sk_buff *head;
+
+	if (q->fragments) {
+		head = q->fragments;
+		q->fragments = head->next;
+	} else {
+		struct sk_buff *skb;
+
+		head = skb_rb_first(&q->rb_fragments);
+		if (!head)
+			return NULL;
+		skb = FRAG_CB(head)->next_frag;
+		if (skb)
+			rb_replace_node(&head->rbnode, &skb->rbnode,
+					&q->rb_fragments);
+		else
+			rb_erase(&head->rbnode, &q->rb_fragments);
+		memset(&head->rbnode, 0, sizeof(head->rbnode));
+		barrier();
+	}
+	if (head == q->fragments_tail)
+		q->fragments_tail = NULL;
+
+	sub_frag_mem_limit(q->net, head->truesize);
+
+	return head;
+}
+EXPORT_SYMBOL(inet_frag_pull_head);
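
inet_frag_pull_head() serves timer-expiry paths, which must detach the
first fragment from the tree before touching head->dev, because
sk_buff::rbnode and sk_buff::dev share storage. A sketch of the intended
pattern (the real IPv4 caller is the simplified ip_expire() in the next
file):

    static void example_expire(struct inet_frag_queue *q)
    {
            struct sk_buff *head;

            head = inet_frag_pull_head(q);  /* unlinks and uncharges */
            if (!head)
                    return;
            /* head->rbnode has been cleared, so head->dev can now be
             * used safely, e.g. to send an ICMP time-exceeded error.
             */
            kfree_skb(head);
    }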
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index d95b32af4a0e..5a1d39e32196 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -57,57 +57,6 @@
  */
 static const char ip_frag_cache_name[] = "ip4-frags";
 
-/* Use skb->cb to track consecutive/adjacent fragments coming at
- * the end of the queue. Nodes in the rb-tree queue will
- * contain "runs" of one or more adjacent fragments.
- *
- * Invariants:
- * - next_frag is NULL at the tail of a "run";
- * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
- */
-struct ipfrag_skb_cb {
-	struct inet_skb_parm	h;
-	struct sk_buff		*next_frag;
-	int			frag_run_len;
-};
-
-#define FRAG_CB(skb)		((struct ipfrag_skb_cb *)((skb)->cb))
-
-static void ip4_frag_init_run(struct sk_buff *skb)
-{
-	BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
-
-	FRAG_CB(skb)->next_frag = NULL;
-	FRAG_CB(skb)->frag_run_len = skb->len;
-}
-
-/* Append skb to the last "run". */
-static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
-					struct sk_buff *skb)
-{
-	RB_CLEAR_NODE(&skb->rbnode);
-	FRAG_CB(skb)->next_frag = NULL;
-
-	FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
-	FRAG_CB(q->fragments_tail)->next_frag = skb;
-	q->fragments_tail = skb;
-}
-
-/* Create a new "run" with the skb. */
-static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
-{
-	if (q->last_run_head)
-		rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
-			     &q->last_run_head->rbnode.rb_right);
-	else
-		rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
-	rb_insert_color(&skb->rbnode, &q->rb_fragments);
-
-	ip4_frag_init_run(skb);
-	q->fragments_tail = skb;
-	q->last_run_head = skb;
-}
-
 /* Describe an entry in the "incomplete datagrams" queue. */
 struct ipq {
 	struct inet_frag_queue q;
@@ -212,27 +161,9 @@ static void ip_expire(struct timer_list *t)
 	 * pull the head out of the tree in order to be able to
 	 * deal with head->dev.
 	 */
-	if (qp->q.fragments) {
-		head = qp->q.fragments;
-		qp->q.fragments = head->next;
-	} else {
-		head = skb_rb_first(&qp->q.rb_fragments);
-		if (!head)
-			goto out;
-		if (FRAG_CB(head)->next_frag)
-			rb_replace_node(&head->rbnode,
-					&FRAG_CB(head)->next_frag->rbnode,
-					&qp->q.rb_fragments);
-		else
-			rb_erase(&head->rbnode, &qp->q.rb_fragments);
-		memset(&head->rbnode, 0, sizeof(head->rbnode));
-		barrier();
-	}
-	if (head == qp->q.fragments_tail)
-		qp->q.fragments_tail = NULL;
-
-	sub_frag_mem_limit(qp->q.net, head->truesize);
-
+	head = inet_frag_pull_head(&qp->q);
+	if (!head)
+		goto out;
 	head->dev = dev_get_by_index_rcu(net, qp->iif);
 	if (!head->dev)
 		goto out;
@@ -345,12 +276,10 @@ static int ip_frag_reinit(struct ipq *qp)
 static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
 	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
-	struct rb_node **rbn, *parent;
-	struct sk_buff *skb1, *prev_tail;
-	int ihl, end, skb1_run_end;
+	int ihl, end, flags, offset;
+	struct sk_buff *prev_tail;
 	struct net_device *dev;
 	unsigned int fragsize;
-	int flags, offset;
 	int err = -ENOENT;
 	u8 ecn;
 
@@ -382,7 +311,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	 */
 	if (end < qp->q.len ||
 	    ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
-			goto err;
+			goto discard_qp;
 		qp->q.flags |= INET_FRAG_LAST_IN;
 		qp->q.len = end;
 	} else {
@@ -394,82 +323,33 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		if (end > qp->q.len) {
 			/* Some bits beyond end -> corruption. */
 			if (qp->q.flags & INET_FRAG_LAST_IN)
-				goto err;
+				goto discard_qp;
 			qp->q.len = end;
 		}
 	}
 	if (end == offset)
-		goto err;
+		goto discard_qp;
 
 	err = -ENOMEM;
 	if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
-		goto err;
+		goto discard_qp;
 
 	err = pskb_trim_rcsum(skb, end - offset);
 	if (err)
-		goto err;
+		goto discard_qp;
 
 	/* Note : skb->rbnode and skb->dev share the same location. */
 	dev = skb->dev;
 	/* Makes sure compiler wont do silly aliasing games */
 	barrier();
 
-	/* RFC5722, Section 4, amended by Errata ID : 3089
-	 * When reassembling an IPv6 datagram, if
-	 * one or more its constituent fragments is determined to be an
-	 * overlapping fragment, the entire datagram (and any constituent
-	 * fragments) MUST be silently discarded.
-	 *
-	 * We do the same here for IPv4 (and increment an snmp counter) but
-	 * we do not want to drop the whole queue in response to a duplicate
-	 * fragment.
-	 */
-
-	err = -EINVAL;
-	/* Find out where to put this fragment. */
 	prev_tail = qp->q.fragments_tail;
-	if (!prev_tail)
-		ip4_frag_create_run(&qp->q, skb);  /* First fragment. */
-	else if (prev_tail->ip_defrag_offset + prev_tail->len < end) {
-		/* This is the common case: skb goes to the end. */
-		/* Detect and discard overlaps. */
-		if (offset < prev_tail->ip_defrag_offset + prev_tail->len)
-			goto discard_qp;
-		if (offset == prev_tail->ip_defrag_offset + prev_tail->len)
-			ip4_frag_append_to_last_run(&qp->q, skb);
-		else
-			ip4_frag_create_run(&qp->q, skb);
-	} else {
-		/* Binary search. Note that skb can become the first fragment,
-		 * but not the last (covered above).
-		 */
-		rbn = &qp->q.rb_fragments.rb_node;
-		do {
-			parent = *rbn;
-			skb1 = rb_to_skb(parent);
-			skb1_run_end = skb1->ip_defrag_offset +
-				       FRAG_CB(skb1)->frag_run_len;
-			if (end <= skb1->ip_defrag_offset)
-				rbn = &parent->rb_left;
-			else if (offset >= skb1_run_end)
-				rbn = &parent->rb_right;
-			else if (offset >= skb1->ip_defrag_offset &&
-				 end <= skb1_run_end)
-				goto err; /* No new data, potential duplicate */
-			else
-				goto discard_qp; /* Found an overlap */
-		} while (*rbn);
-		/* Here we have parent properly set, and rbn pointing to
-		 * one of its NULL left/right children. Insert skb.
-		 */
-		ip4_frag_init_run(skb);
-		rb_link_node(&skb->rbnode, parent, rbn);
-		rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
-	}
+	err = inet_frag_queue_insert(&qp->q, skb, offset, end);
+	if (err)
+		goto insert_error;
 
 	if (dev)
 		qp->iif = dev->ifindex;
-	skb->ip_defrag_offset = offset;
 
 	qp->q.stamp = skb->tstamp;
 	qp->q.meat += skb->len;
@@ -494,15 +374,24 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		skb->_skb_refdst = 0UL;
 		err = ip_frag_reasm(qp, skb, prev_tail, dev);
 		skb->_skb_refdst = orefdst;
+		if (err)
+			inet_frag_kill(&qp->q);
 		return err;
 	}
 
 	skb_dst_drop(skb);
 	return -EINPROGRESS;
 
+insert_error:
+	if (err == IPFRAG_DUP) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+	err = -EINVAL;
+	__IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
 discard_qp:
 	inet_frag_kill(&qp->q);
-	__IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
+	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
 err:
 	kfree_skb(skb);
 	return err;
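
Because the reworked error handling threads through two goto targets, a
summary of the resulting outcomes may help (derived from the code above,
for illustration only):

    /* IPFRAG_DUP     -> insert_error: free the skb, return -EINVAL;
     *                   the queue and its fragments are kept, and no
     *                   SNMP counter is incremented.
     * IPFRAG_OVERLAP -> insert_error: count REASM_OVERLAPS, then fall
     *                   through to discard_qp: kill the queue, count
     *                   REASMFAILS, free the skb, return -EINVAL.
     * bad geometry   -> discard_qp directly (the former "goto err"
     *                   sites above): kill the queue and count
     *                   REASMFAILS before freeing the skb.
     */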
@@ -514,13 +403,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 {
 	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
 	struct iphdr *iph;
-	struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments);
-	struct sk_buff **nextp; /* To build frag_list. */
-	struct rb_node *rbn;
-	int len;
-	int ihlen;
-	int delta;
-	int err;
+	void *reasm_data;
+	int len, err;
 	u8 ecn;
 
 	ipq_kill(qp);
@@ -530,117 +414,23 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 		err = -EINVAL;
 		goto out_fail;
 	}
-	/* Make the one we just received the head. */
-	if (head != skb) {
-		fp = skb_clone(skb, GFP_ATOMIC);
-		if (!fp)
-			goto out_nomem;
-		FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
-		if (RB_EMPTY_NODE(&skb->rbnode))
-			FRAG_CB(prev_tail)->next_frag = fp;
-		else
-			rb_replace_node(&skb->rbnode, &fp->rbnode,
-					&qp->q.rb_fragments);
-		if (qp->q.fragments_tail == skb)
-			qp->q.fragments_tail = fp;
-		skb_morph(skb, head);
-		FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
-		rb_replace_node(&head->rbnode, &skb->rbnode,
-				&qp->q.rb_fragments);
-		consume_skb(head);
-		head = skb;
-	}
 
-	WARN_ON(head->ip_defrag_offset != 0);
-
-	/* Allocate a new buffer for the datagram. */
-	ihlen = ip_hdrlen(head);
-	len = ihlen + qp->q.len;
+	/* Make the one we just received the head. */
+	reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail);
+	if (!reasm_data)
+		goto out_nomem;
 
+	len = ip_hdrlen(skb) + qp->q.len;
 	err = -E2BIG;
 	if (len > 65535)
 		goto out_oversize;
 
-	delta = - head->truesize;
-
-	/* Head of list must not be cloned. */
-	if (skb_unclone(head, GFP_ATOMIC))
-		goto out_nomem;
-
-	delta += head->truesize;
-	if (delta)
-		add_frag_mem_limit(qp->q.net, delta);
-
-	/* If the first fragment is fragmented itself, we split
-	 * it to two chunks: the first with data and paged part
-	 * and the second, holding only fragments. */
-	if (skb_has_frag_list(head)) {
-		struct sk_buff *clone;
-		int i, plen = 0;
-
-		clone = alloc_skb(0, GFP_ATOMIC);
-		if (!clone)
-			goto out_nomem;
-		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
-		skb_frag_list_init(head);
-		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
-			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
-		clone->len = clone->data_len = head->data_len - plen;
-		head->truesize += clone->truesize;
-		clone->csum = 0;
-		clone->ip_summed = head->ip_summed;
-		add_frag_mem_limit(qp->q.net, clone->truesize);
-		skb_shinfo(head)->frag_list = clone;
-		nextp = &clone->next;
-	} else {
-		nextp = &skb_shinfo(head)->frag_list;
-	}
+	inet_frag_reasm_finish(&qp->q, skb, reasm_data);
 
-	skb_push(head, head->data - skb_network_header(head));
+	skb->dev = dev;
+	IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
 
-	/* Traverse the tree in order, to build frag_list. */
-	fp = FRAG_CB(head)->next_frag;
-	rbn = rb_next(&head->rbnode);
-	rb_erase(&head->rbnode, &qp->q.rb_fragments);
-	while (rbn || fp) {
-		/* fp points to the next sk_buff in the current run;
-		 * rbn points to the next run.
-		 */
-		/* Go through the current run. */
-		while (fp) {
-			*nextp = fp;
-			nextp = &fp->next;
-			fp->prev = NULL;
-			memset(&fp->rbnode, 0, sizeof(fp->rbnode));
-			fp->sk = NULL;
-			head->data_len += fp->len;
-			head->len += fp->len;
-			if (head->ip_summed != fp->ip_summed)
-				head->ip_summed = CHECKSUM_NONE;
-			else if (head->ip_summed == CHECKSUM_COMPLETE)
-				head->csum = csum_add(head->csum, fp->csum);
-			head->truesize += fp->truesize;
-			fp = FRAG_CB(fp)->next_frag;
-		}
-		/* Move to the next run. */
-		if (rbn) {
-			struct rb_node *rbnext = rb_next(rbn);
-
-			fp = rb_to_skb(rbn);
-			rb_erase(rbn, &qp->q.rb_fragments);
-			rbn = rbnext;
-		}
-	}
-	sub_frag_mem_limit(qp->q.net, head->truesize);
-
-	*nextp = NULL;
-	head->next = NULL;
-	head->prev = NULL;
-	head->dev = dev;
-	head->tstamp = qp->q.stamp;
-	IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
-
-	iph = ip_hdr(head);
+	iph = ip_hdr(skb);
 	iph->tot_len = htons(len);
 	iph->tos |= ecn;
 
@@ -653,7 +443,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 	 * from one very small df-fragment and one large non-df frag.
 	 */
 	if (qp->max_df_size == qp->q.max_size) {
-		IPCB(head)->flags |= IPSKB_FRAG_PMTU;
+		IPCB(skb)->flags |= IPSKB_FRAG_PMTU;
 		iph->frag_off = htons(IP_DF);
 	} else {
 		iph->frag_off = 0;
@@ -751,28 +541,6 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
 }
 EXPORT_SYMBOL(ip_check_defrag);
 
-unsigned int inet_frag_rbtree_purge(struct rb_root *root)
-{
-	struct rb_node *p = rb_first(root);
-	unsigned int sum = 0;
-
-	while (p) {
-		struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
-
-		p = rb_next(p);
-		rb_erase(&skb->rbnode, root);
-		while (skb) {
-			struct sk_buff *next = FRAG_CB(skb)->next_frag;
-
-			sum += skb->truesize;
-			kfree_skb(skb);
-			skb = next;
-		}
-	}
-	return sum;
-}
-EXPORT_SYMBOL(inet_frag_rbtree_purge);
-
 #ifdef CONFIG_SYSCTL
 static int dist_min;
 
-- 
2.19.1