--- /dev/null
+From 7905dc07a3d1aac701694adcb0c0cb4439a47117 Mon Sep 17 00:00:00 2001
+From: Peter Oskolkov <posk@google.com>
+Date: Mon, 8 Apr 2019 17:10:03 -0700
+Subject: net: IP defrag: encapsulate rbtree defrag code into callable
+ functions
+
+[ Upstream commit c23f35d19db3b36ffb9e04b08f1d91565d15f84f ]
+
+This is a refactoring patch: without changing runtime behavior,
+it moves rbtree-related code from IPv4-specific files/functions
+into .h/.c defrag files shared with IPv6 defragmentation code.
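+
+For reference, the refactored helpers are meant to be used by a
+per-protocol defrag path roughly as in the sketch below (condensed from
+the updated ip_frag_queue()/ip_frag_reasm() callers later in this patch;
+local variable names are illustrative, not a standalone example):
+
+	prev_tail = q->fragments_tail;
+	err = inet_frag_queue_insert(q, skb, offset, end);
+	if (err)		/* IPFRAG_DUP or IPFRAG_OVERLAP */
+		goto insert_error;
+	...
+	/* once all fragments have arrived, reassemble around skb */
+	reasm_data = inet_frag_reasm_prepare(q, skb, prev_tail);
+	if (!reasm_data)
+		goto out_nomem;
+	inet_frag_reasm_finish(q, skb, reasm_data);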
+
+Signed-off-by: Peter Oskolkov <posk@google.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Florian Westphal <fw@strlen.de>
+Cc: Tom Herbert <tom@herbertland.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/inet_frag.h | 16 ++-
+ net/ipv4/inet_fragment.c | 293 +++++++++++++++++++++++++++++++++++++++
+ net/ipv4/ip_fragment.c | 290 ++++----------------------------------
+ 3 files changed, 335 insertions(+), 264 deletions(-)
+
+diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
+index 1662cbc0b46b..b02bf737d019 100644
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -77,8 +77,8 @@ struct inet_frag_queue {
+ struct timer_list timer;
+ spinlock_t lock;
+ refcount_t refcnt;
+- struct sk_buff *fragments; /* Used in IPv6. */
+- struct rb_root rb_fragments; /* Used in IPv4. */
++ struct sk_buff *fragments; /* used in 6lopwpan IPv6. */
++ struct rb_root rb_fragments; /* Used in IPv4/IPv6. */
+ struct sk_buff *fragments_tail;
+ struct sk_buff *last_run_head;
+ ktime_t stamp;
+@@ -153,4 +153,16 @@ static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
+
+ extern const u8 ip_frag_ecn_table[16];
+
++/* Return values of inet_frag_queue_insert() */
++#define IPFRAG_OK 0
++#define IPFRAG_DUP 1
++#define IPFRAG_OVERLAP 2
++int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
++ int offset, int end);
++void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
++ struct sk_buff *parent);
++void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
++ void *reasm_data);
++struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q);
++
+ #endif
+diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
+index 760a9e52e02b..9f69411251d0 100644
+--- a/net/ipv4/inet_fragment.c
++++ b/net/ipv4/inet_fragment.c
+@@ -25,6 +25,62 @@
+ #include <net/sock.h>
+ #include <net/inet_frag.h>
+ #include <net/inet_ecn.h>
++#include <net/ip.h>
++#include <net/ipv6.h>
++
++/* Use skb->cb to track consecutive/adjacent fragments coming at
++ * the end of the queue. Nodes in the rb-tree queue will
++ * contain "runs" of one or more adjacent fragments.
++ *
++ * Invariants:
++ * - next_frag is NULL at the tail of a "run";
++ * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
++ */
++struct ipfrag_skb_cb {
++ union {
++ struct inet_skb_parm h4;
++ struct inet6_skb_parm h6;
++ };
++ struct sk_buff *next_frag;
++ int frag_run_len;
++};
++
++#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
++
++static void fragcb_clear(struct sk_buff *skb)
++{
++ RB_CLEAR_NODE(&skb->rbnode);
++ FRAG_CB(skb)->next_frag = NULL;
++ FRAG_CB(skb)->frag_run_len = skb->len;
++}
++
++/* Append skb to the last "run". */
++static void fragrun_append_to_last(struct inet_frag_queue *q,
++ struct sk_buff *skb)
++{
++ fragcb_clear(skb);
++
++ FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
++ FRAG_CB(q->fragments_tail)->next_frag = skb;
++ q->fragments_tail = skb;
++}
++
++/* Create a new "run" with the skb. */
++static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb)
++{
++ BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
++ fragcb_clear(skb);
++
++ if (q->last_run_head)
++ rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
++ &q->last_run_head->rbnode.rb_right);
++ else
++ rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
++ rb_insert_color(&skb->rbnode, &q->rb_fragments);
++
++ q->fragments_tail = skb;
++ q->last_run_head = skb;
++}
+
+ /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
+ * Value : 0xff if frame should be dropped.
+@@ -123,6 +179,28 @@ static void inet_frag_destroy_rcu(struct rcu_head *head)
+ kmem_cache_free(f->frags_cachep, q);
+ }
+
++unsigned int inet_frag_rbtree_purge(struct rb_root *root)
++{
++ struct rb_node *p = rb_first(root);
++ unsigned int sum = 0;
++
++ while (p) {
++ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
++
++ p = rb_next(p);
++ rb_erase(&skb->rbnode, root);
++ while (skb) {
++ struct sk_buff *next = FRAG_CB(skb)->next_frag;
++
++ sum += skb->truesize;
++ kfree_skb(skb);
++ skb = next;
++ }
++ }
++ return sum;
++}
++EXPORT_SYMBOL(inet_frag_rbtree_purge);
++
+ void inet_frag_destroy(struct inet_frag_queue *q)
+ {
+ struct sk_buff *fp;
+@@ -224,3 +302,218 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
+ return fq;
+ }
+ EXPORT_SYMBOL(inet_frag_find);
++
++int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
++ int offset, int end)
++{
++ struct sk_buff *last = q->fragments_tail;
++
++ /* RFC5722, Section 4, amended by Errata ID : 3089
++ * When reassembling an IPv6 datagram, if
++ * one or more its constituent fragments is determined to be an
++ * overlapping fragment, the entire datagram (and any constituent
++ * fragments) MUST be silently discarded.
++ *
++ * Duplicates, however, should be ignored (i.e. skb dropped, but the
++ * queue/fragments kept for later reassembly).
++ */
++ if (!last)
++ fragrun_create(q, skb); /* First fragment. */
++ else if (last->ip_defrag_offset + last->len < end) {
++ /* This is the common case: skb goes to the end. */
++ /* Detect and discard overlaps. */
++ if (offset < last->ip_defrag_offset + last->len)
++ return IPFRAG_OVERLAP;
++ if (offset == last->ip_defrag_offset + last->len)
++ fragrun_append_to_last(q, skb);
++ else
++ fragrun_create(q, skb);
++ } else {
++ /* Binary search. Note that skb can become the first fragment,
++ * but not the last (covered above).
++ */
++ struct rb_node **rbn, *parent;
++
++ rbn = &q->rb_fragments.rb_node;
++ do {
++ struct sk_buff *curr;
++ int curr_run_end;
++
++ parent = *rbn;
++ curr = rb_to_skb(parent);
++ curr_run_end = curr->ip_defrag_offset +
++ FRAG_CB(curr)->frag_run_len;
++ if (end <= curr->ip_defrag_offset)
++ rbn = &parent->rb_left;
++ else if (offset >= curr_run_end)
++ rbn = &parent->rb_right;
++ else if (offset >= curr->ip_defrag_offset &&
++ end <= curr_run_end)
++ return IPFRAG_DUP;
++ else
++ return IPFRAG_OVERLAP;
++ } while (*rbn);
++ /* Here we have parent properly set, and rbn pointing to
++ * one of its NULL left/right children. Insert skb.
++ */
++ fragcb_clear(skb);
++ rb_link_node(&skb->rbnode, parent, rbn);
++ rb_insert_color(&skb->rbnode, &q->rb_fragments);
++ }
++
++ skb->ip_defrag_offset = offset;
++
++ return IPFRAG_OK;
++}
++EXPORT_SYMBOL(inet_frag_queue_insert);
++
++void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
++ struct sk_buff *parent)
++{
++ struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
++ struct sk_buff **nextp;
++ int delta;
++
++ if (head != skb) {
++ fp = skb_clone(skb, GFP_ATOMIC);
++ if (!fp)
++ return NULL;
++ FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
++ if (RB_EMPTY_NODE(&skb->rbnode))
++ FRAG_CB(parent)->next_frag = fp;
++ else
++ rb_replace_node(&skb->rbnode, &fp->rbnode,
++ &q->rb_fragments);
++ if (q->fragments_tail == skb)
++ q->fragments_tail = fp;
++ skb_morph(skb, head);
++ FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
++ rb_replace_node(&head->rbnode, &skb->rbnode,
++ &q->rb_fragments);
++ consume_skb(head);
++ head = skb;
++ }
++ WARN_ON(head->ip_defrag_offset != 0);
++
++ delta = -head->truesize;
++
++ /* Head of list must not be cloned. */
++ if (skb_unclone(head, GFP_ATOMIC))
++ return NULL;
++
++ delta += head->truesize;
++ if (delta)
++ add_frag_mem_limit(q->net, delta);
++
++ /* If the first fragment is fragmented itself, we split
++ * it to two chunks: the first with data and paged part
++ * and the second, holding only fragments.
++ */
++ if (skb_has_frag_list(head)) {
++ struct sk_buff *clone;
++ int i, plen = 0;
++
++ clone = alloc_skb(0, GFP_ATOMIC);
++ if (!clone)
++ return NULL;
++ skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
++ skb_frag_list_init(head);
++ for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
++ plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
++ clone->data_len = head->data_len - plen;
++ clone->len = clone->data_len;
++ head->truesize += clone->truesize;
++ clone->csum = 0;
++ clone->ip_summed = head->ip_summed;
++ add_frag_mem_limit(q->net, clone->truesize);
++ skb_shinfo(head)->frag_list = clone;
++ nextp = &clone->next;
++ } else {
++ nextp = &skb_shinfo(head)->frag_list;
++ }
++
++ return nextp;
++}
++EXPORT_SYMBOL(inet_frag_reasm_prepare);
++
++void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
++ void *reasm_data)
++{
++ struct sk_buff **nextp = (struct sk_buff **)reasm_data;
++ struct rb_node *rbn;
++ struct sk_buff *fp;
++
++ skb_push(head, head->data - skb_network_header(head));
++
++ /* Traverse the tree in order, to build frag_list. */
++ fp = FRAG_CB(head)->next_frag;
++ rbn = rb_next(&head->rbnode);
++ rb_erase(&head->rbnode, &q->rb_fragments);
++ while (rbn || fp) {
++ /* fp points to the next sk_buff in the current run;
++ * rbn points to the next run.
++ */
++ /* Go through the current run. */
++ while (fp) {
++ *nextp = fp;
++ nextp = &fp->next;
++ fp->prev = NULL;
++ memset(&fp->rbnode, 0, sizeof(fp->rbnode));
++ fp->sk = NULL;
++ head->data_len += fp->len;
++ head->len += fp->len;
++ if (head->ip_summed != fp->ip_summed)
++ head->ip_summed = CHECKSUM_NONE;
++ else if (head->ip_summed == CHECKSUM_COMPLETE)
++ head->csum = csum_add(head->csum, fp->csum);
++ head->truesize += fp->truesize;
++ fp = FRAG_CB(fp)->next_frag;
++ }
++ /* Move to the next run. */
++ if (rbn) {
++ struct rb_node *rbnext = rb_next(rbn);
++
++ fp = rb_to_skb(rbn);
++ rb_erase(rbn, &q->rb_fragments);
++ rbn = rbnext;
++ }
++ }
++ sub_frag_mem_limit(q->net, head->truesize);
++
++ *nextp = NULL;
++ skb_mark_not_on_list(head);
++ head->prev = NULL;
++ head->tstamp = q->stamp;
++}
++EXPORT_SYMBOL(inet_frag_reasm_finish);
++
++struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
++{
++ struct sk_buff *head;
++
++ if (q->fragments) {
++ head = q->fragments;
++ q->fragments = head->next;
++ } else {
++ struct sk_buff *skb;
++
++ head = skb_rb_first(&q->rb_fragments);
++ if (!head)
++ return NULL;
++ skb = FRAG_CB(head)->next_frag;
++ if (skb)
++ rb_replace_node(&head->rbnode, &skb->rbnode,
++ &q->rb_fragments);
++ else
++ rb_erase(&head->rbnode, &q->rb_fragments);
++ memset(&head->rbnode, 0, sizeof(head->rbnode));
++ barrier();
++ }
++ if (head == q->fragments_tail)
++ q->fragments_tail = NULL;
++
++ sub_frag_mem_limit(q->net, head->truesize);
++
++ return head;
++}
++EXPORT_SYMBOL(inet_frag_pull_head);
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index d95b32af4a0e..15c609e6f12e 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -57,57 +57,6 @@
+ */
+ static const char ip_frag_cache_name[] = "ip4-frags";
+
+-/* Use skb->cb to track consecutive/adjacent fragments coming at
+- * the end of the queue. Nodes in the rb-tree queue will
+- * contain "runs" of one or more adjacent fragments.
+- *
+- * Invariants:
+- * - next_frag is NULL at the tail of a "run";
+- * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
+- */
+-struct ipfrag_skb_cb {
+- struct inet_skb_parm h;
+- struct sk_buff *next_frag;
+- int frag_run_len;
+-};
+-
+-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
+-
+-static void ip4_frag_init_run(struct sk_buff *skb)
+-{
+- BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
+-
+- FRAG_CB(skb)->next_frag = NULL;
+- FRAG_CB(skb)->frag_run_len = skb->len;
+-}
+-
+-/* Append skb to the last "run". */
+-static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
+- struct sk_buff *skb)
+-{
+- RB_CLEAR_NODE(&skb->rbnode);
+- FRAG_CB(skb)->next_frag = NULL;
+-
+- FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
+- FRAG_CB(q->fragments_tail)->next_frag = skb;
+- q->fragments_tail = skb;
+-}
+-
+-/* Create a new "run" with the skb. */
+-static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
+-{
+- if (q->last_run_head)
+- rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
+- &q->last_run_head->rbnode.rb_right);
+- else
+- rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
+- rb_insert_color(&skb->rbnode, &q->rb_fragments);
+-
+- ip4_frag_init_run(skb);
+- q->fragments_tail = skb;
+- q->last_run_head = skb;
+-}
+-
+ /* Describe an entry in the "incomplete datagrams" queue. */
+ struct ipq {
+ struct inet_frag_queue q;
+@@ -212,27 +161,9 @@ static void ip_expire(struct timer_list *t)
+ * pull the head out of the tree in order to be able to
+ * deal with head->dev.
+ */
+- if (qp->q.fragments) {
+- head = qp->q.fragments;
+- qp->q.fragments = head->next;
+- } else {
+- head = skb_rb_first(&qp->q.rb_fragments);
+- if (!head)
+- goto out;
+- if (FRAG_CB(head)->next_frag)
+- rb_replace_node(&head->rbnode,
+- &FRAG_CB(head)->next_frag->rbnode,
+- &qp->q.rb_fragments);
+- else
+- rb_erase(&head->rbnode, &qp->q.rb_fragments);
+- memset(&head->rbnode, 0, sizeof(head->rbnode));
+- barrier();
+- }
+- if (head == qp->q.fragments_tail)
+- qp->q.fragments_tail = NULL;
+-
+- sub_frag_mem_limit(qp->q.net, head->truesize);
+-
++ head = inet_frag_pull_head(&qp->q);
++ if (!head)
++ goto out;
+ head->dev = dev_get_by_index_rcu(net, qp->iif);
+ if (!head->dev)
+ goto out;
+@@ -345,12 +276,10 @@ static int ip_frag_reinit(struct ipq *qp)
+ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ {
+ struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+- struct rb_node **rbn, *parent;
+- struct sk_buff *skb1, *prev_tail;
+- int ihl, end, skb1_run_end;
++ int ihl, end, flags, offset;
++ struct sk_buff *prev_tail;
+ struct net_device *dev;
+ unsigned int fragsize;
+- int flags, offset;
+ int err = -ENOENT;
+ u8 ecn;
+
+@@ -414,62 +343,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+
+- /* RFC5722, Section 4, amended by Errata ID : 3089
+- * When reassembling an IPv6 datagram, if
+- * one or more its constituent fragments is determined to be an
+- * overlapping fragment, the entire datagram (and any constituent
+- * fragments) MUST be silently discarded.
+- *
+- * We do the same here for IPv4 (and increment an snmp counter) but
+- * we do not want to drop the whole queue in response to a duplicate
+- * fragment.
+- */
+-
+- err = -EINVAL;
+- /* Find out where to put this fragment. */
+ prev_tail = qp->q.fragments_tail;
+- if (!prev_tail)
+- ip4_frag_create_run(&qp->q, skb); /* First fragment. */
+- else if (prev_tail->ip_defrag_offset + prev_tail->len < end) {
+- /* This is the common case: skb goes to the end. */
+- /* Detect and discard overlaps. */
+- if (offset < prev_tail->ip_defrag_offset + prev_tail->len)
+- goto discard_qp;
+- if (offset == prev_tail->ip_defrag_offset + prev_tail->len)
+- ip4_frag_append_to_last_run(&qp->q, skb);
+- else
+- ip4_frag_create_run(&qp->q, skb);
+- } else {
+- /* Binary search. Note that skb can become the first fragment,
+- * but not the last (covered above).
+- */
+- rbn = &qp->q.rb_fragments.rb_node;
+- do {
+- parent = *rbn;
+- skb1 = rb_to_skb(parent);
+- skb1_run_end = skb1->ip_defrag_offset +
+- FRAG_CB(skb1)->frag_run_len;
+- if (end <= skb1->ip_defrag_offset)
+- rbn = &parent->rb_left;
+- else if (offset >= skb1_run_end)
+- rbn = &parent->rb_right;
+- else if (offset >= skb1->ip_defrag_offset &&
+- end <= skb1_run_end)
+- goto err; /* No new data, potential duplicate */
+- else
+- goto discard_qp; /* Found an overlap */
+- } while (*rbn);
+- /* Here we have parent properly set, and rbn pointing to
+- * one of its NULL left/right children. Insert skb.
+- */
+- ip4_frag_init_run(skb);
+- rb_link_node(&skb->rbnode, parent, rbn);
+- rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
+- }
++ err = inet_frag_queue_insert(&qp->q, skb, offset, end);
++ if (err)
++ goto insert_error;
+
+ if (dev)
+ qp->iif = dev->ifindex;
+- skb->ip_defrag_offset = offset;
+
+ qp->q.stamp = skb->tstamp;
+ qp->q.meat += skb->len;
+@@ -500,9 +380,16 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ skb_dst_drop(skb);
+ return -EINPROGRESS;
+
++insert_error:
++ if (err == IPFRAG_DUP) {
++ kfree_skb(skb);
++ return -EINVAL;
++ }
++ err = -EINVAL;
++ __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
+ discard_qp:
+ inet_frag_kill(&qp->q);
+- __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
++ __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
+ err:
+ kfree_skb(skb);
+ return err;
+@@ -514,13 +401,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+ {
+ struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+ struct iphdr *iph;
+- struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments);
+- struct sk_buff **nextp; /* To build frag_list. */
+- struct rb_node *rbn;
+- int len;
+- int ihlen;
+- int delta;
+- int err;
++ void *reasm_data;
++ int len, err;
+ u8 ecn;
+
+ ipq_kill(qp);
+@@ -530,117 +412,23 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+ err = -EINVAL;
+ goto out_fail;
+ }
+- /* Make the one we just received the head. */
+- if (head != skb) {
+- fp = skb_clone(skb, GFP_ATOMIC);
+- if (!fp)
+- goto out_nomem;
+- FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
+- if (RB_EMPTY_NODE(&skb->rbnode))
+- FRAG_CB(prev_tail)->next_frag = fp;
+- else
+- rb_replace_node(&skb->rbnode, &fp->rbnode,
+- &qp->q.rb_fragments);
+- if (qp->q.fragments_tail == skb)
+- qp->q.fragments_tail = fp;
+- skb_morph(skb, head);
+- FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
+- rb_replace_node(&head->rbnode, &skb->rbnode,
+- &qp->q.rb_fragments);
+- consume_skb(head);
+- head = skb;
+- }
+-
+- WARN_ON(head->ip_defrag_offset != 0);
+
+- /* Allocate a new buffer for the datagram. */
+- ihlen = ip_hdrlen(head);
+- len = ihlen + qp->q.len;
++ /* Make the one we just received the head. */
++ reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail);
++ if (!reasm_data)
++ goto out_nomem;
+
++ len = ip_hdrlen(skb) + qp->q.len;
+ err = -E2BIG;
+ if (len > 65535)
+ goto out_oversize;
+
+- delta = - head->truesize;
+-
+- /* Head of list must not be cloned. */
+- if (skb_unclone(head, GFP_ATOMIC))
+- goto out_nomem;
+-
+- delta += head->truesize;
+- if (delta)
+- add_frag_mem_limit(qp->q.net, delta);
+-
+- /* If the first fragment is fragmented itself, we split
+- * it to two chunks: the first with data and paged part
+- * and the second, holding only fragments. */
+- if (skb_has_frag_list(head)) {
+- struct sk_buff *clone;
+- int i, plen = 0;
+-
+- clone = alloc_skb(0, GFP_ATOMIC);
+- if (!clone)
+- goto out_nomem;
+- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+- skb_frag_list_init(head);
+- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+- clone->len = clone->data_len = head->data_len - plen;
+- head->truesize += clone->truesize;
+- clone->csum = 0;
+- clone->ip_summed = head->ip_summed;
+- add_frag_mem_limit(qp->q.net, clone->truesize);
+- skb_shinfo(head)->frag_list = clone;
+- nextp = &clone->next;
+- } else {
+- nextp = &skb_shinfo(head)->frag_list;
+- }
++ inet_frag_reasm_finish(&qp->q, skb, reasm_data);
+
+- skb_push(head, head->data - skb_network_header(head));
++ skb->dev = dev;
++ IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
+
+- /* Traverse the tree in order, to build frag_list. */
+- fp = FRAG_CB(head)->next_frag;
+- rbn = rb_next(&head->rbnode);
+- rb_erase(&head->rbnode, &qp->q.rb_fragments);
+- while (rbn || fp) {
+- /* fp points to the next sk_buff in the current run;
+- * rbn points to the next run.
+- */
+- /* Go through the current run. */
+- while (fp) {
+- *nextp = fp;
+- nextp = &fp->next;
+- fp->prev = NULL;
+- memset(&fp->rbnode, 0, sizeof(fp->rbnode));
+- fp->sk = NULL;
+- head->data_len += fp->len;
+- head->len += fp->len;
+- if (head->ip_summed != fp->ip_summed)
+- head->ip_summed = CHECKSUM_NONE;
+- else if (head->ip_summed == CHECKSUM_COMPLETE)
+- head->csum = csum_add(head->csum, fp->csum);
+- head->truesize += fp->truesize;
+- fp = FRAG_CB(fp)->next_frag;
+- }
+- /* Move to the next run. */
+- if (rbn) {
+- struct rb_node *rbnext = rb_next(rbn);
+-
+- fp = rb_to_skb(rbn);
+- rb_erase(rbn, &qp->q.rb_fragments);
+- rbn = rbnext;
+- }
+- }
+- sub_frag_mem_limit(qp->q.net, head->truesize);
+-
+- *nextp = NULL;
+- head->next = NULL;
+- head->prev = NULL;
+- head->dev = dev;
+- head->tstamp = qp->q.stamp;
+- IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
+-
+- iph = ip_hdr(head);
++ iph = ip_hdr(skb);
+ iph->tot_len = htons(len);
+ iph->tos |= ecn;
+
+@@ -653,7 +441,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+ * from one very small df-fragment and one large non-df frag.
+ */
+ if (qp->max_df_size == qp->q.max_size) {
+- IPCB(head)->flags |= IPSKB_FRAG_PMTU;
++ IPCB(skb)->flags |= IPSKB_FRAG_PMTU;
+ iph->frag_off = htons(IP_DF);
+ } else {
+ iph->frag_off = 0;
+@@ -751,28 +539,6 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
+ }
+ EXPORT_SYMBOL(ip_check_defrag);
+
+-unsigned int inet_frag_rbtree_purge(struct rb_root *root)
+-{
+- struct rb_node *p = rb_first(root);
+- unsigned int sum = 0;
+-
+- while (p) {
+- struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+-
+- p = rb_next(p);
+- rb_erase(&skb->rbnode, root);
+- while (skb) {
+- struct sk_buff *next = FRAG_CB(skb)->next_frag;
+-
+- sum += skb->truesize;
+- kfree_skb(skb);
+- skb = next;
+- }
+- }
+- return sum;
+-}
+-EXPORT_SYMBOL(inet_frag_rbtree_purge);
+-
+ #ifdef CONFIG_SYSCTL
+ static int dist_min;
+
+--
+2.19.1
+
--- /dev/null
+From a4875906060ceefdd345989aed28133d070053d3 Mon Sep 17 00:00:00 2001
+From: Peter Oskolkov <posk@google.com>
+Date: Mon, 8 Apr 2019 17:10:04 -0700
+Subject: net: IP6 defrag: use rbtrees for IPv6 defrag
+
+[ Upstream commit d4289fcc9b16b89619ee1c54f829e05e56de8b9a ]
+
+Currently, IPv6 defragmentation code drops non-last fragments that
+are smaller than 1280 bytes: see
+commit 0ed4229b08c1 ("ipv6: defrag: drop non-last frags smaller than min mtu")
+
+This behavior is not specified in IPv6 RFCs and appears to break
+compatibility with some IPv6 implementations, as reported here:
+https://www.spinics.net/lists/netdev/msg543846.html
+
+This patch re-uses common IP defragmentation queueing and reassembly
+code in IPv6, removing the 1280 byte restriction.
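+
+With this change, ip6_frag_queue() inserts fragments via the shared
+rbtree helper and hands the complete datagram to ip6_frag_reasm(); a
+condensed excerpt of the code added below (not a standalone example):
+
+	prev_tail = fq->q.fragments_tail;
+	err = inet_frag_queue_insert(&fq->q, skb, offset, end);
+	if (err)
+		goto insert_error;
+	...
+	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+	    fq->q.meat == fq->q.len)
+		err = ip6_frag_reasm(fq, skb, prev_tail, dev);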
+
+Signed-off-by: Peter Oskolkov <posk@google.com>
+Reported-by: Tom Herbert <tom@herbertland.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/ipv6_frag.h | 11 +-
+ net/ipv6/reassembly.c | 233 +++++++++++-----------------------------
+ 2 files changed, 71 insertions(+), 173 deletions(-)
+
+diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
+index 6ced1e6899b6..28aa9b30aece 100644
+--- a/include/net/ipv6_frag.h
++++ b/include/net/ipv6_frag.h
+@@ -82,8 +82,15 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
+ __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+
+ /* Don't send error if the first segment did not arrive. */
+- head = fq->q.fragments;
+- if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
++ if (!(fq->q.flags & INET_FRAG_FIRST_IN))
++ goto out;
++
++ /* sk_buff::dev and sk_buff::rbnode are unionized. So we
++ * pull the head out of the tree in order to be able to
++ * deal with head->dev.
++ */
++ head = inet_frag_pull_head(&fq->q);
++ if (!head)
+ goto out;
+
+ head->dev = dev;
+diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
+index 7c943392c128..642f9f53b01d 100644
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -69,8 +69,8 @@ static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
+
+ static struct inet_frags ip6_frags;
+
+-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+- struct net_device *dev);
++static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
++ struct sk_buff *prev_tail, struct net_device *dev);
+
+ static void ip6_frag_expire(struct timer_list *t)
+ {
+@@ -111,21 +111,26 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
+ struct frag_hdr *fhdr, int nhoff,
+ u32 *prob_offset)
+ {
+- struct sk_buff *prev, *next;
+- struct net_device *dev;
+- int offset, end, fragsize;
+ struct net *net = dev_net(skb_dst(skb)->dev);
++ int offset, end, fragsize;
++ struct sk_buff *prev_tail;
++ struct net_device *dev;
++ int err = -ENOENT;
+ u8 ecn;
+
+ if (fq->q.flags & INET_FRAG_COMPLETE)
+ goto err;
+
++ err = -EINVAL;
+ offset = ntohs(fhdr->frag_off) & ~0x7;
+ end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
+ ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
+
+ if ((unsigned int)end > IPV6_MAXPLEN) {
+ *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
++ /* note that if prob_offset is set, the skb is freed elsewhere,
++ * we do not free it here.
++ */
+ return -1;
+ }
+
+@@ -170,62 +175,27 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
+ if (end == offset)
+ goto err;
+
++ err = -ENOMEM;
+ /* Point into the IP datagram 'data' part. */
+ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
+ goto err;
+
+- if (pskb_trim_rcsum(skb, end - offset))
+- goto err;
+-
+- /* Find out which fragments are in front and at the back of us
+- * in the chain of fragments so far. We must know where to put
+- * this fragment, right?
+- */
+- prev = fq->q.fragments_tail;
+- if (!prev || prev->ip_defrag_offset < offset) {
+- next = NULL;
+- goto found;
+- }
+- prev = NULL;
+- for (next = fq->q.fragments; next != NULL; next = next->next) {
+- if (next->ip_defrag_offset >= offset)
+- break; /* bingo! */
+- prev = next;
+- }
+-
+-found:
+- /* RFC5722, Section 4, amended by Errata ID : 3089
+- * When reassembling an IPv6 datagram, if
+- * one or more its constituent fragments is determined to be an
+- * overlapping fragment, the entire datagram (and any constituent
+- * fragments) MUST be silently discarded.
+- */
+-
+- /* Check for overlap with preceding fragment. */
+- if (prev &&
+- (prev->ip_defrag_offset + prev->len) > offset)
+- goto discard_fq;
+-
+- /* Look for overlap with succeeding segment. */
+- if (next && next->ip_defrag_offset < end)
++ err = pskb_trim_rcsum(skb, end - offset);
++ if (err)
+ goto discard_fq;
+
+- /* Note : skb->ip_defrag_offset and skb->dev share the same location */
++ /* Note : skb->rbnode and skb->dev share the same location. */
+ dev = skb->dev;
+- if (dev)
+- fq->iif = dev->ifindex;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+- skb->ip_defrag_offset = offset;
+
+- /* Insert this fragment in the chain of fragments. */
+- skb->next = next;
+- if (!next)
+- fq->q.fragments_tail = skb;
+- if (prev)
+- prev->next = skb;
+- else
+- fq->q.fragments = skb;
++ prev_tail = fq->q.fragments_tail;
++ err = inet_frag_queue_insert(&fq->q, skb, offset, end);
++ if (err)
++ goto insert_error;
++
++ if (dev)
++ fq->iif = dev->ifindex;
+
+ fq->q.stamp = skb->tstamp;
+ fq->q.meat += skb->len;
+@@ -246,44 +216,48 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
+
+ if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ fq->q.meat == fq->q.len) {
+- int res;
+ unsigned long orefdst = skb->_skb_refdst;
+
+ skb->_skb_refdst = 0UL;
+- res = ip6_frag_reasm(fq, prev, dev);
++ err = ip6_frag_reasm(fq, skb, prev_tail, dev);
+ skb->_skb_refdst = orefdst;
+- return res;
++ return err;
+ }
+
+ skb_dst_drop(skb);
+- return -1;
++ return -EINPROGRESS;
+
++insert_error:
++ if (err == IPFRAG_DUP) {
++ kfree_skb(skb);
++ return -EINVAL;
++ }
++ err = -EINVAL;
++ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
++ IPSTATS_MIB_REASM_OVERLAPS);
+ discard_fq:
+ inet_frag_kill(&fq->q);
+-err:
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_REASMFAILS);
++err:
+ kfree_skb(skb);
+- return -1;
++ return err;
+ }
+
+ /*
+ * Check if this packet is complete.
+- * Returns NULL on failure by any reason, and pointer
+- * to current nexthdr field in reassembled frame.
+ *
+ * It is called with locked fq, and caller must check that
+ * queue is eligible for reassembly i.e. it is not COMPLETE,
+ * the last and the first frames arrived and all the bits are here.
+ */
+-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+- struct net_device *dev)
++static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
++ struct sk_buff *prev_tail, struct net_device *dev)
+ {
+ struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
+- struct sk_buff *fp, *head = fq->q.fragments;
+- int payload_len, delta;
+ unsigned int nhoff;
+- int sum_truesize;
++ void *reasm_data;
++ int payload_len;
+ u8 ecn;
+
+ inet_frag_kill(&fq->q);
+@@ -292,121 +266,40 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+ if (unlikely(ecn == 0xff))
+ goto out_fail;
+
+- /* Make the one we just received the head. */
+- if (prev) {
+- head = prev->next;
+- fp = skb_clone(head, GFP_ATOMIC);
+-
+- if (!fp)
+- goto out_oom;
+-
+- fp->next = head->next;
+- if (!fp->next)
+- fq->q.fragments_tail = fp;
+- prev->next = fp;
+-
+- skb_morph(head, fq->q.fragments);
+- head->next = fq->q.fragments->next;
+-
+- consume_skb(fq->q.fragments);
+- fq->q.fragments = head;
+- }
+-
+- WARN_ON(head == NULL);
+- WARN_ON(head->ip_defrag_offset != 0);
++ reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
++ if (!reasm_data)
++ goto out_oom;
+
+- /* Unfragmented part is taken from the first segment. */
+- payload_len = ((head->data - skb_network_header(head)) -
++ payload_len = ((skb->data - skb_network_header(skb)) -
+ sizeof(struct ipv6hdr) + fq->q.len -
+ sizeof(struct frag_hdr));
+ if (payload_len > IPV6_MAXPLEN)
+ goto out_oversize;
+
+- delta = - head->truesize;
+-
+- /* Head of list must not be cloned. */
+- if (skb_unclone(head, GFP_ATOMIC))
+- goto out_oom;
+-
+- delta += head->truesize;
+- if (delta)
+- add_frag_mem_limit(fq->q.net, delta);
+-
+- /* If the first fragment is fragmented itself, we split
+- * it to two chunks: the first with data and paged part
+- * and the second, holding only fragments. */
+- if (skb_has_frag_list(head)) {
+- struct sk_buff *clone;
+- int i, plen = 0;
+-
+- clone = alloc_skb(0, GFP_ATOMIC);
+- if (!clone)
+- goto out_oom;
+- clone->next = head->next;
+- head->next = clone;
+- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+- skb_frag_list_init(head);
+- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+- clone->len = clone->data_len = head->data_len - plen;
+- head->data_len -= clone->len;
+- head->len -= clone->len;
+- clone->csum = 0;
+- clone->ip_summed = head->ip_summed;
+- add_frag_mem_limit(fq->q.net, clone->truesize);
+- }
+-
+ /* We have to remove fragment header from datagram and to relocate
+ * header in order to calculate ICV correctly. */
+ nhoff = fq->nhoffset;
+- skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
+- memmove(head->head + sizeof(struct frag_hdr), head->head,
+- (head->data - head->head) - sizeof(struct frag_hdr));
+- if (skb_mac_header_was_set(head))
+- head->mac_header += sizeof(struct frag_hdr);
+- head->network_header += sizeof(struct frag_hdr);
+-
+- skb_reset_transport_header(head);
+- skb_push(head, head->data - skb_network_header(head));
+-
+- sum_truesize = head->truesize;
+- for (fp = head->next; fp;) {
+- bool headstolen;
+- int delta;
+- struct sk_buff *next = fp->next;
+-
+- sum_truesize += fp->truesize;
+- if (head->ip_summed != fp->ip_summed)
+- head->ip_summed = CHECKSUM_NONE;
+- else if (head->ip_summed == CHECKSUM_COMPLETE)
+- head->csum = csum_add(head->csum, fp->csum);
+-
+- if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
+- kfree_skb_partial(fp, headstolen);
+- } else {
+- fp->sk = NULL;
+- if (!skb_shinfo(head)->frag_list)
+- skb_shinfo(head)->frag_list = fp;
+- head->data_len += fp->len;
+- head->len += fp->len;
+- head->truesize += fp->truesize;
+- }
+- fp = next;
+- }
+- sub_frag_mem_limit(fq->q.net, sum_truesize);
++ skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0];
++ memmove(skb->head + sizeof(struct frag_hdr), skb->head,
++ (skb->data - skb->head) - sizeof(struct frag_hdr));
++ if (skb_mac_header_was_set(skb))
++ skb->mac_header += sizeof(struct frag_hdr);
++ skb->network_header += sizeof(struct frag_hdr);
++
++ skb_reset_transport_header(skb);
++
++ inet_frag_reasm_finish(&fq->q, skb, reasm_data);
+
+- head->next = NULL;
+- head->dev = dev;
+- head->tstamp = fq->q.stamp;
+- ipv6_hdr(head)->payload_len = htons(payload_len);
+- ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
+- IP6CB(head)->nhoff = nhoff;
+- IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
+- IP6CB(head)->frag_max_size = fq->q.max_size;
++ skb->dev = dev;
++ ipv6_hdr(skb)->payload_len = htons(payload_len);
++ ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
++ IP6CB(skb)->nhoff = nhoff;
++ IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
++ IP6CB(skb)->frag_max_size = fq->q.max_size;
+
+ /* Yes, and fold redundant checksum back. 8) */
+- skb_postpush_rcsum(head, skb_network_header(head),
+- skb_network_header_len(head));
++ skb_postpush_rcsum(skb, skb_network_header(skb),
++ skb_network_header_len(skb));
+
+ rcu_read_lock();
+ __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
+@@ -414,6 +307,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+ fq->q.fragments = NULL;
+ fq->q.rb_fragments = RB_ROOT;
+ fq->q.fragments_tail = NULL;
++ fq->q.last_run_head = NULL;
+ return 1;
+
+ out_oversize:
+@@ -463,10 +357,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
+ return 1;
+ }
+
+- if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
+- fhdr->frag_off & htons(IP6_MF))
+- goto fail_hdr;
+-
+ iif = skb->dev ? skb->dev->ifindex : 0;
+ fq = fq_find(net, fhdr->identification, hdr, iif);
+ if (fq) {
+@@ -484,6 +374,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
+ if (prob_offset) {
+ __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
+ IPSTATS_MIB_INHDRERRORS);
++ /* icmpv6_param_prob() calls kfree_skb(skb) */
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
+ }
+ return ret;
+--
+2.19.1
+
--- /dev/null
+From d1647fd4aa1024ca0c3361080279a663334751b9 Mon Sep 17 00:00:00 2001
+From: Peter Oskolkov <posk@google.com>
+Date: Mon, 8 Apr 2019 17:10:05 -0700
+Subject: net: IP6 defrag: use rbtrees in nf_conntrack_reasm.c
+
+[ Upstream commit 997dd96471641e147cb2c33ad54284000d0f5e35 ]
+
+Currently, IPv6 defragmentation code drops non-last fragments that
+are smaller than 1280 bytes: see
+commit 0ed4229b08c1 ("ipv6: defrag: drop non-last frags smaller than min mtu")
+
+This behavior is not specified in IPv6 RFCs and appears to break
+compatibility with some IPv6 implementations, as reported here:
+https://www.spinics.net/lists/netdev/msg543846.html
+
+This patch re-uses common IP defragmentation queueing and reassembly
+code in IP6 defragmentation in nf_conntrack, removing the 1280 byte
+restriction.
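+
+The nf_conntrack path now follows the same error convention as the
+IPv4/IPv6 reassembly code; condensed from the code added below (not a
+standalone example):
+
+	err = inet_frag_queue_insert(&fq->q, skb, offset, end);
+	if (err)
+		goto insert_error;
+	...
+insert_error:
+	if (err == IPFRAG_DUP)
+		goto err;		/* duplicate: keep the queue */
+	inet_frag_kill(&fq->q);		/* overlap: discard the queue */
+err:
+	skb_dst_drop(skb);
+	return -EINVAL;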
+
+Signed-off-by: Peter Oskolkov <posk@google.com>
+Reported-by: Tom Herbert <tom@herbertland.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/netfilter/nf_conntrack_reasm.c | 260 +++++++-----------------
+ 1 file changed, 71 insertions(+), 189 deletions(-)
+
+diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
+index 043ed8eb0ab9..cb1b4772dac0 100644
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -136,6 +136,9 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
+ }
+ #endif
+
++static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
++ struct sk_buff *prev_tail, struct net_device *dev);
++
+ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
+ {
+ return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
+@@ -177,9 +180,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
+ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
+ const struct frag_hdr *fhdr, int nhoff)
+ {
+- struct sk_buff *prev, *next;
+ unsigned int payload_len;
+- int offset, end;
++ struct net_device *dev;
++ struct sk_buff *prev;
++ int offset, end, err;
+ u8 ecn;
+
+ if (fq->q.flags & INET_FRAG_COMPLETE) {
+@@ -254,55 +258,18 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
+ goto err;
+ }
+
+- /* Find out which fragments are in front and at the back of us
+- * in the chain of fragments so far. We must know where to put
+- * this fragment, right?
+- */
+- prev = fq->q.fragments_tail;
+- if (!prev || prev->ip_defrag_offset < offset) {
+- next = NULL;
+- goto found;
+- }
+- prev = NULL;
+- for (next = fq->q.fragments; next != NULL; next = next->next) {
+- if (next->ip_defrag_offset >= offset)
+- break; /* bingo! */
+- prev = next;
+- }
+-
+-found:
+- /* RFC5722, Section 4:
+- * When reassembling an IPv6 datagram, if
+- * one or more its constituent fragments is determined to be an
+- * overlapping fragment, the entire datagram (and any constituent
+- * fragments, including those not yet received) MUST be silently
+- * discarded.
+- */
+-
+- /* Check for overlap with preceding fragment. */
+- if (prev &&
+- (prev->ip_defrag_offset + prev->len) > offset)
+- goto discard_fq;
+-
+- /* Look for overlap with succeeding segment. */
+- if (next && next->ip_defrag_offset < end)
+- goto discard_fq;
+-
+- /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+- if (skb->dev)
+- fq->iif = skb->dev->ifindex;
++ /* Note : skb->rbnode and skb->dev share the same location. */
++ dev = skb->dev;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+- skb->ip_defrag_offset = offset;
+
+- /* Insert this fragment in the chain of fragments. */
+- skb->next = next;
+- if (!next)
+- fq->q.fragments_tail = skb;
+- if (prev)
+- prev->next = skb;
+- else
+- fq->q.fragments = skb;
++ prev = fq->q.fragments_tail;
++ err = inet_frag_queue_insert(&fq->q, skb, offset, end);
++ if (err)
++ goto insert_error;
++
++ if (dev)
++ fq->iif = dev->ifindex;
+
+ fq->q.stamp = skb->tstamp;
+ fq->q.meat += skb->len;
+@@ -319,11 +286,25 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
+ fq->q.flags |= INET_FRAG_FIRST_IN;
+ }
+
+- return 0;
++ if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
++ fq->q.meat == fq->q.len) {
++ unsigned long orefdst = skb->_skb_refdst;
++
++ skb->_skb_refdst = 0UL;
++ err = nf_ct_frag6_reasm(fq, skb, prev, dev);
++ skb->_skb_refdst = orefdst;
++ return err;
++ }
++
++ skb_dst_drop(skb);
++ return -EINPROGRESS;
+
+-discard_fq:
++insert_error:
++ if (err == IPFRAG_DUP)
++ goto err;
+ inet_frag_kill(&fq->q);
+ err:
++ skb_dst_drop(skb);
+ return -EINVAL;
+ }
+
+@@ -333,147 +314,67 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
+ * It is called with locked fq, and caller must check that
+ * queue is eligible for reassembly i.e. it is not COMPLETE,
+ * the last and the first frames arrived and all the bits are here.
+- *
+- * returns true if *prev skb has been transformed into the reassembled
+- * skb, false otherwise.
+ */
+-static bool
+-nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
++static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
++ struct sk_buff *prev_tail, struct net_device *dev)
+ {
+- struct sk_buff *fp, *head = fq->q.fragments;
+- int payload_len, delta;
++ void *reasm_data;
++ int payload_len;
+ u8 ecn;
+
+ inet_frag_kill(&fq->q);
+
+- WARN_ON(head == NULL);
+- WARN_ON(head->ip_defrag_offset != 0);
+-
+ ecn = ip_frag_ecn_table[fq->ecn];
+ if (unlikely(ecn == 0xff))
+- return false;
++ goto err;
++
++ reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
++ if (!reasm_data)
++ goto err;
+
+- /* Unfragmented part is taken from the first segment. */
+- payload_len = ((head->data - skb_network_header(head)) -
++ payload_len = ((skb->data - skb_network_header(skb)) -
+ sizeof(struct ipv6hdr) + fq->q.len -
+ sizeof(struct frag_hdr));
+ if (payload_len > IPV6_MAXPLEN) {
+ net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
+ payload_len);
+- return false;
+- }
+-
+- delta = - head->truesize;
+-
+- /* Head of list must not be cloned. */
+- if (skb_unclone(head, GFP_ATOMIC))
+- return false;
+-
+- delta += head->truesize;
+- if (delta)
+- add_frag_mem_limit(fq->q.net, delta);
+-
+- /* If the first fragment is fragmented itself, we split
+- * it to two chunks: the first with data and paged part
+- * and the second, holding only fragments. */
+- if (skb_has_frag_list(head)) {
+- struct sk_buff *clone;
+- int i, plen = 0;
+-
+- clone = alloc_skb(0, GFP_ATOMIC);
+- if (clone == NULL)
+- return false;
+-
+- clone->next = head->next;
+- head->next = clone;
+- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+- skb_frag_list_init(head);
+- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+- clone->len = clone->data_len = head->data_len - plen;
+- head->data_len -= clone->len;
+- head->len -= clone->len;
+- clone->csum = 0;
+- clone->ip_summed = head->ip_summed;
+-
+- add_frag_mem_limit(fq->q.net, clone->truesize);
+- }
+-
+- /* morph head into last received skb: prev.
+- *
+- * This allows callers of ipv6 conntrack defrag to continue
+- * to use the last skb(frag) passed into the reasm engine.
+- * The last skb frag 'silently' turns into the full reassembled skb.
+- *
+- * Since prev is also part of q->fragments we have to clone it first.
+- */
+- if (head != prev) {
+- struct sk_buff *iter;
+-
+- fp = skb_clone(prev, GFP_ATOMIC);
+- if (!fp)
+- return false;
+-
+- fp->next = prev->next;
+-
+- iter = head;
+- while (iter) {
+- if (iter->next == prev) {
+- iter->next = fp;
+- break;
+- }
+- iter = iter->next;
+- }
+-
+- skb_morph(prev, head);
+- prev->next = head->next;
+- consume_skb(head);
+- head = prev;
++ goto err;
+ }
+
+ /* We have to remove fragment header from datagram and to relocate
+ * header in order to calculate ICV correctly. */
+- skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
+- memmove(head->head + sizeof(struct frag_hdr), head->head,
+- (head->data - head->head) - sizeof(struct frag_hdr));
+- head->mac_header += sizeof(struct frag_hdr);
+- head->network_header += sizeof(struct frag_hdr);
+-
+- skb_shinfo(head)->frag_list = head->next;
+- skb_reset_transport_header(head);
+- skb_push(head, head->data - skb_network_header(head));
+-
+- for (fp = head->next; fp; fp = fp->next) {
+- head->data_len += fp->len;
+- head->len += fp->len;
+- if (head->ip_summed != fp->ip_summed)
+- head->ip_summed = CHECKSUM_NONE;
+- else if (head->ip_summed == CHECKSUM_COMPLETE)
+- head->csum = csum_add(head->csum, fp->csum);
+- head->truesize += fp->truesize;
+- fp->sk = NULL;
+- }
+- sub_frag_mem_limit(fq->q.net, head->truesize);
++ skb_network_header(skb)[fq->nhoffset] = skb_transport_header(skb)[0];
++ memmove(skb->head + sizeof(struct frag_hdr), skb->head,
++ (skb->data - skb->head) - sizeof(struct frag_hdr));
++ skb->mac_header += sizeof(struct frag_hdr);
++ skb->network_header += sizeof(struct frag_hdr);
++
++ skb_reset_transport_header(skb);
++
++ inet_frag_reasm_finish(&fq->q, skb, reasm_data);
+
+- head->ignore_df = 1;
+- head->next = NULL;
+- head->dev = dev;
+- head->tstamp = fq->q.stamp;
+- ipv6_hdr(head)->payload_len = htons(payload_len);
+- ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
+- IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
++ skb->ignore_df = 1;
++ skb->dev = dev;
++ ipv6_hdr(skb)->payload_len = htons(payload_len);
++ ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
++ IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
+
+ /* Yes, and fold redundant checksum back. 8) */
+- if (head->ip_summed == CHECKSUM_COMPLETE)
+- head->csum = csum_partial(skb_network_header(head),
+- skb_network_header_len(head),
+- head->csum);
++ if (skb->ip_summed == CHECKSUM_COMPLETE)
++ skb->csum = csum_partial(skb_network_header(skb),
++ skb_network_header_len(skb),
++ skb->csum);
+
+ fq->q.fragments = NULL;
+ fq->q.rb_fragments = RB_ROOT;
+ fq->q.fragments_tail = NULL;
++ fq->q.last_run_head = NULL;
+
+- return true;
++ return 0;
++
++err:
++ inet_frag_kill(&fq->q);
++ return -EINVAL;
+ }
+
+ /*
+@@ -542,7 +443,6 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
+ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
+ {
+ u16 savethdr = skb->transport_header;
+- struct net_device *dev = skb->dev;
+ int fhoff, nhoff, ret;
+ struct frag_hdr *fhdr;
+ struct frag_queue *fq;
+@@ -565,10 +465,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
+ hdr = ipv6_hdr(skb);
+ fhdr = (struct frag_hdr *)skb_transport_header(skb);
+
+- if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
+- fhdr->frag_off & htons(IP6_MF))
+- return -EINVAL;
+-
+ skb_orphan(skb);
+ fq = fq_find(net, fhdr->identification, user, hdr,
+ skb->dev ? skb->dev->ifindex : 0);
+@@ -580,31 +476,17 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
+ spin_lock_bh(&fq->q.lock);
+
+ ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff);
+- if (ret < 0) {
+- if (ret == -EPROTO) {
+- skb->transport_header = savethdr;
+- ret = 0;
+- }
+- goto out_unlock;
++ if (ret == -EPROTO) {
++ skb->transport_header = savethdr;
++ ret = 0;
+ }
+
+ /* after queue has assumed skb ownership, only 0 or -EINPROGRESS
+ * must be returned.
+ */
+- ret = -EINPROGRESS;
+- if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+- fq->q.meat == fq->q.len) {
+- unsigned long orefdst = skb->_skb_refdst;
+-
+- skb->_skb_refdst = 0UL;
+- if (nf_ct_frag6_reasm(fq, skb, dev))
+- ret = 0;
+- skb->_skb_refdst = orefdst;
+- } else {
+- skb_dst_drop(skb);
+- }
++ if (ret)
++ ret = -EINPROGRESS;
+
+-out_unlock:
+ spin_unlock_bh(&fq->q.lock);
+ inet_frag_put(&fq->q);
+ return ret;
+--
+2.19.1
+
nfp-flower-replace-cfi-with-vlan-present.patch
nfp-flower-remove-vlan-cfi-bit-from-push-vlan-action.patch
sch_cake-simplify-logic-in-cake_select_tin.patch
+net-ip-defrag-encapsulate-rbtree-defrag-code-into-ca.patch
+net-ip6-defrag-use-rbtrees-for-ipv6-defrag.patch
+net-ip6-defrag-use-rbtrees-in-nf_conntrack_reasm.c.patch