From: Greg Kroah-Hartman
Date: Mon, 4 Feb 2019 10:01:28 +0000 (+0100)
Subject: 4.4-stable patches
X-Git-Tag: v4.20.7~7
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=dacc167aa0fbfdeea6ecceca195e37de7bd526b8;p=thirdparty%2Fkernel%2Fstable-queue.git

4.4-stable patches

added patches:
	fs-don-t-scan-the-inode-cache-before-sb_born-is-set.patch
	inet-frags-get-rid-of-inet_frag_evicting.patch
	ip-add-helpers-to-process-in-order-fragments-faster.patch
	ip-discard-ipv4-datagrams-with-overlapping-segments.patch
	ip-frags-fix-crash-in-ip_do_fragment.patch
	ip-process-in-order-fragments-efficiently.patch
	ip-use-rb-trees-for-ip-frag-queue.patch
	ipv4-frags-precedence-bug-in-ip_expire.patch
	ipv6-defrag-drop-non-last-frags-smaller-than-min-mtu.patch
	net-ipv4-do-not-handle-duplicate-fragments-as-overlapping.patch
	net-modify-skb_rbtree_purge-to-return-the-truesize-of-all-purged-skbs.patch
---

diff --git a/queue-4.4/fs-don-t-scan-the-inode-cache-before-sb_born-is-set.patch b/queue-4.4/fs-don-t-scan-the-inode-cache-before-sb_born-is-set.patch
new file mode 100644
index 00000000000..056f0fea0d1
--- /dev/null
+++ b/queue-4.4/fs-don-t-scan-the-inode-cache-before-sb_born-is-set.patch
@@ -0,0 +1,101 @@
+From 79f546a696bff2590169fb5684e23d65f4d9f591 Mon Sep 17 00:00:00 2001
+From: Dave Chinner
+Date: Fri, 11 May 2018 11:20:57 +1000
+Subject: fs: don't scan the inode cache before SB_BORN is set
+
+From: Dave Chinner
+
+commit 79f546a696bff2590169fb5684e23d65f4d9f591 upstream.
+
+We recently had an oops reported on a 4.14 kernel in
+xfs_reclaim_inodes_count() where sb->s_fs_info pointed to garbage
+and so the m_perag_tree lookup walked into lala land. It produces
+an oops down this path during the failed mount:
+
+ radix_tree_gang_lookup_tag+0xc4/0x130
+ xfs_perag_get_tag+0x37/0xf0
+ xfs_reclaim_inodes_count+0x32/0x40
+ xfs_fs_nr_cached_objects+0x11/0x20
+ super_cache_count+0x35/0xc0
+ shrink_slab.part.66+0xb1/0x370
+ shrink_node+0x7e/0x1a0
+ try_to_free_pages+0x199/0x470
+ __alloc_pages_slowpath+0x3a1/0xd20
+ __alloc_pages_nodemask+0x1c3/0x200
+ cache_grow_begin+0x20b/0x2e0
+ fallback_alloc+0x160/0x200
+ kmem_cache_alloc+0x111/0x4e0
+
+The problem is that the superblock shrinker is running before the
+filesystem structures it depends on have been fully set up. i.e.
+the shrinker is registered in sget(), before ->fill_super() has been
+called, and the shrinker can call into the filesystem before
+fill_super() does its setup work. Essentially we are exposed to
+both use-after-free and use-before-initialisation bugs here.
+
+To fix this, add a check for the SB_BORN flag in super_cache_count.
+In general, this flag is not set until ->fs_mount() completes
+successfully, so we know that it is set after the filesystem
+setup has completed. This matches the trylock_super() behaviour
+which will not let super_cache_scan() run if SB_BORN is not set, and
+hence will not allow the superblock shrinker to enter the
+filesystem while it is being set up or after it has failed setup
+and is being torn down.
+
+Cc: stable@kernel.org
+Signed-Off-By: Dave Chinner
+Signed-off-by: Al Viro
+Signed-off-by: Aaron Lu
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/super.c | 30 ++++++++++++++++++++++++------
+ 1 file changed, 24 insertions(+), 6 deletions(-)
+
+--- a/fs/super.c
++++ b/fs/super.c
+@@ -118,13 +118,23 @@ static unsigned long super_cache_count(s
+ sb = container_of(shrink, struct super_block, s_shrink);
+
+ /*
+- * Don't call trylock_super as it is a potential
+- * scalability bottleneck. The counts could get updated
+- * between super_cache_count and super_cache_scan anyway.
+- * Call to super_cache_count with shrinker_rwsem held
+- * ensures the safety of call to list_lru_shrink_count() and
+- * s_op->nr_cached_objects().
++ * We don't call trylock_super() here as it is a scalability bottleneck,
++ * so we're exposed to partial setup state. The shrinker rwsem does not
++ * protect filesystem operations backing list_lru_shrink_count() or
++ * s_op->nr_cached_objects(). Counts can change between
++ * super_cache_count and super_cache_scan, so we really don't need locks
++ * here.
++ *
++ * However, if we are currently mounting the superblock, the underlying
++ * filesystem might be in a state of partial construction and hence it
++ * is dangerous to access it. trylock_super() uses a MS_BORN check to
++ * avoid this situation, so do the same here. The memory barrier is
++ * matched with the one in mount_fs() as we don't hold locks here.
+ */
++ if (!(sb->s_flags & MS_BORN))
++ return 0;
++ smp_rmb();
++
+ if (sb->s_op && sb->s_op->nr_cached_objects)
+ total_objects = sb->s_op->nr_cached_objects(sb, sc);
+
+@@ -1133,6 +1143,14 @@ mount_fs(struct file_system_type *type,
+ sb = root->d_sb;
+ BUG_ON(!sb);
+ WARN_ON(!sb->s_bdi);
++
++ /*
++ * Write barrier is for super_cache_count(). We place it before setting
++ * MS_BORN as the data dependency between the two functions is the
++ * superblock structure contents that we just set up, not the MS_BORN
++ * flag.
++ */
++ smp_wmb();
+ sb->s_flags |= MS_BORN;
+
+ error = security_sb_kern_mount(sb, flags, secdata);

diff --git a/queue-4.4/inet-frags-get-rid-of-inet_frag_evicting.patch b/queue-4.4/inet-frags-get-rid-of-inet_frag_evicting.patch
new file mode 100644
index 00000000000..99306ea3a1e
--- /dev/null
+++ b/queue-4.4/inet-frags-get-rid-of-inet_frag_evicting.patch
@@ -0,0 +1,147 @@
+From 399d1404be660d355192ff4df5ccc3f4159ec1e4 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet
+Date: Sat, 31 Mar 2018 12:58:51 -0700
+Subject: inet: frags: get rid of inet_frag_evicting()
+
+From: Eric Dumazet
+
+commit 399d1404be660d355192ff4df5ccc3f4159ec1e4 upstream.
+
+This refactors ip_expire() since one indentation level is removed.
+
+Note: in the future, we should try hard to avoid the skb_clone()
+since this is a serious performance cost.
+Under DDOS, the ICMP message won't be sent because of rate limits.
+
+The fact that ip6_expire_frag_queue() does not use skb_clone() is
+disturbing too. Presumably IPv6 should have the same issue as the
+one we fixed in commit ec4fbd64751d ("inet: frag: release spinlock
+before calling icmp_send()").
+
+Signed-off-by: Eric Dumazet
+Signed-off-by: David S. Miller
+Signed-off-by: Mao Wenan
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/net/inet_frag.h | 5 ---
+ net/ipv4/ip_fragment.c | 66 +++++++++++++++++++++++-------------------------
+ net/ipv6/reassembly.c | 4 --
+ 3 files changed, 32 insertions(+), 43 deletions(-)
+
+--- a/include/net/inet_frag.h
++++ b/include/net/inet_frag.h
+@@ -123,11 +123,6 @@ static inline void inet_frag_put(struct
+ inet_frag_destroy(q, f);
+ }
+
+-static inline bool inet_frag_evicting(struct inet_frag_queue *q)
+-{
+- return !hlist_unhashed(&q->list_evictor);
+-}
+-
+ /* Memory Tracking Functions.
*/ + + static inline int frag_mem_limit(struct netns_frags *nf) +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -194,8 +194,11 @@ static bool frag_expire_skip_icmp(u32 us + */ + static void ip_expire(unsigned long arg) + { +- struct ipq *qp; ++ struct sk_buff *clone, *head; ++ const struct iphdr *iph; + struct net *net; ++ struct ipq *qp; ++ int err; + + qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); + net = container_of(qp->q.net, struct net, ipv4.frags); +@@ -209,45 +212,40 @@ static void ip_expire(unsigned long arg) + ipq_kill(qp); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); + +- if (!inet_frag_evicting(&qp->q)) { +- struct sk_buff *clone, *head = qp->q.fragments; +- const struct iphdr *iph; +- int err; +- +- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); ++ head = qp->q.fragments; + +- if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) +- goto out; ++ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); + +- head->dev = dev_get_by_index_rcu(net, qp->iif); +- if (!head->dev) +- goto out; ++ if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !head) ++ goto out; + ++ head->dev = dev_get_by_index_rcu(net, qp->iif); ++ if (!head->dev) ++ goto out; + +- /* skb has no dst, perform route lookup again */ +- iph = ip_hdr(head); +- err = ip_route_input_noref(head, iph->daddr, iph->saddr, ++ /* skb has no dst, perform route lookup again */ ++ iph = ip_hdr(head); ++ err = ip_route_input_noref(head, iph->daddr, iph->saddr, + iph->tos, head->dev); +- if (err) +- goto out; ++ if (err) ++ goto out; ++ ++ /* Only an end host needs to send an ICMP ++ * "Fragment Reassembly Timeout" message, per RFC792. ++ */ ++ if (frag_expire_skip_icmp(qp->user) && ++ (skb_rtable(head)->rt_type != RTN_LOCAL)) ++ goto out; ++ ++ clone = skb_clone(head, GFP_ATOMIC); + +- /* Only an end host needs to send an ICMP +- * "Fragment Reassembly Timeout" message, per RFC792. +- */ +- if (frag_expire_skip_icmp(qp->user) && +- (skb_rtable(head)->rt_type != RTN_LOCAL)) +- goto out; +- +- clone = skb_clone(head, GFP_ATOMIC); +- +- /* Send an ICMP "Fragment Reassembly Timeout" message. */ +- if (clone) { +- spin_unlock(&qp->q.lock); +- icmp_send(clone, ICMP_TIME_EXCEEDED, +- ICMP_EXC_FRAGTIME, 0); +- consume_skb(clone); +- goto out_rcu_unlock; +- } ++ /* Send an ICMP "Fragment Reassembly Timeout" message. */ ++ if (clone) { ++ spin_unlock(&qp->q.lock); ++ icmp_send(clone, ICMP_TIME_EXCEEDED, ++ ICMP_EXC_FRAGTIME, 0); ++ consume_skb(clone); ++ goto out_rcu_unlock; + } + out: + spin_unlock(&qp->q.lock); +--- a/net/ipv6/reassembly.c ++++ b/net/ipv6/reassembly.c +@@ -146,10 +146,6 @@ void ip6_expire_frag_queue(struct net *n + goto out_rcu_unlock; + + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); +- +- if (inet_frag_evicting(&fq->q)) +- goto out_rcu_unlock; +- + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); + + /* Don't send error if the first segment did not arrive. */ diff --git a/queue-4.4/ip-add-helpers-to-process-in-order-fragments-faster.patch b/queue-4.4/ip-add-helpers-to-process-in-order-fragments-faster.patch new file mode 100644 index 00000000000..7ac5d7b3f67 --- /dev/null +++ b/queue-4.4/ip-add-helpers-to-process-in-order-fragments-faster.patch @@ -0,0 +1,157 @@ +From 353c9cb360874e737fb000545f783df756c06f9a Mon Sep 17 00:00:00 2001 +From: Peter Oskolkov +Date: Sat, 11 Aug 2018 20:27:24 +0000 +Subject: ip: add helpers to process in-order fragments faster. + +From: Peter Oskolkov + +commit 353c9cb360874e737fb000545f783df756c06f9a upstream. 
+ +This patch introduces several helper functions/macros that will be +used in the follow-up patch. No runtime changes yet. + +The new logic (fully implemented in the second patch) is as follows: + +* Nodes in the rb-tree will now contain not single fragments, but lists + of consecutive fragments ("runs"). + +* At each point in time, the current "active" run at the tail is + maintained/tracked. Fragments that arrive in-order, adjacent + to the previous tail fragment, are added to this tail run without + triggering the re-balancing of the rb-tree. + +* If a fragment arrives out of order with the offset _before_ the tail run, + it is inserted into the rb-tree as a single fragment. + +* If a fragment arrives after the current tail fragment (with a gap), + it starts a new "tail" run, as is inserted into the rb-tree + at the end as the head of the new run. + +skb->cb is used to store additional information +needed here (suggested by Eric Dumazet). + +Reported-by: Willem de Bruijn +Signed-off-by: Peter Oskolkov +Cc: Eric Dumazet +Cc: Florian Westphal +Signed-off-by: David S. Miller +Signed-off-by: Mao Wenan +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_frag.h | 4 ++ + net/ipv4/ip_fragment.c | 74 +++++++++++++++++++++++++++++++++++++++++++++--- + 2 files changed, 74 insertions(+), 4 deletions(-) + +--- a/include/net/inet_frag.h ++++ b/include/net/inet_frag.h +@@ -48,6 +48,7 @@ struct inet_frag_queue { + struct sk_buff *fragments; /* Used in IPv6. */ + struct rb_root rb_fragments; /* Used in IPv4. */ + struct sk_buff *fragments_tail; ++ struct sk_buff *last_run_head; + ktime_t stamp; + int len; + int meat; +@@ -118,6 +119,9 @@ struct inet_frag_queue *inet_frag_find(s + void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, + const char *prefix); + ++/* Free all skbs in the queue; return the sum of their truesizes. */ ++unsigned int inet_frag_rbtree_purge(struct rb_root *root); ++ + static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) + { + if (atomic_dec_and_test(&q->refcnt)) +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -58,13 +58,57 @@ + static int sysctl_ipfrag_max_dist __read_mostly = 64; + static const char ip_frag_cache_name[] = "ip4-frags"; + +-struct ipfrag_skb_cb +-{ ++/* Use skb->cb to track consecutive/adjacent fragments coming at ++ * the end of the queue. Nodes in the rb-tree queue will ++ * contain "runs" of one or more adjacent fragments. ++ * ++ * Invariants: ++ * - next_frag is NULL at the tail of a "run"; ++ * - the head of a "run" has the sum of all fragment lengths in frag_run_len. ++ */ ++struct ipfrag_skb_cb { + struct inet_skb_parm h; +- int offset; ++ int offset; ++ struct sk_buff *next_frag; ++ int frag_run_len; + }; + +-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) ++#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) ++ ++static void ip4_frag_init_run(struct sk_buff *skb) ++{ ++ BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); ++ ++ FRAG_CB(skb)->next_frag = NULL; ++ FRAG_CB(skb)->frag_run_len = skb->len; ++} ++ ++/* Append skb to the last "run". */ ++static void ip4_frag_append_to_last_run(struct inet_frag_queue *q, ++ struct sk_buff *skb) ++{ ++ RB_CLEAR_NODE(&skb->rbnode); ++ FRAG_CB(skb)->next_frag = NULL; ++ ++ FRAG_CB(q->last_run_head)->frag_run_len += skb->len; ++ FRAG_CB(q->fragments_tail)->next_frag = skb; ++ q->fragments_tail = skb; ++} ++ ++/* Create a new "run" with the skb. 
*/ ++static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb) ++{ ++ if (q->last_run_head) ++ rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, ++ &q->last_run_head->rbnode.rb_right); ++ else ++ rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); ++ rb_insert_color(&skb->rbnode, &q->rb_fragments); ++ ++ ip4_frag_init_run(skb); ++ q->fragments_tail = skb; ++ q->last_run_head = skb; ++} + + /* Describe an entry in the "incomplete datagrams" queue. */ + struct ipq { +@@ -721,6 +765,28 @@ struct sk_buff *ip_check_defrag(struct n + } + EXPORT_SYMBOL(ip_check_defrag); + ++unsigned int inet_frag_rbtree_purge(struct rb_root *root) ++{ ++ struct rb_node *p = rb_first(root); ++ unsigned int sum = 0; ++ ++ while (p) { ++ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); ++ ++ p = rb_next(p); ++ rb_erase(&skb->rbnode, root); ++ while (skb) { ++ struct sk_buff *next = FRAG_CB(skb)->next_frag; ++ ++ sum += skb->truesize; ++ kfree_skb(skb); ++ skb = next; ++ } ++ } ++ return sum; ++} ++EXPORT_SYMBOL(inet_frag_rbtree_purge); ++ + #ifdef CONFIG_SYSCTL + static int zero; + diff --git a/queue-4.4/ip-discard-ipv4-datagrams-with-overlapping-segments.patch b/queue-4.4/ip-discard-ipv4-datagrams-with-overlapping-segments.patch new file mode 100644 index 00000000000..09d82210368 --- /dev/null +++ b/queue-4.4/ip-discard-ipv4-datagrams-with-overlapping-segments.patch @@ -0,0 +1,146 @@ +From 7969e5c40dfd04799d4341f1b7cd266b6e47f227 Mon Sep 17 00:00:00 2001 +From: Peter Oskolkov +Date: Thu, 2 Aug 2018 23:34:37 +0000 +Subject: ip: discard IPv4 datagrams with overlapping segments. + +From: Peter Oskolkov + +commit 7969e5c40dfd04799d4341f1b7cd266b6e47f227 upstream. + +This behavior is required in IPv6, and there is little need +to tolerate overlapping fragments in IPv4. This change +simplifies the code and eliminates potential DDoS attack vectors. + +Tested: ran ip_defrag selftest (not yet available uptream). + +Suggested-by: David S. Miller +Signed-off-by: Peter Oskolkov +Signed-off-by: Eric Dumazet +Cc: Florian Westphal +Acked-by: Stephen Hemminger +Signed-off-by: David S. Miller +Signed-off-by: Mao Wenan +Signed-off-by: Greg Kroah-Hartman +--- + include/uapi/linux/snmp.h | 1 + net/ipv4/ip_fragment.c | 73 ++++++++++++---------------------------------- + net/ipv4/proc.c | 1 + 3 files changed, 22 insertions(+), 53 deletions(-) + +--- a/include/uapi/linux/snmp.h ++++ b/include/uapi/linux/snmp.h +@@ -55,6 +55,7 @@ enum + IPSTATS_MIB_ECT1PKTS, /* InECT1Pkts */ + IPSTATS_MIB_ECT0PKTS, /* InECT0Pkts */ + IPSTATS_MIB_CEPKTS, /* InCEPkts */ ++ IPSTATS_MIB_REASM_OVERLAPS, /* ReasmOverlaps */ + __IPSTATS_MIB_MAX + }; + +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -342,6 +342,7 @@ static int ip_frag_reinit(struct ipq *qp + /* Add new segment to existing queue. */ + static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) + { ++ struct net *net = container_of(qp->q.net, struct net, ipv4.frags); + struct sk_buff *prev, *next; + struct net_device *dev; + unsigned int fragsize; +@@ -422,60 +423,22 @@ static int ip_frag_queue(struct ipq *qp, + } + + found: +- /* We found where to put this one. Check for overlap with +- * preceding fragment, and, if needed, align things so that +- * any overlaps are eliminated. 
++ /* RFC5722, Section 4, amended by Errata ID : 3089 ++ * When reassembling an IPv6 datagram, if ++ * one or more its constituent fragments is determined to be an ++ * overlapping fragment, the entire datagram (and any constituent ++ * fragments) MUST be silently discarded. ++ * ++ * We do the same here for IPv4. + */ +- if (prev) { +- int i = (FRAG_CB(prev)->offset + prev->len) - offset; +- +- if (i > 0) { +- offset += i; +- err = -EINVAL; +- if (end <= offset) +- goto err; +- err = -ENOMEM; +- if (!pskb_pull(skb, i)) +- goto err; +- if (skb->ip_summed != CHECKSUM_UNNECESSARY) +- skb->ip_summed = CHECKSUM_NONE; +- } +- } +- +- err = -ENOMEM; +- +- while (next && FRAG_CB(next)->offset < end) { +- int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */ +- +- if (i < next->len) { +- /* Eat head of the next overlapped fragment +- * and leave the loop. The next ones cannot overlap. +- */ +- if (!pskb_pull(next, i)) +- goto err; +- FRAG_CB(next)->offset += i; +- qp->q.meat -= i; +- if (next->ip_summed != CHECKSUM_UNNECESSARY) +- next->ip_summed = CHECKSUM_NONE; +- break; +- } else { +- struct sk_buff *free_it = next; +- +- /* Old fragment is completely overridden with +- * new one drop it. +- */ +- next = next->next; +- +- if (prev) +- prev->next = next; +- else +- qp->q.fragments = next; +- +- qp->q.meat -= free_it->len; +- sub_frag_mem_limit(qp->q.net, free_it->truesize); +- kfree_skb(free_it); +- } +- } ++ /* Is there an overlap with the previous fragment? */ ++ if (prev && ++ (FRAG_CB(prev)->offset + prev->len) > offset) ++ goto discard_qp; ++ ++ /* Is there an overlap with the next fragment? */ ++ if (next && FRAG_CB(next)->offset < end) ++ goto discard_qp; + + FRAG_CB(skb)->offset = offset; + +@@ -522,6 +485,10 @@ found: + skb_dst_drop(skb); + return -EINPROGRESS; + ++discard_qp: ++ ipq_kill(qp); ++ err = -EINVAL; ++ IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS); + err: + kfree_skb(skb); + return err; +--- a/net/ipv4/proc.c ++++ b/net/ipv4/proc.c +@@ -132,6 +132,7 @@ static const struct snmp_mib snmp4_ipext + SNMP_MIB_ITEM("InECT1Pkts", IPSTATS_MIB_ECT1PKTS), + SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS), + SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS), ++ SNMP_MIB_ITEM("ReasmOverlaps", IPSTATS_MIB_REASM_OVERLAPS), + SNMP_MIB_SENTINEL + }; + diff --git a/queue-4.4/ip-frags-fix-crash-in-ip_do_fragment.patch b/queue-4.4/ip-frags-fix-crash-in-ip_do_fragment.patch new file mode 100644 index 00000000000..f4c8b521f84 --- /dev/null +++ b/queue-4.4/ip-frags-fix-crash-in-ip_do_fragment.patch @@ -0,0 +1,110 @@ +From 5d407b071dc369c26a38398326ee2be53651cfe4 Mon Sep 17 00:00:00 2001 +From: Taehee Yoo +Date: Mon, 10 Sep 2018 02:47:05 +0900 +Subject: ip: frags: fix crash in ip_do_fragment() + +From: Taehee Yoo + +commit 5d407b071dc369c26a38398326ee2be53651cfe4 upstream. + +A kernel crash occurrs when defragmented packet is fragmented +in ip_do_fragment(). +In defragment routine, skb_orphan() is called and +skb->ip_defrag_offset is set. but skb->sk and +skb->ip_defrag_offset are same union member. so that +frag->sk is not NULL. +Hence crash occurrs in skb->sk check routine in ip_do_fragment() when +defragmented packet is fragmented. + +test commands: + %iptables -t nat -I POSTROUTING -j MASQUERADE + %hping3 192.168.4.2 -s 1000 -p 2000 -d 60000 + +splat looks like: +[ 261.069429] kernel BUG at net/ipv4/ip_output.c:636! 
+[ 261.075753] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
+[ 261.083854] CPU: 1 PID: 1349 Comm: hping3 Not tainted 4.19.0-rc2+ #3
+[ 261.100977] RIP: 0010:ip_do_fragment+0x1613/0x2600
+[ 261.106945] Code: e8 e2 38 e3 fe 4c 8b 44 24 18 48 8b 74 24 08 e9 92 f6 ff ff 80 3c 02 00 0f 85 da 07 00 00 48 8b b5 d0 00 00 00 e9 25 f6 ff ff <0f> 0b 0f 0b 44 8b 54 24 58 4c 8b 4c 24 18 4c 8b 5c 24 60 4c 8b 6c
+[ 261.127015] RSP: 0018:ffff8801031cf2c0 EFLAGS: 00010202
+[ 261.134156] RAX: 1ffff1002297537b RBX: ffffed0020639e6e RCX: 0000000000000004
+[ 261.142156] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff880114ba9bd8
+[ 261.150157] RBP: ffff880114ba8a40 R08: ffffed0022975395 R09: ffffed0022975395
+[ 261.158157] R10: 0000000000000001 R11: ffffed0022975394 R12: ffff880114ba9ca4
+[ 261.166159] R13: 0000000000000010 R14: ffff880114ba9bc0 R15: dffffc0000000000
+[ 261.174169] FS: 00007fbae2199700(0000) GS:ffff88011b400000(0000) knlGS:0000000000000000
+[ 261.183012] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 261.189013] CR2: 00005579244fe000 CR3: 0000000119bf4000 CR4: 00000000001006e0
+[ 261.198158] Call Trace:
+[ 261.199018] ? dst_output+0x180/0x180
+[ 261.205011] ? save_trace+0x300/0x300
+[ 261.209018] ? ip_copy_metadata+0xb00/0xb00
+[ 261.213034] ? sched_clock_local+0xd4/0x140
+[ 261.218158] ? kill_l4proto+0x120/0x120 [nf_conntrack]
+[ 261.223014] ? rt_cpu_seq_stop+0x10/0x10
+[ 261.227014] ? find_held_lock+0x39/0x1c0
+[ 261.233008] ip_finish_output+0x51d/0xb50
+[ 261.237006] ? ip_fragment.constprop.56+0x220/0x220
+[ 261.243011] ? nf_ct_l4proto_register_one+0x5b0/0x5b0 [nf_conntrack]
+[ 261.250152] ? rcu_is_watching+0x77/0x120
+[ 261.255010] ? nf_nat_ipv4_out+0x1e/0x2b0 [nf_nat_ipv4]
+[ 261.261033] ? nf_hook_slow+0xb1/0x160
+[ 261.265007] ip_output+0x1c7/0x710
+[ 261.269005] ? ip_mc_output+0x13f0/0x13f0
+[ 261.273002] ? __local_bh_enable_ip+0xe9/0x1b0
+[ 261.278152] ? ip_fragment.constprop.56+0x220/0x220
+[ 261.282996] ? nf_hook_slow+0xb1/0x160
+[ 261.287007] raw_sendmsg+0x21f9/0x4420
+[ 261.291008] ? dst_output+0x180/0x180
+[ 261.297003] ? sched_clock_cpu+0x126/0x170
+[ 261.301003] ? find_held_lock+0x39/0x1c0
+[ 261.306155] ? stop_critical_timings+0x420/0x420
+[ 261.311004] ? check_flags.part.36+0x450/0x450
+[ 261.315005] ? _raw_spin_unlock_irq+0x29/0x40
+[ 261.320995] ? _raw_spin_unlock_irq+0x29/0x40
+[ 261.326142] ? cyc2ns_read_end+0x10/0x10
+[ 261.330139] ? raw_bind+0x280/0x280
+[ 261.334138] ? sched_clock_cpu+0x126/0x170
+[ 261.338995] ? check_flags.part.36+0x450/0x450
+[ 261.342991] ? __lock_acquire+0x4500/0x4500
+[ 261.348994] ? inet_sendmsg+0x11c/0x500
+[ 261.352989] ? dst_output+0x180/0x180
+[ 261.357012] inet_sendmsg+0x11c/0x500
+[ ... ]
+
+v2:
+ - clear skb->sk at reassembly routine. (Eric Dumazet)
+
+Fixes: fa0f527358bd ("ip: use rb trees for IP frag queue.")
+Suggested-by: Eric Dumazet
+Signed-off-by: Taehee Yoo
+Reviewed-by: Eric Dumazet
+Signed-off-by: David S.
Miller +Signed-off-by: Mao Wenan +Signed-off-by: Greg Kroah-Hartman + +--- + net/ipv4/ip_fragment.c | 1 + + net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + + 2 files changed, 2 insertions(+) + +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -661,6 +661,7 @@ static int ip_frag_reasm(struct ipq *qp, + nextp = &fp->next; + fp->prev = NULL; + memset(&fp->rbnode, 0, sizeof(fp->rbnode)); ++ fp->sk = NULL; + head->data_len += fp->len; + head->len += fp->len; + if (head->ip_summed != fp->ip_summed) +--- a/net/ipv6/netfilter/nf_conntrack_reasm.c ++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c +@@ -454,6 +454,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, + else if (head->ip_summed == CHECKSUM_COMPLETE) + head->csum = csum_add(head->csum, fp->csum); + head->truesize += fp->truesize; ++ fp->sk = NULL; + } + sub_frag_mem_limit(fq->q.net, head->truesize); + diff --git a/queue-4.4/ip-process-in-order-fragments-efficiently.patch b/queue-4.4/ip-process-in-order-fragments-efficiently.patch new file mode 100644 index 00000000000..33b5def8169 --- /dev/null +++ b/queue-4.4/ip-process-in-order-fragments-efficiently.patch @@ -0,0 +1,268 @@ +From a4fd284a1f8fd4b6c59aa59db2185b1e17c5c11c Mon Sep 17 00:00:00 2001 +From: Peter Oskolkov +Date: Sat, 11 Aug 2018 20:27:25 +0000 +Subject: ip: process in-order fragments efficiently + +From: Peter Oskolkov + +commit a4fd284a1f8fd4b6c59aa59db2185b1e17c5c11c upstream. + +This patch changes the runtime behavior of IP defrag queue: +incoming in-order fragments are added to the end of the current +list/"run" of in-order fragments at the tail. + +On some workloads, UDP stream performance is substantially improved: + +RX: ./udp_stream -F 10 -T 2 -l 60 +TX: ./udp_stream -c -H -F 10 -T 5 -l 60 + +with this patchset applied on a 10Gbps receiver: + + throughput=9524.18 + throughput_units=Mbit/s + +upstream (net-next): + + throughput=4608.93 + throughput_units=Mbit/s + +Reported-by: Willem de Bruijn +Signed-off-by: Peter Oskolkov +Cc: Eric Dumazet +Cc: Florian Westphal +Signed-off-by: David S. 
Miller +Signed-off-by: Mao Wenan +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/inet_fragment.c | 2 + net/ipv4/ip_fragment.c | 110 +++++++++++++++++++++++++++++------------------ + 2 files changed, 70 insertions(+), 42 deletions(-) + +--- a/net/ipv4/inet_fragment.c ++++ b/net/ipv4/inet_fragment.c +@@ -315,7 +315,7 @@ void inet_frag_destroy(struct inet_frag_ + fp = xp; + } while (fp); + } else { +- sum_truesize = skb_rbtree_purge(&q->rb_fragments); ++ sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments); + } + sum = sum_truesize + f->qsize; + +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -139,8 +139,8 @@ int ip_frag_mem(struct net *net) + return sum_frag_mem_limit(&net->ipv4.frags); + } + +-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, +- struct net_device *dev); ++static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, ++ struct sk_buff *prev_tail, struct net_device *dev); + + struct ip4_create_arg { + struct iphdr *iph; +@@ -271,7 +271,12 @@ static void ip_expire(unsigned long arg) + head = skb_rb_first(&qp->q.rb_fragments); + if (!head) + goto out; +- rb_erase(&head->rbnode, &qp->q.rb_fragments); ++ if (FRAG_CB(head)->next_frag) ++ rb_replace_node(&head->rbnode, ++ &FRAG_CB(head)->next_frag->rbnode, ++ &qp->q.rb_fragments); ++ else ++ rb_erase(&head->rbnode, &qp->q.rb_fragments); + memset(&head->rbnode, 0, sizeof(head->rbnode)); + barrier(); + } +@@ -373,7 +378,7 @@ static int ip_frag_reinit(struct ipq *qp + return -ETIMEDOUT; + } + +- sum_truesize = skb_rbtree_purge(&qp->q.rb_fragments); ++ sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments); + sub_frag_mem_limit(qp->q.net, sum_truesize); + + qp->q.flags = 0; +@@ -382,6 +387,7 @@ static int ip_frag_reinit(struct ipq *qp + qp->q.fragments = NULL; + qp->q.rb_fragments = RB_ROOT; + qp->q.fragments_tail = NULL; ++ qp->q.last_run_head = NULL; + qp->iif = 0; + qp->ecn = 0; + +@@ -393,7 +399,7 @@ static int ip_frag_queue(struct ipq *qp, + { + struct net *net = container_of(qp->q.net, struct net, ipv4.frags); + struct rb_node **rbn, *parent; +- struct sk_buff *skb1; ++ struct sk_buff *skb1, *prev_tail; + struct net_device *dev; + unsigned int fragsize; + int flags, offset; +@@ -471,38 +477,41 @@ static int ip_frag_queue(struct ipq *qp, + */ + + /* Find out where to put this fragment. */ +- skb1 = qp->q.fragments_tail; +- if (!skb1) { +- /* This is the first fragment we've received. */ +- rb_link_node(&skb->rbnode, NULL, &qp->q.rb_fragments.rb_node); +- qp->q.fragments_tail = skb; +- } else if ((FRAG_CB(skb1)->offset + skb1->len) < end) { +- /* This is the common/special case: skb goes to the end. */ ++ prev_tail = qp->q.fragments_tail; ++ if (!prev_tail) ++ ip4_frag_create_run(&qp->q, skb); /* First fragment. */ ++ else if (FRAG_CB(prev_tail)->offset + prev_tail->len < end) { ++ /* This is the common case: skb goes to the end. */ + /* Detect and discard overlaps. */ +- if (offset < (FRAG_CB(skb1)->offset + skb1->len)) ++ if (offset < FRAG_CB(prev_tail)->offset + prev_tail->len) + goto discard_qp; +- /* Insert after skb1. */ +- rb_link_node(&skb->rbnode, &skb1->rbnode, &skb1->rbnode.rb_right); +- qp->q.fragments_tail = skb; ++ if (offset == FRAG_CB(prev_tail)->offset + prev_tail->len) ++ ip4_frag_append_to_last_run(&qp->q, skb); ++ else ++ ip4_frag_create_run(&qp->q, skb); + } else { +- /* Binary search. Note that skb can become the first fragment, but +- * not the last (covered above). */ ++ /* Binary search. 
Note that skb can become the first fragment, ++ * but not the last (covered above). ++ */ + rbn = &qp->q.rb_fragments.rb_node; + do { + parent = *rbn; + skb1 = rb_to_skb(parent); + if (end <= FRAG_CB(skb1)->offset) + rbn = &parent->rb_left; +- else if (offset >= FRAG_CB(skb1)->offset + skb1->len) ++ else if (offset >= FRAG_CB(skb1)->offset + ++ FRAG_CB(skb1)->frag_run_len) + rbn = &parent->rb_right; + else /* Found an overlap with skb1. */ + goto discard_qp; + } while (*rbn); + /* Here we have parent properly set, and rbn pointing to +- * one of its NULL left/right children. Insert skb. */ ++ * one of its NULL left/right children. Insert skb. ++ */ ++ ip4_frag_init_run(skb); + rb_link_node(&skb->rbnode, parent, rbn); ++ rb_insert_color(&skb->rbnode, &qp->q.rb_fragments); + } +- rb_insert_color(&skb->rbnode, &qp->q.rb_fragments); + + if (dev) { + qp->iif = dev->ifindex; +@@ -531,7 +540,7 @@ static int ip_frag_queue(struct ipq *qp, + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; +- err = ip_frag_reasm(qp, skb, dev); ++ err = ip_frag_reasm(qp, skb, prev_tail, dev); + skb->_skb_refdst = orefdst; + return err; + } +@@ -550,7 +559,7 @@ err: + + /* Build a new IP datagram from all its fragments. */ + static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, +- struct net_device *dev) ++ struct sk_buff *prev_tail, struct net_device *dev) + { + struct net *net = container_of(qp->q.net, struct net, ipv4.frags); + struct iphdr *iph; +@@ -575,10 +584,16 @@ static int ip_frag_reasm(struct ipq *qp, + if (!fp) + goto out_nomem; + +- rb_replace_node(&skb->rbnode, &fp->rbnode, &qp->q.rb_fragments); ++ FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; ++ if (RB_EMPTY_NODE(&skb->rbnode)) ++ FRAG_CB(prev_tail)->next_frag = fp; ++ else ++ rb_replace_node(&skb->rbnode, &fp->rbnode, ++ &qp->q.rb_fragments); + if (qp->q.fragments_tail == skb) + qp->q.fragments_tail = fp; + skb_morph(skb, head); ++ FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; + rb_replace_node(&head->rbnode, &skb->rbnode, + &qp->q.rb_fragments); + consume_skb(head); +@@ -614,7 +629,7 @@ static int ip_frag_reasm(struct ipq *qp, + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); + clone->len = clone->data_len = head->data_len - plen; +- skb->truesize += clone->truesize; ++ head->truesize += clone->truesize; + clone->csum = 0; + clone->ip_summed = head->ip_summed; + add_frag_mem_limit(qp->q.net, clone->truesize); +@@ -627,24 +642,36 @@ static int ip_frag_reasm(struct ipq *qp, + skb_push(head, head->data - skb_network_header(head)); + + /* Traverse the tree in order, to build frag_list. */ ++ fp = FRAG_CB(head)->next_frag; + rbn = rb_next(&head->rbnode); + rb_erase(&head->rbnode, &qp->q.rb_fragments); +- while (rbn) { +- struct rb_node *rbnext = rb_next(rbn); +- fp = rb_to_skb(rbn); +- rb_erase(rbn, &qp->q.rb_fragments); +- rbn = rbnext; +- *nextp = fp; +- nextp = &fp->next; +- fp->prev = NULL; +- memset(&fp->rbnode, 0, sizeof(fp->rbnode)); +- head->data_len += fp->len; +- head->len += fp->len; +- if (head->ip_summed != fp->ip_summed) +- head->ip_summed = CHECKSUM_NONE; +- else if (head->ip_summed == CHECKSUM_COMPLETE) +- head->csum = csum_add(head->csum, fp->csum); +- head->truesize += fp->truesize; ++ while (rbn || fp) { ++ /* fp points to the next sk_buff in the current run; ++ * rbn points to the next run. ++ */ ++ /* Go through the current run. 
*/ ++ while (fp) { ++ *nextp = fp; ++ nextp = &fp->next; ++ fp->prev = NULL; ++ memset(&fp->rbnode, 0, sizeof(fp->rbnode)); ++ head->data_len += fp->len; ++ head->len += fp->len; ++ if (head->ip_summed != fp->ip_summed) ++ head->ip_summed = CHECKSUM_NONE; ++ else if (head->ip_summed == CHECKSUM_COMPLETE) ++ head->csum = csum_add(head->csum, fp->csum); ++ head->truesize += fp->truesize; ++ fp = FRAG_CB(fp)->next_frag; ++ } ++ /* Move to the next run. */ ++ if (rbn) { ++ struct rb_node *rbnext = rb_next(rbn); ++ ++ fp = rb_to_skb(rbn); ++ rb_erase(rbn, &qp->q.rb_fragments); ++ rbn = rbnext; ++ } + } + sub_frag_mem_limit(qp->q.net, head->truesize); + +@@ -680,6 +707,7 @@ static int ip_frag_reasm(struct ipq *qp, + qp->q.fragments = NULL; + qp->q.rb_fragments = RB_ROOT; + qp->q.fragments_tail = NULL; ++ qp->q.last_run_head = NULL; + return 0; + + out_nomem: diff --git a/queue-4.4/ip-use-rb-trees-for-ip-frag-queue.patch b/queue-4.4/ip-use-rb-trees-for-ip-frag-queue.patch new file mode 100644 index 00000000000..cd5b3a0d01b --- /dev/null +++ b/queue-4.4/ip-use-rb-trees-for-ip-frag-queue.patch @@ -0,0 +1,462 @@ +From fa0f527358bd900ef92f925878ed6bfbd51305cc Mon Sep 17 00:00:00 2001 +From: Peter Oskolkov +Date: Thu, 2 Aug 2018 23:34:39 +0000 +Subject: ip: use rb trees for IP frag queue. + +From: Peter Oskolkov + +commit fa0f527358bd900ef92f925878ed6bfbd51305cc upstream. + +Similar to TCP OOO RX queue, it makes sense to use rb trees to store +IP fragments, so that OOO fragments are inserted faster. + +Tested: + +- a follow-up patch contains a rather comprehensive ip defrag + self-test (functional) +- ran neper `udp_stream -c -H -F 100 -l 300 -T 20`: + netstat --statistics + Ip: + 282078937 total packets received + 0 forwarded + 0 incoming packets discarded + 946760 incoming packets delivered + 18743456 requests sent out + 101 fragments dropped after timeout + 282077129 reassemblies required + 944952 packets reassembled ok + 262734239 packet reassembles failed + (The numbers/stats above are somewhat better re: + reassemblies vs a kernel without this patchset. More + comprehensive performance testing TBD). + +Reported-by: Jann Horn +Reported-by: Juha-Matti Tilli +Suggested-by: Eric Dumazet +Signed-off-by: Peter Oskolkov +Signed-off-by: Eric Dumazet +Cc: Florian Westphal +Signed-off-by: David S. Miller +Signed-off-by: Mao Wenan +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/skbuff.h | 2 + include/net/inet_frag.h | 3 + net/ipv4/inet_fragment.c | 14 +- + net/ipv4/ip_fragment.c | 190 +++++++++++++++++--------------- + net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + net/ipv6/reassembly.c | 1 + 6 files changed, 120 insertions(+), 91 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -556,7 +556,7 @@ struct sk_buff { + struct skb_mstamp skb_mstamp; + }; + }; +- struct rb_node rbnode; /* used in netem & tcp stack */ ++ struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */ + }; + struct sock *sk; + struct net_device *dev; +--- a/include/net/inet_frag.h ++++ b/include/net/inet_frag.h +@@ -45,7 +45,8 @@ struct inet_frag_queue { + struct timer_list timer; + struct hlist_node list; + atomic_t refcnt; +- struct sk_buff *fragments; ++ struct sk_buff *fragments; /* Used in IPv6. */ ++ struct rb_root rb_fragments; /* Used in IPv4. */ + struct sk_buff *fragments_tail; + ktime_t stamp; + int len; +--- a/net/ipv4/inet_fragment.c ++++ b/net/ipv4/inet_fragment.c +@@ -306,12 +306,16 @@ void inet_frag_destroy(struct inet_frag_ + /* Release all fragment data. 
*/ + fp = q->fragments; + nf = q->net; +- while (fp) { +- struct sk_buff *xp = fp->next; ++ if (fp) { ++ do { ++ struct sk_buff *xp = fp->next; + +- sum_truesize += fp->truesize; +- frag_kfree_skb(nf, f, fp); +- fp = xp; ++ sum_truesize += fp->truesize; ++ kfree_skb(fp); ++ fp = xp; ++ } while (fp); ++ } else { ++ sum_truesize = skb_rbtree_purge(&q->rb_fragments); + } + sum = sum_truesize + f->qsize; + +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -194,7 +194,7 @@ static bool frag_expire_skip_icmp(u32 us + */ + static void ip_expire(unsigned long arg) + { +- struct sk_buff *clone, *head; ++ struct sk_buff *head = NULL; + const struct iphdr *iph; + struct net *net; + struct ipq *qp; +@@ -211,14 +211,31 @@ static void ip_expire(unsigned long arg) + + ipq_kill(qp); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); +- +- head = qp->q.fragments; +- + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); + +- if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !head) ++ if (!qp->q.flags & INET_FRAG_FIRST_IN) + goto out; + ++ /* sk_buff::dev and sk_buff::rbnode are unionized. So we ++ * pull the head out of the tree in order to be able to ++ * deal with head->dev. ++ */ ++ if (qp->q.fragments) { ++ head = qp->q.fragments; ++ qp->q.fragments = head->next; ++ } else { ++ head = skb_rb_first(&qp->q.rb_fragments); ++ if (!head) ++ goto out; ++ rb_erase(&head->rbnode, &qp->q.rb_fragments); ++ memset(&head->rbnode, 0, sizeof(head->rbnode)); ++ barrier(); ++ } ++ if (head == qp->q.fragments_tail) ++ qp->q.fragments_tail = NULL; ++ ++ sub_frag_mem_limit(qp->q.net, head->truesize); ++ + head->dev = dev_get_by_index_rcu(net, qp->iif); + if (!head->dev) + goto out; +@@ -237,20 +254,17 @@ static void ip_expire(unsigned long arg) + (skb_rtable(head)->rt_type != RTN_LOCAL)) + goto out; + +- clone = skb_clone(head, GFP_ATOMIC); +- + /* Send an ICMP "Fragment Reassembly Timeout" message. 
*/ +- if (clone) { +- spin_unlock(&qp->q.lock); +- icmp_send(clone, ICMP_TIME_EXCEEDED, +- ICMP_EXC_FRAGTIME, 0); +- consume_skb(clone); +- goto out_rcu_unlock; +- } ++ spin_unlock(&qp->q.lock); ++ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); ++ goto out_rcu_unlock; ++ + out: + spin_unlock(&qp->q.lock); + out_rcu_unlock: + rcu_read_unlock(); ++ if (head) ++ kfree_skb(head); + ipq_put(qp); + } + +@@ -294,7 +308,7 @@ static int ip_frag_too_far(struct ipq *q + end = atomic_inc_return(&peer->rid); + qp->rid = end; + +- rc = qp->q.fragments && (end - start) > max; ++ rc = qp->q.fragments_tail && (end - start) > max; + + if (rc) { + struct net *net; +@@ -308,7 +322,6 @@ static int ip_frag_too_far(struct ipq *q + + static int ip_frag_reinit(struct ipq *qp) + { +- struct sk_buff *fp; + unsigned int sum_truesize = 0; + + if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) { +@@ -316,20 +329,14 @@ static int ip_frag_reinit(struct ipq *qp + return -ETIMEDOUT; + } + +- fp = qp->q.fragments; +- do { +- struct sk_buff *xp = fp->next; +- +- sum_truesize += fp->truesize; +- kfree_skb(fp); +- fp = xp; +- } while (fp); ++ sum_truesize = skb_rbtree_purge(&qp->q.rb_fragments); + sub_frag_mem_limit(qp->q.net, sum_truesize); + + qp->q.flags = 0; + qp->q.len = 0; + qp->q.meat = 0; + qp->q.fragments = NULL; ++ qp->q.rb_fragments = RB_ROOT; + qp->q.fragments_tail = NULL; + qp->iif = 0; + qp->ecn = 0; +@@ -341,7 +348,8 @@ static int ip_frag_reinit(struct ipq *qp + static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) + { + struct net *net = container_of(qp->q.net, struct net, ipv4.frags); +- struct sk_buff *prev, *next; ++ struct rb_node **rbn, *parent; ++ struct sk_buff *skb1; + struct net_device *dev; + unsigned int fragsize; + int flags, offset; +@@ -404,56 +412,60 @@ static int ip_frag_queue(struct ipq *qp, + if (err) + goto err; + +- /* Find out which fragments are in front and at the back of us +- * in the chain of fragments so far. We must know where to put +- * this fragment, right? +- */ +- prev = qp->q.fragments_tail; +- if (!prev || FRAG_CB(prev)->offset < offset) { +- next = NULL; +- goto found; +- } +- prev = NULL; +- for (next = qp->q.fragments; next != NULL; next = next->next) { +- if (FRAG_CB(next)->offset >= offset) +- break; /* bingo! */ +- prev = next; +- } ++ /* Note : skb->rbnode and skb->dev share the same location. */ ++ dev = skb->dev; ++ /* Makes sure compiler wont do silly aliasing games */ ++ barrier(); + +-found: + /* RFC5722, Section 4, amended by Errata ID : 3089 + * When reassembling an IPv6 datagram, if + * one or more its constituent fragments is determined to be an + * overlapping fragment, the entire datagram (and any constituent + * fragments) MUST be silently discarded. + * +- * We do the same here for IPv4. ++ * We do the same here for IPv4 (and increment an snmp counter). + */ +- /* Is there an overlap with the previous fragment? */ +- if (prev && +- (FRAG_CB(prev)->offset + prev->len) > offset) +- goto discard_qp; +- +- /* Is there an overlap with the next fragment? */ +- if (next && FRAG_CB(next)->offset < end) +- goto discard_qp; +- +- FRAG_CB(skb)->offset = offset; + +- /* Insert this fragment in the chain of fragments. */ +- skb->next = next; +- if (!next) ++ /* Find out where to put this fragment. */ ++ skb1 = qp->q.fragments_tail; ++ if (!skb1) { ++ /* This is the first fragment we've received. 
*/ ++ rb_link_node(&skb->rbnode, NULL, &qp->q.rb_fragments.rb_node); ++ qp->q.fragments_tail = skb; ++ } else if ((FRAG_CB(skb1)->offset + skb1->len) < end) { ++ /* This is the common/special case: skb goes to the end. */ ++ /* Detect and discard overlaps. */ ++ if (offset < (FRAG_CB(skb1)->offset + skb1->len)) ++ goto discard_qp; ++ /* Insert after skb1. */ ++ rb_link_node(&skb->rbnode, &skb1->rbnode, &skb1->rbnode.rb_right); + qp->q.fragments_tail = skb; +- if (prev) +- prev->next = skb; +- else +- qp->q.fragments = skb; ++ } else { ++ /* Binary search. Note that skb can become the first fragment, but ++ * not the last (covered above). */ ++ rbn = &qp->q.rb_fragments.rb_node; ++ do { ++ parent = *rbn; ++ skb1 = rb_to_skb(parent); ++ if (end <= FRAG_CB(skb1)->offset) ++ rbn = &parent->rb_left; ++ else if (offset >= FRAG_CB(skb1)->offset + skb1->len) ++ rbn = &parent->rb_right; ++ else /* Found an overlap with skb1. */ ++ goto discard_qp; ++ } while (*rbn); ++ /* Here we have parent properly set, and rbn pointing to ++ * one of its NULL left/right children. Insert skb. */ ++ rb_link_node(&skb->rbnode, parent, rbn); ++ } ++ rb_insert_color(&skb->rbnode, &qp->q.rb_fragments); + +- dev = skb->dev; + if (dev) { + qp->iif = dev->ifindex; + skb->dev = NULL; + } ++ FRAG_CB(skb)->offset = offset; ++ + qp->q.stamp = skb->tstamp; + qp->q.meat += skb->len; + qp->ecn |= ecn; +@@ -475,7 +487,7 @@ found: + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; +- err = ip_frag_reasm(qp, prev, dev); ++ err = ip_frag_reasm(qp, skb, dev); + skb->_skb_refdst = orefdst; + return err; + } +@@ -492,15 +504,15 @@ err: + return err; + } + +- + /* Build a new IP datagram from all its fragments. */ +- +-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, ++static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, + struct net_device *dev) + { + struct net *net = container_of(qp->q.net, struct net, ipv4.frags); + struct iphdr *iph; +- struct sk_buff *fp, *head = qp->q.fragments; ++ struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments); ++ struct sk_buff **nextp; /* To build frag_list. */ ++ struct rb_node *rbn; + int len; + int ihlen; + int err; +@@ -514,25 +526,21 @@ static int ip_frag_reasm(struct ipq *qp, + goto out_fail; + } + /* Make the one we just received the head. */ +- if (prev) { +- head = prev->next; +- fp = skb_clone(head, GFP_ATOMIC); ++ if (head != skb) { ++ fp = skb_clone(skb, GFP_ATOMIC); + if (!fp) + goto out_nomem; + +- fp->next = head->next; +- if (!fp->next) ++ rb_replace_node(&skb->rbnode, &fp->rbnode, &qp->q.rb_fragments); ++ if (qp->q.fragments_tail == skb) + qp->q.fragments_tail = fp; +- prev->next = fp; +- +- skb_morph(head, qp->q.fragments); +- head->next = qp->q.fragments->next; +- +- consume_skb(qp->q.fragments); +- qp->q.fragments = head; ++ skb_morph(skb, head); ++ rb_replace_node(&head->rbnode, &skb->rbnode, ++ &qp->q.rb_fragments); ++ consume_skb(head); ++ head = skb; + } + +- WARN_ON(!head); + WARN_ON(FRAG_CB(head)->offset != 0); + + /* Allocate a new buffer for the datagram. 
*/ +@@ -557,24 +565,35 @@ static int ip_frag_reasm(struct ipq *qp, + clone = alloc_skb(0, GFP_ATOMIC); + if (!clone) + goto out_nomem; +- clone->next = head->next; +- head->next = clone; + skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; + skb_frag_list_init(head); + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); + clone->len = clone->data_len = head->data_len - plen; +- head->data_len -= clone->len; +- head->len -= clone->len; ++ skb->truesize += clone->truesize; + clone->csum = 0; + clone->ip_summed = head->ip_summed; + add_frag_mem_limit(qp->q.net, clone->truesize); ++ skb_shinfo(head)->frag_list = clone; ++ nextp = &clone->next; ++ } else { ++ nextp = &skb_shinfo(head)->frag_list; + } + +- skb_shinfo(head)->frag_list = head->next; + skb_push(head, head->data - skb_network_header(head)); + +- for (fp=head->next; fp; fp = fp->next) { ++ /* Traverse the tree in order, to build frag_list. */ ++ rbn = rb_next(&head->rbnode); ++ rb_erase(&head->rbnode, &qp->q.rb_fragments); ++ while (rbn) { ++ struct rb_node *rbnext = rb_next(rbn); ++ fp = rb_to_skb(rbn); ++ rb_erase(rbn, &qp->q.rb_fragments); ++ rbn = rbnext; ++ *nextp = fp; ++ nextp = &fp->next; ++ fp->prev = NULL; ++ memset(&fp->rbnode, 0, sizeof(fp->rbnode)); + head->data_len += fp->len; + head->len += fp->len; + if (head->ip_summed != fp->ip_summed) +@@ -585,7 +604,9 @@ static int ip_frag_reasm(struct ipq *qp, + } + sub_frag_mem_limit(qp->q.net, head->truesize); + ++ *nextp = NULL; + head->next = NULL; ++ head->prev = NULL; + head->dev = dev; + head->tstamp = qp->q.stamp; + IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size); +@@ -613,6 +634,7 @@ static int ip_frag_reasm(struct ipq *qp, + + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); + qp->q.fragments = NULL; ++ qp->q.rb_fragments = RB_ROOT; + qp->q.fragments_tail = NULL; + return 0; + +--- a/net/ipv6/netfilter/nf_conntrack_reasm.c ++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c +@@ -472,6 +472,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, + head->csum); + + fq->q.fragments = NULL; ++ fq->q.rb_fragments = RB_ROOT; + fq->q.fragments_tail = NULL; + + /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ +--- a/net/ipv6/reassembly.c ++++ b/net/ipv6/reassembly.c +@@ -499,6 +499,7 @@ static int ip6_frag_reasm(struct frag_qu + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); + rcu_read_unlock(); + fq->q.fragments = NULL; ++ fq->q.rb_fragments = RB_ROOT; + fq->q.fragments_tail = NULL; + return 1; + diff --git a/queue-4.4/ipv4-frags-precedence-bug-in-ip_expire.patch b/queue-4.4/ipv4-frags-precedence-bug-in-ip_expire.patch new file mode 100644 index 00000000000..a68b23b6703 --- /dev/null +++ b/queue-4.4/ipv4-frags-precedence-bug-in-ip_expire.patch @@ -0,0 +1,33 @@ +From 70837ffe3085c9a91488b52ca13ac84424da1042 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Mon, 6 Aug 2018 22:17:35 +0300 +Subject: ipv4: frags: precedence bug in ip_expire() + +From: Dan Carpenter + +commit 70837ffe3085c9a91488b52ca13ac84424da1042 upstream. + +We accidentally removed the parentheses here, but they are required +because '!' has higher precedence than '&'. + +Fixes: fa0f527358bd ("ip: use rb trees for IP frag queue.") +Signed-off-by: Dan Carpenter +Signed-off-by: David S. 
Miller +Signed-off-by: Mao Wenan +Signed-off-by: Greg Kroah-Hartman + +--- + net/ipv4/ip_fragment.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -257,7 +257,7 @@ static void ip_expire(unsigned long arg) + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); + +- if (!qp->q.flags & INET_FRAG_FIRST_IN) ++ if (!(qp->q.flags & INET_FRAG_FIRST_IN)) + goto out; + + /* sk_buff::dev and sk_buff::rbnode are unionized. So we diff --git a/queue-4.4/ipv6-defrag-drop-non-last-frags-smaller-than-min-mtu.patch b/queue-4.4/ipv6-defrag-drop-non-last-frags-smaller-than-min-mtu.patch new file mode 100644 index 00000000000..a054cde03f7 --- /dev/null +++ b/queue-4.4/ipv6-defrag-drop-non-last-frags-smaller-than-min-mtu.patch @@ -0,0 +1,55 @@ +From 0ed4229b08c13c84a3c301a08defdc9e7f4467e6 Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Fri, 3 Aug 2018 02:22:20 +0200 +Subject: ipv6: defrag: drop non-last frags smaller than min mtu + +From: Florian Westphal + +commit 0ed4229b08c13c84a3c301a08defdc9e7f4467e6 upstream. + +don't bother with pathological cases, they only waste cycles. +IPv6 requires a minimum MTU of 1280 so we should never see fragments +smaller than this (except last frag). + +v3: don't use awkward "-offset + len" +v2: drop IPv4 part, which added same check w. IPV4_MIN_MTU (68). + There were concerns that there could be even smaller frags + generated by intermediate nodes, e.g. on radio networks. + +Cc: Peter Oskolkov +Cc: Eric Dumazet +Signed-off-by: Florian Westphal +Signed-off-by: David S. Miller +Signed-off-by: Mao Wenan +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/netfilter/nf_conntrack_reasm.c | 4 ++++ + net/ipv6/reassembly.c | 4 ++++ + 2 files changed, 8 insertions(+) + +--- a/net/ipv6/netfilter/nf_conntrack_reasm.c ++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c +@@ -602,6 +602,10 @@ struct sk_buff *nf_ct_frag6_gather(struc + hdr = ipv6_hdr(clone); + fhdr = (struct frag_hdr *)skb_transport_header(clone); + ++ if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && ++ fhdr->frag_off & htons(IP6_MF)) ++ goto ret_orig; ++ + skb_orphan(skb); + fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, + skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); +--- a/net/ipv6/reassembly.c ++++ b/net/ipv6/reassembly.c +@@ -549,6 +549,10 @@ static int ipv6_frag_rcv(struct sk_buff + return 1; + } + ++ if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && ++ fhdr->frag_off & htons(IP6_MF)) ++ goto fail_hdr; ++ + fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, + skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); + if (fq) { diff --git a/queue-4.4/net-ipv4-do-not-handle-duplicate-fragments-as-overlapping.patch b/queue-4.4/net-ipv4-do-not-handle-duplicate-fragments-as-overlapping.patch new file mode 100644 index 00000000000..6094b9c9f64 --- /dev/null +++ b/queue-4.4/net-ipv4-do-not-handle-duplicate-fragments-as-overlapping.patch @@ -0,0 +1,83 @@ +From ade446403bfb79d3528d56071a84b15351a139ad Mon Sep 17 00:00:00 2001 +From: Michal Kubecek +Date: Thu, 13 Dec 2018 17:23:32 +0100 +Subject: net: ipv4: do not handle duplicate fragments as overlapping + +From: Michal Kubecek + +commit ade446403bfb79d3528d56071a84b15351a139ad upstream. + +Since commit 7969e5c40dfd ("ip: discard IPv4 datagrams with overlapping +segments.") IPv4 reassembly code drops the whole queue whenever an +overlapping fragment is received. 
However, the test is written in a way
+which detects duplicate fragments as overlapping so that in
+environments with many duplicate packets, fragmented packets may be
+undeliverable.
+
+Add an extra test and for a (potentially) duplicate fragment, only
+drop the new fragment rather than the whole queue. Only starting
+offset and length are checked, not the contents of the fragments, as
+that would be too expensive. For a similar reason, the linear list
+("run") of an rbtree node is not iterated; we only check if the new
+fragment is a subset of the interval covered by existing consecutive
+fragments.
+
+v2: instead of an exact check iterating through the linear list of an
+rbtree node, only check if the new fragment is a subset of the "run"
+(suggested by Eric Dumazet)
+
+Fixes: 7969e5c40dfd ("ip: discard IPv4 datagrams with overlapping segments.")
+Signed-off-by: Michal Kubecek
+Signed-off-by: David S. Miller
+Signed-off-by: Mao Wenan
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ net/ipv4/ip_fragment.c | 18 ++++++++++++------
+ 1 file changed, 12 insertions(+), 6 deletions(-)
+
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -400,10 +400,10 @@ static int ip_frag_queue(struct ipq *qp,
+ struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+ struct rb_node **rbn, *parent;
+ struct sk_buff *skb1, *prev_tail;
++ int ihl, end, skb1_run_end;
+ struct net_device *dev;
+ unsigned int fragsize;
+ int flags, offset;
+- int ihl, end;
+ int err = -ENOENT;
+ u8 ecn;
+
+@@ -473,7 +473,9 @@ static int ip_frag_queue(struct ipq *qp,
+ * overlapping fragment, the entire datagram (and any constituent
+ * fragments) MUST be silently discarded.
+ *
+- * We do the same here for IPv4 (and increment an snmp counter).
++ * We do the same here for IPv4 (and increment an snmp counter) but
++ * we do not want to drop the whole queue in response to a duplicate
++ * fragment.
+ */
+
+@@ -497,13 +499,17 @@ static int ip_frag_queue(struct ipq *qp,
+ do {
+ parent = *rbn;
+ skb1 = rb_to_skb(parent);
++ skb1_run_end = FRAG_CB(skb1)->offset +
++ FRAG_CB(skb1)->frag_run_len;
+ if (end <= FRAG_CB(skb1)->offset)
+ rbn = &parent->rb_left;
+- else if (offset >= FRAG_CB(skb1)->offset +
+- FRAG_CB(skb1)->frag_run_len)
++ else if (offset >= skb1_run_end)
+ rbn = &parent->rb_right;
+- else /* Found an overlap with skb1. */
+- goto discard_qp;
++ else if (offset >= FRAG_CB(skb1)->offset &&
++ end <= skb1_run_end)
++ goto err; /* No new data, potential duplicate */
++ else
++ goto discard_qp; /* Found an overlap */
+ } while (*rbn);
+ /* Here we have parent properly set, and rbn pointing to
+ * one of its NULL left/right children. Insert skb.

diff --git a/queue-4.4/net-modify-skb_rbtree_purge-to-return-the-truesize-of-all-purged-skbs.patch b/queue-4.4/net-modify-skb_rbtree_purge-to-return-the-truesize-of-all-purged-skbs.patch
new file mode 100644
index 00000000000..17fe45cc0f1
--- /dev/null
+++ b/queue-4.4/net-modify-skb_rbtree_purge-to-return-the-truesize-of-all-purged-skbs.patch
@@ -0,0 +1,66 @@
+From 385114dec8a49b5e5945e77ba7de6356106713f4 Mon Sep 17 00:00:00 2001
+From: Peter Oskolkov
+Date: Thu, 2 Aug 2018 23:34:38 +0000
+Subject: net: modify skb_rbtree_purge to return the truesize of all purged skbs.
+
+From: Peter Oskolkov
+
+commit 385114dec8a49b5e5945e77ba7de6356106713f4 upstream.
+
+Tested: see the next patch in the series.
+
+Suggested-by: Eric Dumazet
+Signed-off-by: Peter Oskolkov
+Signed-off-by: Eric Dumazet
+Cc: Florian Westphal
+Signed-off-by: David S. Miller
+Signed-off-by: Mao Wenan
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/skbuff.h | 2 +-
+ net/core/skbuff.c | 6 +++++-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -2273,7 +2273,7 @@ static inline void __skb_queue_purge(str
+ kfree_skb(skb);
+ }
+
+-void skb_rbtree_purge(struct rb_root *root);
++unsigned int skb_rbtree_purge(struct rb_root *root);
+
+ void *netdev_alloc_frag(unsigned int fragsz);
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -2380,23 +2380,27 @@ EXPORT_SYMBOL(skb_queue_purge);
+ /**
+ * skb_rbtree_purge - empty a skb rbtree
+ * @root: root of the rbtree to empty
++ * Return value: the sum of truesizes of all purged skbs.
+ *
+ * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
+ * the list and one reference dropped. This function does not take
+ * any lock. Synchronization should be handled by the caller (e.g., TCP
+ * out-of-order queue is protected by the socket lock).
+ */
+-void skb_rbtree_purge(struct rb_root *root)
++unsigned int skb_rbtree_purge(struct rb_root *root)
+ {
+ struct rb_node *p = rb_first(root);
++ unsigned int sum = 0;
+
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
++ sum += skb->truesize;
+ kfree_skb(skb);
+ }
++ return sum;
+ }
+
+ /**

diff --git a/queue-4.4/series b/queue-4.4/series
index 4f4710deb1c..6f645796b98 100644
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -52,3 +52,14 @@ mm-oom-fix-use-after-free-in-oom_kill_process.patch
 cifs-always-resolve-hostname-before-reconnecting.patch
 drivers-core-remove-glue-dirs-from-sysfs-earlier.patch
 mm-migrate-don-t-rely-on-__pagemovable-of-newpage-after-unlocking-it.patch
+fs-don-t-scan-the-inode-cache-before-sb_born-is-set.patch
+ip-discard-ipv4-datagrams-with-overlapping-segments.patch
+net-modify-skb_rbtree_purge-to-return-the-truesize-of-all-purged-skbs.patch
+inet-frags-get-rid-of-inet_frag_evicting.patch
+ip-use-rb-trees-for-ip-frag-queue.patch
+ipv6-defrag-drop-non-last-frags-smaller-than-min-mtu.patch
+ip-add-helpers-to-process-in-order-fragments-faster.patch
+ip-process-in-order-fragments-efficiently.patch
+net-ipv4-do-not-handle-duplicate-fragments-as-overlapping.patch
+ip-frags-fix-crash-in-ip_do_fragment.patch
+ipv4-frags-precedence-bug-in-ip_expire.patch
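
The SB_BORN patch above relies on a classic publish/subscribe barrier
pairing: mount_fs() issues smp_wmb() before setting MS_BORN, and
super_cache_count() tests MS_BORN and then issues smp_rmb() before
touching any filesystem state. Below is a minimal userspace sketch of
that same ordering, with C11 release/acquire atomics standing in for
the kernel barriers; struct sb, mount_publish() and cache_count() are
invented names for illustration, not kernel code.

#include <stdatomic.h>
#include <stdio.h>

struct sb {
    int cached_objects;   /* stands in for state set up by ->fill_super() */
    atomic_bool born;     /* stands in for MS_BORN in sb->s_flags */
};

/* Mount side: do the setup work, then publish it by setting the flag
 * with release semantics (cf. smp_wmb() before MS_BORN in mount_fs()). */
static void mount_publish(struct sb *sb)
{
    sb->cached_objects = 42;  /* filesystem setup work */
    atomic_store_explicit(&sb->born, 1, memory_order_release);
}

/* Shrinker side: check the flag with acquire semantics before touching
 * the state (cf. the MS_BORN test + smp_rmb() in super_cache_count()). */
static int cache_count(struct sb *sb)
{
    if (!atomic_load_explicit(&sb->born, memory_order_acquire))
        return 0;             /* not born yet: don't read partial state */
    return sb->cached_objects;
}

int main(void)
{
    struct sb sb = {0};

    printf("%d\n", cache_count(&sb));  /* 0: superblock not born yet */
    mount_publish(&sb);
    printf("%d\n", cache_count(&sb));  /* 42: setup now safely visible */
    return 0;
}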
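
The in-order fragment helpers above describe three insertion cases for
an arriving fragment relative to the current tail "run". The sketch
below models just that decision with offsets and lengths (no rb-tree,
no sk_buffs); struct run and place_fragment() are invented names, the
run start/length mirror offset and frag_run_len from ipfrag_skb_cb,
and overlap detection is deliberately omitted.

#include <stdio.h>

struct run {
    int start;  /* offset of the run's head fragment */
    int len;    /* sum of fragment lengths in the run (frag_run_len) */
};

static const char *place_fragment(struct run *tail, int offset, int len)
{
    int tail_end;

    if (!tail)
        return "create first run";     /* queue was empty */

    tail_end = tail->start + tail->len;
    if (offset == tail_end) {          /* in order, adjacent to the tail */
        tail->len += len;              /* extend run; no tree rebalancing */
        return "append to tail run";
    }
    if (offset > tail_end)             /* in order, but leaves a gap */
        return "start new tail run";
    return "insert into rb-tree";      /* out of order: falls before tail */
}

int main(void)
{
    struct run tail = { .start = 0, .len = 1200 };

    printf("%s\n", place_fragment(NULL, 0, 1200));
    printf("%s\n", place_fragment(&tail, 1200, 1200)); /* append */
    printf("%s\n", place_fragment(&tail, 4800, 1200)); /* gap */
    printf("%s\n", place_fragment(&tail, 600, 600));   /* out of order */
    return 0;
}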
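
The duplicate-vs-overlap distinction introduced by
net-ipv4-do-not-handle-duplicate-fragments-as-overlapping.patch
reduces to interval arithmetic: a new fragment [offset, end) that is a
subset of an existing run [run_start, run_end) carries no new data and
is dropped alone, while a partial overlap still discards the whole
queue. A standalone sketch with invented names, mirroring the order of
checks in the patched rb-tree walk:

#include <assert.h>
#include <stdio.h>

enum verdict { GO_LEFT, GO_RIGHT, DROP_FRAGMENT, DROP_QUEUE };

/* New fragment covers [offset, end); the existing run covers
 * [run_start, run_end). */
static enum verdict classify(int offset, int end, int run_start, int run_end)
{
    if (end <= run_start)
        return GO_LEFT;        /* strictly before this run */
    if (offset >= run_end)
        return GO_RIGHT;       /* strictly after this run */
    if (offset >= run_start && end <= run_end)
        return DROP_FRAGMENT;  /* subset: no new data, likely a duplicate */
    return DROP_QUEUE;         /* genuine overlap: discard the whole queue */
}

int main(void)
{
    assert(classify(0, 8, 8, 24) == GO_LEFT);
    assert(classify(24, 32, 8, 24) == GO_RIGHT);
    assert(classify(8, 24, 8, 24) == DROP_FRAGMENT); /* exact duplicate */
    assert(classify(4, 12, 8, 24) == DROP_QUEUE);    /* partial overlap */
    puts("ok");
    return 0;
}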
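
The ip_do_fragment() crash fixed above is a union-aliasing bug: per
the commit message, skb->sk and the defrag offset share the same union
member, so a leftover offset written by the defrag code makes the sk
pointer read back as non-NULL. A toy illustration only; fake_skb is an
invented layout, not the real struct sk_buff, and reading the other
union member is exactly the hazard being demonstrated.

#include <stdio.h>

struct fake_skb {
    union {
        void *sk;              /* owning socket pointer */
        int ip_defrag_offset;  /* storage reused by the defrag code */
    };
};

int main(void)
{
    struct fake_skb skb = { .sk = NULL };

    /* Defrag stores an offset; on common ABIs those bytes alias the
     * pointer member, so skb.sk no longer reads back as NULL. */
    skb.ip_defrag_offset = 1400;
    printf("sk looks %s\n", skb.sk ? "set (garbage)" : "NULL");

    /* The fix: clear sk before the skb leaves the defrag code. */
    skb.sk = NULL;
    printf("sk looks %s\n", skb.sk ? "set (garbage)" : "NULL");
    return 0;
}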