From: Greg Kroah-Hartman
Date: Tue, 30 Apr 2019 10:56:25 +0000 (+0200)
Subject: 4.9-stable patches
X-Git-Tag: v4.9.172~5
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7e3ded1b5cb3616c2e9567fa6d5c5fa82a6dcb63;p=thirdparty%2Fkernel%2Fstable-queue.git

4.9-stable patches

added patches:
	ipv6-frags-fix-a-lockdep-false-positive.patch
	ipv6-remove-dependency-of-nf_defrag_ipv6-on-ipv6-module.patch
	net-ip-defrag-encapsulate-rbtree-defrag-code-into-callable-functions.patch
	net-ip6-defrag-use-rbtrees-for-ipv6-defrag.patch
	net-ip6-defrag-use-rbtrees-in-nf_conntrack_reasm.c.patch
---

diff --git a/queue-4.9/ipv6-frags-fix-a-lockdep-false-positive.patch b/queue-4.9/ipv6-frags-fix-a-lockdep-false-positive.patch
new file mode 100644
index 00000000000..2d2ed145ccd
--- /dev/null
+++ b/queue-4.9/ipv6-frags-fix-a-lockdep-false-positive.patch
@@ -0,0 +1,103 @@
+From foo@baz Tue 30 Apr 2019 12:43:33 PM CEST
+From: Peter Oskolkov
+Date: Fri, 26 Apr 2019 08:41:04 -0700
+Subject: ipv6: frags: fix a lockdep false positive
+To: Greg Kroah-Hartman , stable@vger.kernel.org, netdev@vger.kernel.org
+Cc: Peter Oskolkov , David Miller , Eric Dumazet , Sasha Levin , Captain Wiggum , Lars Persson
+Message-ID: <20190426154108.52277-2-posk@google.com>
+
+From: Eric Dumazet
+
+[ Upstream commit 415787d7799f4fccbe8d49cb0b8e5811be6b0389 ]
+
+lockdep does not know that the locks used by IPv4 defrag
+and IPv6 reassembly units are of different classes.
+
+It complains because of the following chains:
+
+1) sch_direct_xmit() (lock txq->_xmit_lock)
+   dev_hard_start_xmit()
+    xmit_one()
+     dev_queue_xmit_nit()
+      packet_rcv_fanout()
+       ip_check_defrag()
+        ip_defrag()
+         spin_lock() (lock frag queue spinlock)
+
+2) ip6_input_finish()
+    ipv6_frag_rcv() (lock frag queue spinlock)
+     ip6_frag_queue()
+      icmpv6_param_prob() (lock txq->_xmit_lock at some point)
+
+We could add lockdep annotations, but we can also make sure IPv6
+calls icmpv6_param_prob() only after the release of the frag queue
+spinlock, since this naturally makes the frag queue spinlock a leaf
+in the lock hierarchy.
+
+Signed-off-by: Eric Dumazet
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv6/reassembly.c |   23 ++++++++++++-----------
+ 1 file changed, 12 insertions(+), 11 deletions(-)
+
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -169,7 +169,8 @@ fq_find(struct net *net, __be32 id, cons
+ }
+ 
+ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
+-			  struct frag_hdr *fhdr, int nhoff)
++			  struct frag_hdr *fhdr, int nhoff,
++			  u32 *prob_offset)
+ {
+ 	struct sk_buff *prev, *next;
+ 	struct net_device *dev;
+@@ -185,11 +186,7 @@ static int ip6_frag_qu
+ 		  ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
+ 
+ 	if ((unsigned int)end > IPV6_MAXPLEN) {
+-		__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+-				IPSTATS_MIB_INHDRERRORS);
+-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+-				  ((u8 *)&fhdr->frag_off -
+-				   skb_network_header(skb)));
++		*prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
+ 		return -1;
+ 	}
+ 
+@@ -220,10 +217,7 @@ static int ip6_frag_qu
+ 			/* RFC2460 says always send parameter problem in
+ 			 * this case. -DaveM
+ 			 */
+-			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+-					IPSTATS_MIB_INHDRERRORS);
+-			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+-					  offsetof(struct ipv6hdr, payload_len));
++			*prob_offset = offsetof(struct ipv6hdr, payload_len);
+ 			return -1;
+ 		}
+ 		if (end > fq->q.len) {
+@@ -524,15 +518,22 @@ static int ipv6_frag_rcv
+ 	iif = skb->dev ? skb->dev->ifindex : 0;
+ 	fq = fq_find(net, fhdr->identification, hdr, iif);
+ 	if (fq) {
++		u32 prob_offset = 0;
+ 		int ret;
+ 
+ 		spin_lock(&fq->q.lock);
+ 
+ 		fq->iif = iif;
+-		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
++		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff,
++				     &prob_offset);
+ 
+ 		spin_unlock(&fq->q.lock);
+ 		inet_frag_put(&fq->q);
++		if (prob_offset) {
++			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
++					IPSTATS_MIB_INHDRERRORS);
++			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
++		}
+ 		return ret;
+ 	}
+ 
diff --git a/queue-4.9/ipv6-remove-dependency-of-nf_defrag_ipv6-on-ipv6-module.patch b/queue-4.9/ipv6-remove-dependency-of-nf_defrag_ipv6-on-ipv6-module.patch
new file mode 100644
index 00000000000..32b74ad87b7
--- /dev/null
+++ b/queue-4.9/ipv6-remove-dependency-of-nf_defrag_ipv6-on-ipv6-module.patch
@@ -0,0 +1,400 @@
+From foo@baz Tue 30 Apr 2019 12:43:33 PM CEST
+From: Peter Oskolkov
+Date: Fri, 26 Apr 2019 08:41:06 -0700
+Subject: ipv6: remove dependency of nf_defrag_ipv6 on ipv6 module
+To: Greg Kroah-Hartman , stable@vger.kernel.org, netdev@vger.kernel.org
+Cc: Peter Oskolkov , David Miller , Eric Dumazet , Sasha Levin , Captain Wiggum , Lars Persson , Florian Westphal , Pablo Neira Ayuso
+Message-ID: <20190426154108.52277-4-posk@google.com>
+
+From: Florian Westphal
+
+[ Upstream commit 70b095c84326640eeacfd69a411db8fc36e8ab1a ]
+
+IPV6=m
+DEFRAG_IPV6=m
+CONNTRACK=y yields:
+
+net/netfilter/nf_conntrack_proto.o: In function `nf_ct_netns_do_get':
+net/netfilter/nf_conntrack_proto.c:802: undefined reference to `nf_defrag_ipv6_enable'
+net/netfilter/nf_conntrack_proto.o:(.rodata+0x640): undefined reference to `nf_conntrack_l4proto_icmpv6'
+
+Setting DEFRAG_IPV6=y causes undefined references to ip6_rhash_params,
+ip6_frag_init and ip6_expire_frag_queue, so IPV6=y would need to be
+forced too.
+
+This patch gets rid of the 'followup linker error' by removing
+the dependency of netfilter ipv6 defrag on ipv6.ko symbols.
+
+Shared code is placed into a header, then used from both.
+ +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ipv6.h | 29 -------- + include/net/ipv6_frag.h | 104 ++++++++++++++++++++++++++++++ + net/ieee802154/6lowpan/reassembly.c | 2 + net/ipv6/netfilter/nf_conntrack_reasm.c | 17 +++- + net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 3 + net/ipv6/reassembly.c | 92 ++------------------------ + net/openvswitch/conntrack.c | 1 + 7 files changed, 126 insertions(+), 122 deletions(-) + create mode 100644 include/net/ipv6_frag.h + +--- a/include/net/ipv6.h ++++ b/include/net/ipv6.h +@@ -511,35 +511,6 @@ static inline bool ipv6_prefix_equal(con + } + #endif + +-struct inet_frag_queue; +- +-enum ip6_defrag_users { +- IP6_DEFRAG_LOCAL_DELIVER, +- IP6_DEFRAG_CONNTRACK_IN, +- __IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX, +- IP6_DEFRAG_CONNTRACK_OUT, +- __IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX, +- IP6_DEFRAG_CONNTRACK_BRIDGE_IN, +- __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, +-}; +- +-void ip6_frag_init(struct inet_frag_queue *q, const void *a); +-extern const struct rhashtable_params ip6_rhash_params; +- +-/* +- * Equivalent of ipv4 struct ip +- */ +-struct frag_queue { +- struct inet_frag_queue q; +- +- int iif; +- unsigned int csum; +- __u16 nhoffset; +- u8 ecn; +-}; +- +-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq); +- + static inline bool ipv6_addr_any(const struct in6_addr *a) + { + #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 +--- /dev/null ++++ b/include/net/ipv6_frag.h +@@ -0,0 +1,104 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _IPV6_FRAG_H ++#define _IPV6_FRAG_H ++#include ++#include ++#include ++#include ++ ++enum ip6_defrag_users { ++ IP6_DEFRAG_LOCAL_DELIVER, ++ IP6_DEFRAG_CONNTRACK_IN, ++ __IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX, ++ IP6_DEFRAG_CONNTRACK_OUT, ++ __IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX, ++ IP6_DEFRAG_CONNTRACK_BRIDGE_IN, ++ __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, ++}; ++ ++/* ++ * Equivalent of ipv4 struct ip ++ */ ++struct frag_queue { ++ struct inet_frag_queue q; ++ ++ int iif; ++ __u16 nhoffset; ++ u8 ecn; ++}; ++ ++#if IS_ENABLED(CONFIG_IPV6) ++static inline void ip6frag_init(struct inet_frag_queue *q, const void *a) ++{ ++ struct frag_queue *fq = container_of(q, struct frag_queue, q); ++ const struct frag_v6_compare_key *key = a; ++ ++ q->key.v6 = *key; ++ fq->ecn = 0; ++} ++ ++static inline u32 ip6frag_key_hashfn(const void *data, u32 len, u32 seed) ++{ ++ return jhash2(data, ++ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); ++} ++ ++static inline u32 ip6frag_obj_hashfn(const void *data, u32 len, u32 seed) ++{ ++ const struct inet_frag_queue *fq = data; ++ ++ return jhash2((const u32 *)&fq->key.v6, ++ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); ++} ++ ++static inline int ++ip6frag_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) ++{ ++ const struct frag_v6_compare_key *key = arg->key; ++ const struct inet_frag_queue *fq = ptr; ++ ++ return !!memcmp(&fq->key, key, sizeof(*key)); ++} ++ ++static inline void ++ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) ++{ ++ struct net_device *dev = NULL; ++ struct sk_buff *head; ++ ++ rcu_read_lock(); ++ spin_lock(&fq->q.lock); ++ ++ if (fq->q.flags & INET_FRAG_COMPLETE) ++ goto out; ++ ++ inet_frag_kill(&fq->q); ++ ++ dev = 
dev_get_by_index_rcu(net, fq->iif); ++ if (!dev) ++ goto out; ++ ++ __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); ++ __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); ++ ++ /* Don't send error if the first segment did not arrive. */ ++ head = fq->q.fragments; ++ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head) ++ goto out; ++ ++ head->dev = dev; ++ skb_get(head); ++ spin_unlock(&fq->q.lock); ++ ++ icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); ++ kfree_skb(head); ++ goto out_rcu_unlock; ++ ++out: ++ spin_unlock(&fq->q.lock); ++out_rcu_unlock: ++ rcu_read_unlock(); ++ inet_frag_put(&fq->q); ++} ++#endif ++#endif +--- a/net/ieee802154/6lowpan/reassembly.c ++++ b/net/ieee802154/6lowpan/reassembly.c +@@ -25,7 +25,7 @@ + + #include + #include +-#include ++#include + #include + + #include "6lowpan_i.h" +--- a/net/ipv6/netfilter/nf_conntrack_reasm.c ++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c +@@ -33,9 +33,8 @@ + + #include + #include +-#include ++#include + +-#include + #include + #include + #include +@@ -158,7 +157,7 @@ static void nf_ct_frag6_expire(unsigned + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + net = container_of(fq->q.net, struct net, nf_frag.frags); + +- ip6_expire_frag_queue(net, fq); ++ ip6frag_expire_frag_queue(net, fq); + } + + /* Creation primitives. */ +@@ -634,16 +633,24 @@ static struct pernet_operations nf_ct_ne + .exit = nf_ct_net_exit, + }; + ++static const struct rhashtable_params nfct_rhash_params = { ++ .head_offset = offsetof(struct inet_frag_queue, node), ++ .hashfn = ip6frag_key_hashfn, ++ .obj_hashfn = ip6frag_obj_hashfn, ++ .obj_cmpfn = ip6frag_obj_cmpfn, ++ .automatic_shrinking = true, ++}; ++ + int nf_ct_frag6_init(void) + { + int ret = 0; + +- nf_frags.constructor = ip6_frag_init; ++ nf_frags.constructor = ip6frag_init; + nf_frags.destructor = NULL; + nf_frags.qsize = sizeof(struct frag_queue); + nf_frags.frag_expire = nf_ct_frag6_expire; + nf_frags.frags_cache_name = nf_frags_cache_name; +- nf_frags.rhash_params = ip6_rhash_params; ++ nf_frags.rhash_params = nfct_rhash_params; + ret = inet_frags_init(&nf_frags); + if (ret) + goto out; +--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c ++++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +@@ -14,8 +14,7 @@ + #include + #include + #include +-#include +-#include ++#include + + #include + #include +--- a/net/ipv6/reassembly.c ++++ b/net/ipv6/reassembly.c +@@ -57,7 +57,7 @@ + #include + #include + #include +-#include ++#include + #include + + static const char ip6_frag_cache_name[] = "ip6-frags"; +@@ -79,61 +79,6 @@ static struct inet_frags ip6_frags; + static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, + struct net_device *dev); + +-void ip6_frag_init(struct inet_frag_queue *q, const void *a) +-{ +- struct frag_queue *fq = container_of(q, struct frag_queue, q); +- const struct frag_v6_compare_key *key = a; +- +- q->key.v6 = *key; +- fq->ecn = 0; +-} +-EXPORT_SYMBOL(ip6_frag_init); +- +-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) +-{ +- struct net_device *dev = NULL; +- struct sk_buff *head; +- +- rcu_read_lock(); +- spin_lock(&fq->q.lock); +- +- if (fq->q.flags & INET_FRAG_COMPLETE) +- goto out; +- +- inet_frag_kill(&fq->q); +- +- dev = dev_get_by_index_rcu(net, fq->iif); +- if (!dev) +- goto out; +- +- __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); +- __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); +- +- /* Don't send error if the first segment did 
not arrive. */ +- head = fq->q.fragments; +- if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head) +- goto out; +- +- /* But use as source device on which LAST ARRIVED +- * segment was received. And do not use fq->dev +- * pointer directly, device might already disappeared. +- */ +- head->dev = dev; +- skb_get(head); +- spin_unlock(&fq->q.lock); +- +- icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); +- kfree_skb(head); +- goto out_rcu_unlock; +- +-out: +- spin_unlock(&fq->q.lock); +-out_rcu_unlock: +- rcu_read_unlock(); +- inet_frag_put(&fq->q); +-} +-EXPORT_SYMBOL(ip6_expire_frag_queue); +- + static void ip6_frag_expire(unsigned long data) + { + struct frag_queue *fq; +@@ -142,7 +87,7 @@ static void ip6_frag_expire(unsigned lon + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + net = container_of(fq->q.net, struct net, ipv6.frags); + +- ip6_expire_frag_queue(net, fq); ++ ip6frag_expire_frag_queue(net, fq); + } + + static struct frag_queue * +@@ -701,42 +646,19 @@ static struct pernet_operations ip6_frag + .exit = ipv6_frags_exit_net, + }; + +-static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed) +-{ +- return jhash2(data, +- sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); +-} +- +-static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed) +-{ +- const struct inet_frag_queue *fq = data; +- +- return jhash2((const u32 *)&fq->key.v6, +- sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); +-} +- +-static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) +-{ +- const struct frag_v6_compare_key *key = arg->key; +- const struct inet_frag_queue *fq = ptr; +- +- return !!memcmp(&fq->key, key, sizeof(*key)); +-} +- +-const struct rhashtable_params ip6_rhash_params = { ++static const struct rhashtable_params ip6_rhash_params = { + .head_offset = offsetof(struct inet_frag_queue, node), +- .hashfn = ip6_key_hashfn, +- .obj_hashfn = ip6_obj_hashfn, +- .obj_cmpfn = ip6_obj_cmpfn, ++ .hashfn = ip6frag_key_hashfn, ++ .obj_hashfn = ip6frag_obj_hashfn, ++ .obj_cmpfn = ip6frag_obj_cmpfn, + .automatic_shrinking = true, + }; +-EXPORT_SYMBOL(ip6_rhash_params); + + int __init ipv6_frag_init(void) + { + int ret; + +- ip6_frags.constructor = ip6_frag_init; ++ ip6_frags.constructor = ip6frag_init; + ip6_frags.destructor = NULL; + ip6_frags.qsize = sizeof(struct frag_queue); + ip6_frags.frag_expire = ip6_frag_expire; +--- a/net/openvswitch/conntrack.c ++++ b/net/openvswitch/conntrack.c +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_NF_NAT_NEEDED + #include diff --git a/queue-4.9/net-ip-defrag-encapsulate-rbtree-defrag-code-into-callable-functions.patch b/queue-4.9/net-ip-defrag-encapsulate-rbtree-defrag-code-into-callable-functions.patch new file mode 100644 index 00000000000..69a7ef55963 --- /dev/null +++ b/queue-4.9/net-ip-defrag-encapsulate-rbtree-defrag-code-into-callable-functions.patch @@ -0,0 +1,776 @@ +From foo@baz Tue 30 Apr 2019 12:43:33 PM CEST +From: Peter Oskolkov +Date: Fri, 26 Apr 2019 08:41:05 -0700 +Subject: net: IP defrag: encapsulate rbtree defrag code into callable functions +To: Greg Kroah-Hartman , stable@vger.kernel.org, netdev@vger.kernel.org +Cc: Peter Oskolkov , David Miller , Eric Dumazet , Sasha Levin , Captain Wiggum , Lars Persson , Peter Oskolkov , Florian Westphal , Tom Herbert +Message-ID: <20190426154108.52277-3-posk@google.com> + +From: Peter Oskolkov + +[ Upstream commit c23f35d19db3b36ffb9e04b08f1d91565d15f84f ] + +This is a refactoring patch: without changing 
runtime behavior, +it moves rbtree-related code from IPv4-specific files/functions +into .h/.c defrag files shared with IPv6 defragmentation code. + +Signed-off-by: Peter Oskolkov +Cc: Eric Dumazet +Cc: Florian Westphal +Cc: Tom Herbert +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_frag.h | 16 ++ + net/ipv4/inet_fragment.c | 293 ++++++++++++++++++++++++++++++++++++++++++++++ + net/ipv4/ip_fragment.c | 295 +++++------------------------------------------ + 3 files changed, 342 insertions(+), 262 deletions(-) + +--- a/include/net/inet_frag.h ++++ b/include/net/inet_frag.h +@@ -76,8 +76,8 @@ struct inet_frag_queue { + struct timer_list timer; + spinlock_t lock; + atomic_t refcnt; +- struct sk_buff *fragments; /* Used in IPv6. */ +- struct rb_root rb_fragments; /* Used in IPv4. */ ++ struct sk_buff *fragments; /* used in 6lopwpan IPv6. */ ++ struct rb_root rb_fragments; /* Used in IPv4/IPv6. */ + struct sk_buff *fragments_tail; + struct sk_buff *last_run_head; + ktime_t stamp; +@@ -152,4 +152,16 @@ static inline void add_frag_mem_limit(st + + extern const u8 ip_frag_ecn_table[16]; + ++/* Return values of inet_frag_queue_insert() */ ++#define IPFRAG_OK 0 ++#define IPFRAG_DUP 1 ++#define IPFRAG_OVERLAP 2 ++int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, ++ int offset, int end); ++void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, ++ struct sk_buff *parent); ++void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, ++ void *reasm_data); ++struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q); ++ + #endif +--- a/net/ipv4/inet_fragment.c ++++ b/net/ipv4/inet_fragment.c +@@ -24,6 +24,62 @@ + #include + #include + #include ++#include ++#include ++ ++/* Use skb->cb to track consecutive/adjacent fragments coming at ++ * the end of the queue. Nodes in the rb-tree queue will ++ * contain "runs" of one or more adjacent fragments. ++ * ++ * Invariants: ++ * - next_frag is NULL at the tail of a "run"; ++ * - the head of a "run" has the sum of all fragment lengths in frag_run_len. ++ */ ++struct ipfrag_skb_cb { ++ union { ++ struct inet_skb_parm h4; ++ struct inet6_skb_parm h6; ++ }; ++ struct sk_buff *next_frag; ++ int frag_run_len; ++}; ++ ++#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) ++ ++static void fragcb_clear(struct sk_buff *skb) ++{ ++ RB_CLEAR_NODE(&skb->rbnode); ++ FRAG_CB(skb)->next_frag = NULL; ++ FRAG_CB(skb)->frag_run_len = skb->len; ++} ++ ++/* Append skb to the last "run". */ ++static void fragrun_append_to_last(struct inet_frag_queue *q, ++ struct sk_buff *skb) ++{ ++ fragcb_clear(skb); ++ ++ FRAG_CB(q->last_run_head)->frag_run_len += skb->len; ++ FRAG_CB(q->fragments_tail)->next_frag = skb; ++ q->fragments_tail = skb; ++} ++ ++/* Create a new "run" with the skb. */ ++static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb) ++{ ++ BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); ++ fragcb_clear(skb); ++ ++ if (q->last_run_head) ++ rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, ++ &q->last_run_head->rbnode.rb_right); ++ else ++ rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); ++ rb_insert_color(&skb->rbnode, &q->rb_fragments); ++ ++ q->fragments_tail = skb; ++ q->last_run_head = skb; ++} + + /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements + * Value : 0xff if frame should be dropped. 
+@@ -122,6 +178,28 @@ static void inet_frag_destroy_rcu(struct + kmem_cache_free(f->frags_cachep, q); + } + ++unsigned int inet_frag_rbtree_purge(struct rb_root *root) ++{ ++ struct rb_node *p = rb_first(root); ++ unsigned int sum = 0; ++ ++ while (p) { ++ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); ++ ++ p = rb_next(p); ++ rb_erase(&skb->rbnode, root); ++ while (skb) { ++ struct sk_buff *next = FRAG_CB(skb)->next_frag; ++ ++ sum += skb->truesize; ++ kfree_skb(skb); ++ skb = next; ++ } ++ } ++ return sum; ++} ++EXPORT_SYMBOL(inet_frag_rbtree_purge); ++ + void inet_frag_destroy(struct inet_frag_queue *q) + { + struct sk_buff *fp; +@@ -223,3 +301,218 @@ struct inet_frag_queue *inet_frag_find(s + return fq; + } + EXPORT_SYMBOL(inet_frag_find); ++ ++int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, ++ int offset, int end) ++{ ++ struct sk_buff *last = q->fragments_tail; ++ ++ /* RFC5722, Section 4, amended by Errata ID : 3089 ++ * When reassembling an IPv6 datagram, if ++ * one or more its constituent fragments is determined to be an ++ * overlapping fragment, the entire datagram (and any constituent ++ * fragments) MUST be silently discarded. ++ * ++ * Duplicates, however, should be ignored (i.e. skb dropped, but the ++ * queue/fragments kept for later reassembly). ++ */ ++ if (!last) ++ fragrun_create(q, skb); /* First fragment. */ ++ else if (last->ip_defrag_offset + last->len < end) { ++ /* This is the common case: skb goes to the end. */ ++ /* Detect and discard overlaps. */ ++ if (offset < last->ip_defrag_offset + last->len) ++ return IPFRAG_OVERLAP; ++ if (offset == last->ip_defrag_offset + last->len) ++ fragrun_append_to_last(q, skb); ++ else ++ fragrun_create(q, skb); ++ } else { ++ /* Binary search. Note that skb can become the first fragment, ++ * but not the last (covered above). ++ */ ++ struct rb_node **rbn, *parent; ++ ++ rbn = &q->rb_fragments.rb_node; ++ do { ++ struct sk_buff *curr; ++ int curr_run_end; ++ ++ parent = *rbn; ++ curr = rb_to_skb(parent); ++ curr_run_end = curr->ip_defrag_offset + ++ FRAG_CB(curr)->frag_run_len; ++ if (end <= curr->ip_defrag_offset) ++ rbn = &parent->rb_left; ++ else if (offset >= curr_run_end) ++ rbn = &parent->rb_right; ++ else if (offset >= curr->ip_defrag_offset && ++ end <= curr_run_end) ++ return IPFRAG_DUP; ++ else ++ return IPFRAG_OVERLAP; ++ } while (*rbn); ++ /* Here we have parent properly set, and rbn pointing to ++ * one of its NULL left/right children. Insert skb. 
++ */ ++ fragcb_clear(skb); ++ rb_link_node(&skb->rbnode, parent, rbn); ++ rb_insert_color(&skb->rbnode, &q->rb_fragments); ++ } ++ ++ skb->ip_defrag_offset = offset; ++ ++ return IPFRAG_OK; ++} ++EXPORT_SYMBOL(inet_frag_queue_insert); ++ ++void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, ++ struct sk_buff *parent) ++{ ++ struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); ++ struct sk_buff **nextp; ++ int delta; ++ ++ if (head != skb) { ++ fp = skb_clone(skb, GFP_ATOMIC); ++ if (!fp) ++ return NULL; ++ FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; ++ if (RB_EMPTY_NODE(&skb->rbnode)) ++ FRAG_CB(parent)->next_frag = fp; ++ else ++ rb_replace_node(&skb->rbnode, &fp->rbnode, ++ &q->rb_fragments); ++ if (q->fragments_tail == skb) ++ q->fragments_tail = fp; ++ skb_morph(skb, head); ++ FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; ++ rb_replace_node(&head->rbnode, &skb->rbnode, ++ &q->rb_fragments); ++ consume_skb(head); ++ head = skb; ++ } ++ WARN_ON(head->ip_defrag_offset != 0); ++ ++ delta = -head->truesize; ++ ++ /* Head of list must not be cloned. */ ++ if (skb_unclone(head, GFP_ATOMIC)) ++ return NULL; ++ ++ delta += head->truesize; ++ if (delta) ++ add_frag_mem_limit(q->net, delta); ++ ++ /* If the first fragment is fragmented itself, we split ++ * it to two chunks: the first with data and paged part ++ * and the second, holding only fragments. ++ */ ++ if (skb_has_frag_list(head)) { ++ struct sk_buff *clone; ++ int i, plen = 0; ++ ++ clone = alloc_skb(0, GFP_ATOMIC); ++ if (!clone) ++ return NULL; ++ skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; ++ skb_frag_list_init(head); ++ for (i = 0; i < skb_shinfo(head)->nr_frags; i++) ++ plen += skb_frag_size(&skb_shinfo(head)->frags[i]); ++ clone->data_len = head->data_len - plen; ++ clone->len = clone->data_len; ++ head->truesize += clone->truesize; ++ clone->csum = 0; ++ clone->ip_summed = head->ip_summed; ++ add_frag_mem_limit(q->net, clone->truesize); ++ skb_shinfo(head)->frag_list = clone; ++ nextp = &clone->next; ++ } else { ++ nextp = &skb_shinfo(head)->frag_list; ++ } ++ ++ return nextp; ++} ++EXPORT_SYMBOL(inet_frag_reasm_prepare); ++ ++void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, ++ void *reasm_data) ++{ ++ struct sk_buff **nextp = (struct sk_buff **)reasm_data; ++ struct rb_node *rbn; ++ struct sk_buff *fp; ++ ++ skb_push(head, head->data - skb_network_header(head)); ++ ++ /* Traverse the tree in order, to build frag_list. */ ++ fp = FRAG_CB(head)->next_frag; ++ rbn = rb_next(&head->rbnode); ++ rb_erase(&head->rbnode, &q->rb_fragments); ++ while (rbn || fp) { ++ /* fp points to the next sk_buff in the current run; ++ * rbn points to the next run. ++ */ ++ /* Go through the current run. */ ++ while (fp) { ++ *nextp = fp; ++ nextp = &fp->next; ++ fp->prev = NULL; ++ memset(&fp->rbnode, 0, sizeof(fp->rbnode)); ++ fp->sk = NULL; ++ head->data_len += fp->len; ++ head->len += fp->len; ++ if (head->ip_summed != fp->ip_summed) ++ head->ip_summed = CHECKSUM_NONE; ++ else if (head->ip_summed == CHECKSUM_COMPLETE) ++ head->csum = csum_add(head->csum, fp->csum); ++ head->truesize += fp->truesize; ++ fp = FRAG_CB(fp)->next_frag; ++ } ++ /* Move to the next run. 
*/ ++ if (rbn) { ++ struct rb_node *rbnext = rb_next(rbn); ++ ++ fp = rb_to_skb(rbn); ++ rb_erase(rbn, &q->rb_fragments); ++ rbn = rbnext; ++ } ++ } ++ sub_frag_mem_limit(q->net, head->truesize); ++ ++ *nextp = NULL; ++ head->next = NULL; ++ head->prev = NULL; ++ head->tstamp = q->stamp; ++} ++EXPORT_SYMBOL(inet_frag_reasm_finish); ++ ++struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q) ++{ ++ struct sk_buff *head; ++ ++ if (q->fragments) { ++ head = q->fragments; ++ q->fragments = head->next; ++ } else { ++ struct sk_buff *skb; ++ ++ head = skb_rb_first(&q->rb_fragments); ++ if (!head) ++ return NULL; ++ skb = FRAG_CB(head)->next_frag; ++ if (skb) ++ rb_replace_node(&head->rbnode, &skb->rbnode, ++ &q->rb_fragments); ++ else ++ rb_erase(&head->rbnode, &q->rb_fragments); ++ memset(&head->rbnode, 0, sizeof(head->rbnode)); ++ barrier(); ++ } ++ if (head == q->fragments_tail) ++ q->fragments_tail = NULL; ++ ++ sub_frag_mem_limit(q->net, head->truesize); ++ ++ return head; ++} ++EXPORT_SYMBOL(inet_frag_pull_head); +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -56,57 +56,6 @@ + */ + static const char ip_frag_cache_name[] = "ip4-frags"; + +-/* Use skb->cb to track consecutive/adjacent fragments coming at +- * the end of the queue. Nodes in the rb-tree queue will +- * contain "runs" of one or more adjacent fragments. +- * +- * Invariants: +- * - next_frag is NULL at the tail of a "run"; +- * - the head of a "run" has the sum of all fragment lengths in frag_run_len. +- */ +-struct ipfrag_skb_cb { +- struct inet_skb_parm h; +- struct sk_buff *next_frag; +- int frag_run_len; +-}; +- +-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) +- +-static void ip4_frag_init_run(struct sk_buff *skb) +-{ +- BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); +- +- FRAG_CB(skb)->next_frag = NULL; +- FRAG_CB(skb)->frag_run_len = skb->len; +-} +- +-/* Append skb to the last "run". */ +-static void ip4_frag_append_to_last_run(struct inet_frag_queue *q, +- struct sk_buff *skb) +-{ +- RB_CLEAR_NODE(&skb->rbnode); +- FRAG_CB(skb)->next_frag = NULL; +- +- FRAG_CB(q->last_run_head)->frag_run_len += skb->len; +- FRAG_CB(q->fragments_tail)->next_frag = skb; +- q->fragments_tail = skb; +-} +- +-/* Create a new "run" with the skb. */ +-static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb) +-{ +- if (q->last_run_head) +- rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, +- &q->last_run_head->rbnode.rb_right); +- else +- rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); +- rb_insert_color(&skb->rbnode, &q->rb_fragments); +- +- ip4_frag_init_run(skb); +- q->fragments_tail = skb; +- q->last_run_head = skb; +-} +- + /* Describe an entry in the "incomplete datagrams" queue. */ + struct ipq { + struct inet_frag_queue q; +@@ -210,27 +159,9 @@ static void ip_expire(unsigned long arg) + * pull the head out of the tree in order to be able to + * deal with head->dev. 
+ */ +- if (qp->q.fragments) { +- head = qp->q.fragments; +- qp->q.fragments = head->next; +- } else { +- head = skb_rb_first(&qp->q.rb_fragments); +- if (!head) +- goto out; +- if (FRAG_CB(head)->next_frag) +- rb_replace_node(&head->rbnode, +- &FRAG_CB(head)->next_frag->rbnode, +- &qp->q.rb_fragments); +- else +- rb_erase(&head->rbnode, &qp->q.rb_fragments); +- memset(&head->rbnode, 0, sizeof(head->rbnode)); +- barrier(); +- } +- if (head == qp->q.fragments_tail) +- qp->q.fragments_tail = NULL; +- +- sub_frag_mem_limit(qp->q.net, head->truesize); +- ++ head = inet_frag_pull_head(&qp->q); ++ if (!head) ++ goto out; + head->dev = dev_get_by_index_rcu(net, qp->iif); + if (!head->dev) + goto out; +@@ -343,12 +274,10 @@ static int ip_frag_reinit(struct ipq *qp + static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) + { + struct net *net = container_of(qp->q.net, struct net, ipv4.frags); +- struct rb_node **rbn, *parent; +- struct sk_buff *skb1, *prev_tail; +- int ihl, end, skb1_run_end; ++ int ihl, end, flags, offset; ++ struct sk_buff *prev_tail; + struct net_device *dev; + unsigned int fragsize; +- int flags, offset; + int err = -ENOENT; + u8 ecn; + +@@ -380,7 +309,7 @@ static int ip_frag_queue(struct ipq *qp, + */ + if (end < qp->q.len || + ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len)) +- goto err; ++ goto discard_qp; + qp->q.flags |= INET_FRAG_LAST_IN; + qp->q.len = end; + } else { +@@ -392,82 +321,33 @@ static int ip_frag_queue(struct ipq *qp, + if (end > qp->q.len) { + /* Some bits beyond end -> corruption. */ + if (qp->q.flags & INET_FRAG_LAST_IN) +- goto err; ++ goto discard_qp; + qp->q.len = end; + } + } + if (end == offset) +- goto err; ++ goto discard_qp; + + err = -ENOMEM; + if (!pskb_pull(skb, skb_network_offset(skb) + ihl)) +- goto err; ++ goto discard_qp; + + err = pskb_trim_rcsum(skb, end - offset); + if (err) +- goto err; ++ goto discard_qp; + + /* Note : skb->rbnode and skb->dev share the same location. */ + dev = skb->dev; + /* Makes sure compiler wont do silly aliasing games */ + barrier(); + +- /* RFC5722, Section 4, amended by Errata ID : 3089 +- * When reassembling an IPv6 datagram, if +- * one or more its constituent fragments is determined to be an +- * overlapping fragment, the entire datagram (and any constituent +- * fragments) MUST be silently discarded. +- * +- * We do the same here for IPv4 (and increment an snmp counter) but +- * we do not want to drop the whole queue in response to a duplicate +- * fragment. +- */ +- +- err = -EINVAL; +- /* Find out where to put this fragment. */ + prev_tail = qp->q.fragments_tail; +- if (!prev_tail) +- ip4_frag_create_run(&qp->q, skb); /* First fragment. */ +- else if (prev_tail->ip_defrag_offset + prev_tail->len < end) { +- /* This is the common case: skb goes to the end. */ +- /* Detect and discard overlaps. */ +- if (offset < prev_tail->ip_defrag_offset + prev_tail->len) +- goto discard_qp; +- if (offset == prev_tail->ip_defrag_offset + prev_tail->len) +- ip4_frag_append_to_last_run(&qp->q, skb); +- else +- ip4_frag_create_run(&qp->q, skb); +- } else { +- /* Binary search. Note that skb can become the first fragment, +- * but not the last (covered above). 
+- */ +- rbn = &qp->q.rb_fragments.rb_node; +- do { +- parent = *rbn; +- skb1 = rb_to_skb(parent); +- skb1_run_end = skb1->ip_defrag_offset + +- FRAG_CB(skb1)->frag_run_len; +- if (end <= skb1->ip_defrag_offset) +- rbn = &parent->rb_left; +- else if (offset >= skb1_run_end) +- rbn = &parent->rb_right; +- else if (offset >= skb1->ip_defrag_offset && +- end <= skb1_run_end) +- goto err; /* No new data, potential duplicate */ +- else +- goto discard_qp; /* Found an overlap */ +- } while (*rbn); +- /* Here we have parent properly set, and rbn pointing to +- * one of its NULL left/right children. Insert skb. +- */ +- ip4_frag_init_run(skb); +- rb_link_node(&skb->rbnode, parent, rbn); +- rb_insert_color(&skb->rbnode, &qp->q.rb_fragments); +- } ++ err = inet_frag_queue_insert(&qp->q, skb, offset, end); ++ if (err) ++ goto insert_error; + + if (dev) + qp->iif = dev->ifindex; +- skb->ip_defrag_offset = offset; + + qp->q.stamp = skb->tstamp; + qp->q.meat += skb->len; +@@ -492,15 +372,24 @@ static int ip_frag_queue(struct ipq *qp, + skb->_skb_refdst = 0UL; + err = ip_frag_reasm(qp, skb, prev_tail, dev); + skb->_skb_refdst = orefdst; ++ if (err) ++ inet_frag_kill(&qp->q); + return err; + } + + skb_dst_drop(skb); + return -EINPROGRESS; + ++insert_error: ++ if (err == IPFRAG_DUP) { ++ kfree_skb(skb); ++ return -EINVAL; ++ } ++ err = -EINVAL; ++ __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS); + discard_qp: + inet_frag_kill(&qp->q); +- __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS); ++ __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); + err: + kfree_skb(skb); + return err; +@@ -512,12 +401,8 @@ static int ip_frag_reasm(struct ipq *qp, + { + struct net *net = container_of(qp->q.net, struct net, ipv4.frags); + struct iphdr *iph; +- struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments); +- struct sk_buff **nextp; /* To build frag_list. */ +- struct rb_node *rbn; +- int len; +- int ihlen; +- int err; ++ void *reasm_data; ++ int len, err; + u8 ecn; + + ipq_kill(qp); +@@ -527,111 +412,23 @@ static int ip_frag_reasm(struct ipq *qp, + err = -EINVAL; + goto out_fail; + } +- /* Make the one we just received the head. */ +- if (head != skb) { +- fp = skb_clone(skb, GFP_ATOMIC); +- if (!fp) +- goto out_nomem; +- FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; +- if (RB_EMPTY_NODE(&skb->rbnode)) +- FRAG_CB(prev_tail)->next_frag = fp; +- else +- rb_replace_node(&skb->rbnode, &fp->rbnode, +- &qp->q.rb_fragments); +- if (qp->q.fragments_tail == skb) +- qp->q.fragments_tail = fp; +- skb_morph(skb, head); +- FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; +- rb_replace_node(&head->rbnode, &skb->rbnode, +- &qp->q.rb_fragments); +- consume_skb(head); +- head = skb; +- } +- +- WARN_ON(head->ip_defrag_offset != 0); + +- /* Allocate a new buffer for the datagram. */ +- ihlen = ip_hdrlen(head); +- len = ihlen + qp->q.len; ++ /* Make the one we just received the head. */ ++ reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail); ++ if (!reasm_data) ++ goto out_nomem; + ++ len = ip_hdrlen(skb) + qp->q.len; + err = -E2BIG; + if (len > 65535) + goto out_oversize; + +- /* Head of list must not be cloned. */ +- if (skb_unclone(head, GFP_ATOMIC)) +- goto out_nomem; +- +- /* If the first fragment is fragmented itself, we split +- * it to two chunks: the first with data and paged part +- * and the second, holding only fragments. 
*/ +- if (skb_has_frag_list(head)) { +- struct sk_buff *clone; +- int i, plen = 0; +- +- clone = alloc_skb(0, GFP_ATOMIC); +- if (!clone) +- goto out_nomem; +- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; +- skb_frag_list_init(head); +- for (i = 0; i < skb_shinfo(head)->nr_frags; i++) +- plen += skb_frag_size(&skb_shinfo(head)->frags[i]); +- clone->len = clone->data_len = head->data_len - plen; +- head->truesize += clone->truesize; +- clone->csum = 0; +- clone->ip_summed = head->ip_summed; +- add_frag_mem_limit(qp->q.net, clone->truesize); +- skb_shinfo(head)->frag_list = clone; +- nextp = &clone->next; +- } else { +- nextp = &skb_shinfo(head)->frag_list; +- } +- +- skb_push(head, head->data - skb_network_header(head)); +- +- /* Traverse the tree in order, to build frag_list. */ +- fp = FRAG_CB(head)->next_frag; +- rbn = rb_next(&head->rbnode); +- rb_erase(&head->rbnode, &qp->q.rb_fragments); +- while (rbn || fp) { +- /* fp points to the next sk_buff in the current run; +- * rbn points to the next run. +- */ +- /* Go through the current run. */ +- while (fp) { +- *nextp = fp; +- nextp = &fp->next; +- fp->prev = NULL; +- memset(&fp->rbnode, 0, sizeof(fp->rbnode)); +- fp->sk = NULL; +- head->data_len += fp->len; +- head->len += fp->len; +- if (head->ip_summed != fp->ip_summed) +- head->ip_summed = CHECKSUM_NONE; +- else if (head->ip_summed == CHECKSUM_COMPLETE) +- head->csum = csum_add(head->csum, fp->csum); +- head->truesize += fp->truesize; +- fp = FRAG_CB(fp)->next_frag; +- } +- /* Move to the next run. */ +- if (rbn) { +- struct rb_node *rbnext = rb_next(rbn); +- +- fp = rb_to_skb(rbn); +- rb_erase(rbn, &qp->q.rb_fragments); +- rbn = rbnext; +- } +- } +- sub_frag_mem_limit(qp->q.net, head->truesize); ++ inet_frag_reasm_finish(&qp->q, skb, reasm_data); + +- *nextp = NULL; +- head->next = NULL; +- head->prev = NULL; +- head->dev = dev; +- head->tstamp = qp->q.stamp; +- IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size); ++ skb->dev = dev; ++ IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size); + +- iph = ip_hdr(head); ++ iph = ip_hdr(skb); + iph->tot_len = htons(len); + iph->tos |= ecn; + +@@ -644,7 +441,7 @@ static int ip_frag_reasm(struct ipq *qp, + * from one very small df-fragment and one large non-df frag. 
+	 */
+	if (qp->max_df_size == qp->q.max_size) {
+-		IPCB(head)->flags |= IPSKB_FRAG_PMTU;
++		IPCB(skb)->flags |= IPSKB_FRAG_PMTU;
+ 		iph->frag_off = htons(IP_DF);
+ 	} else {
+ 		iph->frag_off = 0;
+ 	}
+@@ -742,28 +539,6 @@ struct sk_buff *ip_check_defrag(struct n
+ }
+ EXPORT_SYMBOL(ip_check_defrag);
+ 
+-unsigned int inet_frag_rbtree_purge(struct rb_root *root)
+-{
+-	struct rb_node *p = rb_first(root);
+-	unsigned int sum = 0;
+-
+-	while (p) {
+-		struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+-
+-		p = rb_next(p);
+-		rb_erase(&skb->rbnode, root);
+-		while (skb) {
+-			struct sk_buff *next = FRAG_CB(skb)->next_frag;
+-
+-			sum += skb->truesize;
+-			kfree_skb(skb);
+-			skb = next;
+-		}
+-	}
+-	return sum;
+-}
+-EXPORT_SYMBOL(inet_frag_rbtree_purge);
+-
+ #ifdef CONFIG_SYSCTL
+ static int dist_min;
+ 
diff --git a/queue-4.9/net-ip6-defrag-use-rbtrees-for-ipv6-defrag.patch b/queue-4.9/net-ip6-defrag-use-rbtrees-for-ipv6-defrag.patch
new file mode 100644
index 00000000000..a00e19d24de
--- /dev/null
+++ b/queue-4.9/net-ip6-defrag-use-rbtrees-for-ipv6-defrag.patch
@@ -0,0 +1,449 @@
+From foo@baz Tue 30 Apr 2019 12:43:33 PM CEST
+From: Peter Oskolkov
+Date: Fri, 26 Apr 2019 08:41:07 -0700
+Subject: net: IP6 defrag: use rbtrees for IPv6 defrag
+To: Greg Kroah-Hartman , stable@vger.kernel.org, netdev@vger.kernel.org
+Cc: Peter Oskolkov , David Miller , Eric Dumazet , Sasha Levin , Captain Wiggum , Lars Persson , Peter Oskolkov , Tom Herbert , Florian Westphal
+Message-ID: <20190426154108.52277-5-posk@google.com>
+
+From: Peter Oskolkov
+
+[ Upstream commit d4289fcc9b16b89619ee1c54f829e05e56de8b9a ]
+
+Currently, IPv6 defragmentation code drops non-last fragments that
+are smaller than 1280 bytes: see
+commit 0ed4229b08c1 ("ipv6: defrag: drop non-last frags smaller than min mtu")
+
+This behavior is not specified in IPv6 RFCs and appears to break
+compatibility with some IPv6 implementations, as reported here:
+https://www.spinics.net/lists/netdev/msg543846.html
+
+This patch re-uses common IP defragmentation queueing and reassembly
+code in IPv6, removing the 1280 byte restriction.
+
+Signed-off-by: Peter Oskolkov
+Reported-by: Tom Herbert
+Cc: Eric Dumazet
+Cc: Florian Westphal
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/net/ipv6_frag.h |   11 +-
+ net/ipv6/reassembly.c   |  248 ++++++++++++++----------------------------------
+ 2 files changed, 82 insertions(+), 177 deletions(-)
+
+--- a/include/net/ipv6_frag.h
++++ b/include/net/ipv6_frag.h
+@@ -82,8 +82,15 @@ ip6frag_expire_frag_queue(struct net *ne
+ 	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+ 
+ 	/* Don't send error if the first segment did not arrive. */
+-	head = fq->q.fragments;
+-	if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
++	if (!(fq->q.flags & INET_FRAG_FIRST_IN))
++		goto out;
++
++	/* sk_buff::dev and sk_buff::rbnode are unionized. So we
++	 * pull the head out of the tree in order to be able to
++	 * deal with head->dev.
++ */ ++ head = inet_frag_pull_head(&fq->q); ++ if (!head) + goto out; + + head->dev = dev; +--- a/net/ipv6/reassembly.c ++++ b/net/ipv6/reassembly.c +@@ -62,13 +62,6 @@ + + static const char ip6_frag_cache_name[] = "ip6-frags"; + +-struct ip6frag_skb_cb { +- struct inet6_skb_parm h; +- int offset; +-}; +- +-#define FRAG6_CB(skb) ((struct ip6frag_skb_cb *)((skb)->cb)) +- + static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) + { + return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); +@@ -76,8 +69,8 @@ static u8 ip6_frag_ecn(const struct ipv6 + + static struct inet_frags ip6_frags; + +-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, +- struct net_device *dev); ++static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, ++ struct sk_buff *prev_tail, struct net_device *dev); + + static void ip6_frag_expire(unsigned long data) + { +@@ -117,21 +110,26 @@ static int ip6_frag_queue(struct frag_qu + struct frag_hdr *fhdr, int nhoff, + u32 *prob_offset) + { +- struct sk_buff *prev, *next; +- struct net_device *dev; +- int offset, end; + struct net *net = dev_net(skb_dst(skb)->dev); ++ int offset, end, fragsize; ++ struct sk_buff *prev_tail; ++ struct net_device *dev; ++ int err = -ENOENT; + u8 ecn; + + if (fq->q.flags & INET_FRAG_COMPLETE) + goto err; + ++ err = -EINVAL; + offset = ntohs(fhdr->frag_off) & ~0x7; + end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - + ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); + + if ((unsigned int)end > IPV6_MAXPLEN) { + *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb); ++ /* note that if prob_offset is set, the skb is freed elsewhere, ++ * we do not free it here. ++ */ + return -1; + } + +@@ -151,7 +149,7 @@ static int ip6_frag_queue(struct frag_qu + */ + if (end < fq->q.len || + ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) +- goto err; ++ goto discard_fq; + fq->q.flags |= INET_FRAG_LAST_IN; + fq->q.len = end; + } else { +@@ -168,75 +166,45 @@ static int ip6_frag_queue(struct frag_qu + if (end > fq->q.len) { + /* Some bits beyond end -> corruption. */ + if (fq->q.flags & INET_FRAG_LAST_IN) +- goto err; ++ goto discard_fq; + fq->q.len = end; + } + } + + if (end == offset) +- goto err; ++ goto discard_fq; + ++ err = -ENOMEM; + /* Point into the IP datagram 'data' part. */ + if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) +- goto err; +- +- if (pskb_trim_rcsum(skb, end - offset)) +- goto err; +- +- /* Find out which fragments are in front and at the back of us +- * in the chain of fragments so far. We must know where to put +- * this fragment, right? +- */ +- prev = fq->q.fragments_tail; +- if (!prev || FRAG6_CB(prev)->offset < offset) { +- next = NULL; +- goto found; +- } +- prev = NULL; +- for (next = fq->q.fragments; next != NULL; next = next->next) { +- if (FRAG6_CB(next)->offset >= offset) +- break; /* bingo! */ +- prev = next; +- } +- +-found: +- /* RFC5722, Section 4, amended by Errata ID : 3089 +- * When reassembling an IPv6 datagram, if +- * one or more its constituent fragments is determined to be an +- * overlapping fragment, the entire datagram (and any constituent +- * fragments) MUST be silently discarded. +- */ +- +- /* Check for overlap with preceding fragment. */ +- if (prev && +- (FRAG6_CB(prev)->offset + prev->len) > offset) + goto discard_fq; + +- /* Look for overlap with succeeding segment. 
*/ +- if (next && FRAG6_CB(next)->offset < end) ++ err = pskb_trim_rcsum(skb, end - offset); ++ if (err) + goto discard_fq; + +- FRAG6_CB(skb)->offset = offset; ++ /* Note : skb->rbnode and skb->dev share the same location. */ ++ dev = skb->dev; ++ /* Makes sure compiler wont do silly aliasing games */ ++ barrier(); + +- /* Insert this fragment in the chain of fragments. */ +- skb->next = next; +- if (!next) +- fq->q.fragments_tail = skb; +- if (prev) +- prev->next = skb; +- else +- fq->q.fragments = skb; ++ prev_tail = fq->q.fragments_tail; ++ err = inet_frag_queue_insert(&fq->q, skb, offset, end); ++ if (err) ++ goto insert_error; + +- dev = skb->dev; +- if (dev) { ++ if (dev) + fq->iif = dev->ifindex; +- skb->dev = NULL; +- } ++ + fq->q.stamp = skb->tstamp; + fq->q.meat += skb->len; + fq->ecn |= ecn; + add_frag_mem_limit(fq->q.net, skb->truesize); + ++ fragsize = -skb_network_offset(skb) + skb->len; ++ if (fragsize > fq->q.max_size) ++ fq->q.max_size = fragsize; ++ + /* The first fragment. + * nhoffset is obtained from the first fragment, of course. + */ +@@ -247,44 +215,48 @@ found: + + if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + fq->q.meat == fq->q.len) { +- int res; + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; +- res = ip6_frag_reasm(fq, prev, dev); ++ err = ip6_frag_reasm(fq, skb, prev_tail, dev); + skb->_skb_refdst = orefdst; +- return res; ++ return err; + } + + skb_dst_drop(skb); +- return -1; ++ return -EINPROGRESS; + ++insert_error: ++ if (err == IPFRAG_DUP) { ++ kfree_skb(skb); ++ return -EINVAL; ++ } ++ err = -EINVAL; ++ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), ++ IPSTATS_MIB_REASM_OVERLAPS); + discard_fq: + inet_frag_kill(&fq->q); +-err: + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASMFAILS); ++err: + kfree_skb(skb); +- return -1; ++ return err; + } + + /* + * Check if this packet is complete. +- * Returns NULL on failure by any reason, and pointer +- * to current nexthdr field in reassembled frame. + * + * It is called with locked fq, and caller must check that + * queue is eligible for reassembly i.e. it is not COMPLETE, + * the last and the first frames arrived and all the bits are here. + */ +-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, +- struct net_device *dev) ++static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, ++ struct sk_buff *prev_tail, struct net_device *dev) + { + struct net *net = container_of(fq->q.net, struct net, ipv6.frags); +- struct sk_buff *fp, *head = fq->q.fragments; +- int payload_len; + unsigned int nhoff; +- int sum_truesize; ++ void *reasm_data; ++ int payload_len; + u8 ecn; + + inet_frag_kill(&fq->q); +@@ -293,113 +265,40 @@ static int ip6_frag_reasm(struct frag_qu + if (unlikely(ecn == 0xff)) + goto out_fail; + +- /* Make the one we just received the head. */ +- if (prev) { +- head = prev->next; +- fp = skb_clone(head, GFP_ATOMIC); +- +- if (!fp) +- goto out_oom; +- +- fp->next = head->next; +- if (!fp->next) +- fq->q.fragments_tail = fp; +- prev->next = fp; +- +- skb_morph(head, fq->q.fragments); +- head->next = fq->q.fragments->next; +- +- consume_skb(fq->q.fragments); +- fq->q.fragments = head; +- } +- +- WARN_ON(head == NULL); +- WARN_ON(FRAG6_CB(head)->offset != 0); ++ reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); ++ if (!reasm_data) ++ goto out_oom; + +- /* Unfragmented part is taken from the first segment. 
*/ +- payload_len = ((head->data - skb_network_header(head)) - ++ payload_len = ((skb->data - skb_network_header(skb)) - + sizeof(struct ipv6hdr) + fq->q.len - + sizeof(struct frag_hdr)); + if (payload_len > IPV6_MAXPLEN) + goto out_oversize; + +- /* Head of list must not be cloned. */ +- if (skb_unclone(head, GFP_ATOMIC)) +- goto out_oom; +- +- /* If the first fragment is fragmented itself, we split +- * it to two chunks: the first with data and paged part +- * and the second, holding only fragments. */ +- if (skb_has_frag_list(head)) { +- struct sk_buff *clone; +- int i, plen = 0; +- +- clone = alloc_skb(0, GFP_ATOMIC); +- if (!clone) +- goto out_oom; +- clone->next = head->next; +- head->next = clone; +- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; +- skb_frag_list_init(head); +- for (i = 0; i < skb_shinfo(head)->nr_frags; i++) +- plen += skb_frag_size(&skb_shinfo(head)->frags[i]); +- clone->len = clone->data_len = head->data_len - plen; +- head->data_len -= clone->len; +- head->len -= clone->len; +- clone->csum = 0; +- clone->ip_summed = head->ip_summed; +- add_frag_mem_limit(fq->q.net, clone->truesize); +- } +- + /* We have to remove fragment header from datagram and to relocate + * header in order to calculate ICV correctly. */ + nhoff = fq->nhoffset; +- skb_network_header(head)[nhoff] = skb_transport_header(head)[0]; +- memmove(head->head + sizeof(struct frag_hdr), head->head, +- (head->data - head->head) - sizeof(struct frag_hdr)); +- if (skb_mac_header_was_set(head)) +- head->mac_header += sizeof(struct frag_hdr); +- head->network_header += sizeof(struct frag_hdr); +- +- skb_reset_transport_header(head); +- skb_push(head, head->data - skb_network_header(head)); +- +- sum_truesize = head->truesize; +- for (fp = head->next; fp;) { +- bool headstolen; +- int delta; +- struct sk_buff *next = fp->next; +- +- sum_truesize += fp->truesize; +- if (head->ip_summed != fp->ip_summed) +- head->ip_summed = CHECKSUM_NONE; +- else if (head->ip_summed == CHECKSUM_COMPLETE) +- head->csum = csum_add(head->csum, fp->csum); +- +- if (skb_try_coalesce(head, fp, &headstolen, &delta)) { +- kfree_skb_partial(fp, headstolen); +- } else { +- if (!skb_shinfo(head)->frag_list) +- skb_shinfo(head)->frag_list = fp; +- head->data_len += fp->len; +- head->len += fp->len; +- head->truesize += fp->truesize; +- } +- fp = next; +- } +- sub_frag_mem_limit(fq->q.net, sum_truesize); +- +- head->next = NULL; +- head->dev = dev; +- head->tstamp = fq->q.stamp; +- ipv6_hdr(head)->payload_len = htons(payload_len); +- ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); +- IP6CB(head)->nhoff = nhoff; +- IP6CB(head)->flags |= IP6SKB_FRAGMENTED; ++ skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0]; ++ memmove(skb->head + sizeof(struct frag_hdr), skb->head, ++ (skb->data - skb->head) - sizeof(struct frag_hdr)); ++ if (skb_mac_header_was_set(skb)) ++ skb->mac_header += sizeof(struct frag_hdr); ++ skb->network_header += sizeof(struct frag_hdr); ++ ++ skb_reset_transport_header(skb); ++ ++ inet_frag_reasm_finish(&fq->q, skb, reasm_data); ++ ++ skb->dev = dev; ++ ipv6_hdr(skb)->payload_len = htons(payload_len); ++ ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); ++ IP6CB(skb)->nhoff = nhoff; ++ IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; ++ IP6CB(skb)->frag_max_size = fq->q.max_size; + + /* Yes, and fold redundant checksum back. 
8) */
+-	skb_postpush_rcsum(head, skb_network_header(head),
+-			   skb_network_header_len(head));
++	skb_postpush_rcsum(skb, skb_network_header(skb),
++			   skb_network_header_len(skb));
+ 
+ 	rcu_read_lock();
+ 	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
+@@ -407,6 +306,7 @@ static int ip6_frag_reasm(struct frag_qu
+ 	fq->q.fragments = NULL;
+ 	fq->q.rb_fragments = RB_ROOT;
+ 	fq->q.fragments_tail = NULL;
++	fq->q.last_run_head = NULL;
+ 	return 1;
+ 
+ out_oversize:
+@@ -418,6 +318,7 @@ out_fail:
+ 	rcu_read_lock();
+ 	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+ 	rcu_read_unlock();
++	inet_frag_kill(&fq->q);
+ 	return -1;
+ }
+ 
+@@ -456,10 +357,6 @@ static int ipv6_frag_rcv
+ 		return 1;
+ 	}
+ 
+-	if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
+-	    fhdr->frag_off & htons(IP6_MF))
+-		goto fail_hdr;
+-
+ 	iif = skb->dev ? skb->dev->ifindex : 0;
+ 	fq = fq_find(net, fhdr->identification, hdr, iif);
+ 	if (fq) {
+@@ -477,6 +374,7 @@ static int ipv6_frag_rcv
+ 		if (prob_offset) {
+ 			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ 					IPSTATS_MIB_INHDRERRORS);
++			/* icmpv6_param_prob() calls kfree_skb(skb) */
+ 			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
+ 		}
+ 		return ret;
diff --git a/queue-4.9/net-ip6-defrag-use-rbtrees-in-nf_conntrack_reasm.c.patch b/queue-4.9/net-ip6-defrag-use-rbtrees-in-nf_conntrack_reasm.c.patch
new file mode 100644
index 00000000000..6495f15309a
--- /dev/null
+++ b/queue-4.9/net-ip6-defrag-use-rbtrees-in-nf_conntrack_reasm.c.patch
@@ -0,0 +1,396 @@
+From foo@baz Tue 30 Apr 2019 12:43:33 PM CEST
+From: Peter Oskolkov
+Date: Fri, 26 Apr 2019 08:41:08 -0700
+Subject: net: IP6 defrag: use rbtrees in nf_conntrack_reasm.c
+To: Greg Kroah-Hartman , stable@vger.kernel.org, netdev@vger.kernel.org
+Cc: Peter Oskolkov , David Miller , Eric Dumazet , Sasha Levin , Captain Wiggum , Lars Persson , Peter Oskolkov , Tom Herbert , Florian Westphal
+Message-ID: <20190426154108.52277-6-posk@google.com>
+
+From: Peter Oskolkov
+
+[ Upstream commit 997dd96471641e147cb2c33ad54284000d0f5e35 ]
+
+Currently, IPv6 defragmentation code drops non-last fragments that
+are smaller than 1280 bytes: see
+commit 0ed4229b08c1 ("ipv6: defrag: drop non-last frags smaller than min mtu")
+
+This behavior is not specified in IPv6 RFCs and appears to break
+compatibility with some IPv6 implementations, as reported here:
+https://www.spinics.net/lists/netdev/msg543846.html
+
+This patch re-uses common IP defragmentation queueing and reassembly
+code in IP6 defragmentation in nf_conntrack, removing the 1280 byte
+restriction.
+
+Signed-off-by: Peter Oskolkov
+Reported-by: Tom Herbert
+Cc: Eric Dumazet
+Cc: Florian Westphal
+Signed-off-by: David S. 
Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv6/netfilter/nf_conntrack_reasm.c |  260 +++++++++-----------------------
+ 1 file changed, 74 insertions(+), 186 deletions(-)
+
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -51,14 +51,6 @@
+
+ static const char nf_frags_cache_name[] = "nf-frags";
+
+-struct nf_ct_frag6_skb_cb
+-{
+-	struct inet6_skb_parm	h;
+-	int			offset;
+-};
+-
+-#define NFCT_FRAG6_CB(skb)	((struct nf_ct_frag6_skb_cb *)((skb)->cb))
+-
+ static struct inet_frags nf_frags;
+
+ #ifdef CONFIG_SYSCTL
+@@ -144,6 +136,9 @@ static void __net_exit nf_ct_frags6_sysc
+ }
+ #endif
+
++static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
++			     struct sk_buff *prev_tail, struct net_device *dev);
++
+ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
+ {
+ 	return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
+@@ -184,9 +179,10 @@ static struct frag_queue *fq_find(struct
+ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
+ 			     const struct frag_hdr *fhdr, int nhoff)
+ {
+-	struct sk_buff *prev, *next;
+ 	unsigned int payload_len;
+-	int offset, end;
++	struct net_device *dev;
++	struct sk_buff *prev;
++	int offset, end, err;
+ 	u8 ecn;
+
+ 	if (fq->q.flags & INET_FRAG_COMPLETE) {
+@@ -261,55 +257,19 @@ static int nf_ct_frag6_queue(struct frag
+ 		goto err;
+ 	}
+
+-	/* Find out which fragments are in front and at the back of us
+-	 * in the chain of fragments so far.  We must know where to put
+-	 * this fragment, right?
+-	 */
++	/* Note : skb->rbnode and skb->dev share the same location. */
++	dev = skb->dev;
++	/* Makes sure compiler wont do silly aliasing games */
++	barrier();
++
+ 	prev = fq->q.fragments_tail;
+-	if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
+-		next = NULL;
+-		goto found;
+-	}
+-	prev = NULL;
+-	for (next = fq->q.fragments; next != NULL; next = next->next) {
+-		if (NFCT_FRAG6_CB(next)->offset >= offset)
+-			break;	/* bingo! */
+-		prev = next;
+-	}
+-
+-found:
+-	/* RFC5722, Section 4:
+-	 * When reassembling an IPv6 datagram, if
+-	 * one or more its constituent fragments is determined to be an
+-	 * overlapping fragment, the entire datagram (and any constituent
+-	 * fragments, including those not yet received) MUST be silently
+-	 * discarded.
+-	 */
++	err = inet_frag_queue_insert(&fq->q, skb, offset, end);
++	if (err)
++		goto insert_error;
++
++	if (dev)
++		fq->iif = dev->ifindex;
+
+-	/* Check for overlap with preceding fragment. */
+-	if (prev &&
+-	    (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
+-		goto discard_fq;
+-
+-	/* Look for overlap with succeeding segment. */
+-	if (next && NFCT_FRAG6_CB(next)->offset < end)
+-		goto discard_fq;
+-
+-	NFCT_FRAG6_CB(skb)->offset = offset;
+-
+-	/* Insert this fragment in the chain of fragments. */
+-	skb->next = next;
+-	if (!next)
+-		fq->q.fragments_tail = skb;
+-	if (prev)
+-		prev->next = skb;
+-	else
+-		fq->q.fragments = skb;
+-
+-	if (skb->dev) {
+-		fq->iif = skb->dev->ifindex;
+-		skb->dev = NULL;
+-	}
+ 	fq->q.stamp = skb->tstamp;
+ 	fq->q.meat += skb->len;
+ 	fq->ecn |= ecn;
+@@ -325,11 +285,25 @@ found:
+ 		fq->q.flags |= INET_FRAG_FIRST_IN;
+ 	}
+
+-	return 0;
++	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
++	    fq->q.meat == fq->q.len) {
++		unsigned long orefdst = skb->_skb_refdst;
++
++		skb->_skb_refdst = 0UL;
++		err = nf_ct_frag6_reasm(fq, skb, prev, dev);
++		skb->_skb_refdst = orefdst;
++		return err;
++	}
++
++	skb_dst_drop(skb);
++	return -EINPROGRESS;
+
+-discard_fq:
++insert_error:
++	if (err == IPFRAG_DUP)
++		goto err;
+ 	inet_frag_kill(&fq->q);
+ err:
++	skb_dst_drop(skb);
+ 	return -EINVAL;
+ }
+
+@@ -339,141 +313,67 @@ err:
+  * It is called with locked fq, and caller must check that
+  * queue is eligible for reassembly i.e. it is not COMPLETE,
+  * the last and the first frames arrived and all the bits are here.
+- *
+- * returns true if *prev skb has been transformed into the reassembled
+- * skb, false otherwise.
+  */
+-static bool
+-nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
++static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
++			     struct sk_buff *prev_tail, struct net_device *dev)
+ {
+-	struct sk_buff *fp, *head = fq->q.fragments;
+-	int    payload_len;
++	void *reasm_data;
++	int payload_len;
+ 	u8 ecn;
+
+ 	inet_frag_kill(&fq->q);
+
+-	WARN_ON(head == NULL);
+-	WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
+-
+ 	ecn = ip_frag_ecn_table[fq->ecn];
+ 	if (unlikely(ecn == 0xff))
+-		return false;
++		goto err;
++
++	reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
++	if (!reasm_data)
++		goto err;
+
+-	/* Unfragmented part is taken from the first segment. */
+-	payload_len = ((head->data - skb_network_header(head)) -
++	payload_len = ((skb->data - skb_network_header(skb)) -
+ 		       sizeof(struct ipv6hdr) + fq->q.len -
+ 		       sizeof(struct frag_hdr));
+ 	if (payload_len > IPV6_MAXPLEN) {
+ 		net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
+ 				    payload_len);
+-		return false;
+-	}
+-
+-	/* Head of list must not be cloned. */
+-	if (skb_unclone(head, GFP_ATOMIC))
+-		return false;
+-
+-	/* If the first fragment is fragmented itself, we split
+-	 * it to two chunks: the first with data and paged part
+-	 * and the second, holding only fragments. */
+-	if (skb_has_frag_list(head)) {
+-		struct sk_buff *clone;
+-		int i, plen = 0;
+-
+-		clone = alloc_skb(0, GFP_ATOMIC);
+-		if (clone == NULL)
+-			return false;
+-
+-		clone->next = head->next;
+-		head->next = clone;
+-		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+-		skb_frag_list_init(head);
+-		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+-			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+-		clone->len = clone->data_len = head->data_len - plen;
+-		head->data_len -= clone->len;
+-		head->len -= clone->len;
+-		clone->csum = 0;
+-		clone->ip_summed = head->ip_summed;
+-
+-		add_frag_mem_limit(fq->q.net, clone->truesize);
+-	}
+-
+-	/* morph head into last received skb: prev.
+-	 *
+-	 * This allows callers of ipv6 conntrack defrag to continue
+-	 * to use the last skb(frag) passed into the reasm engine.
+-	 * The last skb frag 'silently' turns into the full reassembled skb.
+-	 *
+-	 * Since prev is also part of q->fragments we have to clone it first.
+-	 */
+-	if (head != prev) {
+-		struct sk_buff *iter;
+-
+-		fp = skb_clone(prev, GFP_ATOMIC);
+-		if (!fp)
+-			return false;
+-
+-		fp->next = prev->next;
+-
+-		iter = head;
+-		while (iter) {
+-			if (iter->next == prev) {
+-				iter->next = fp;
+-				break;
+-			}
+-			iter = iter->next;
+-		}
+-
+-		skb_morph(prev, head);
+-		prev->next = head->next;
+-		consume_skb(head);
+-		head = prev;
++		goto err;
+ 	}
+
+ 	/* We have to remove fragment header from datagram and to relocate
+ 	 * header in order to calculate ICV correctly. */
+-	skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
+-	memmove(head->head + sizeof(struct frag_hdr), head->head,
+-		(head->data - head->head) - sizeof(struct frag_hdr));
+-	head->mac_header += sizeof(struct frag_hdr);
+-	head->network_header += sizeof(struct frag_hdr);
+-
+-	skb_shinfo(head)->frag_list = head->next;
+-	skb_reset_transport_header(head);
+-	skb_push(head, head->data - skb_network_header(head));
+-
+-	for (fp = head->next; fp; fp = fp->next) {
+-		head->data_len += fp->len;
+-		head->len += fp->len;
+-		if (head->ip_summed != fp->ip_summed)
+-			head->ip_summed = CHECKSUM_NONE;
+-		else if (head->ip_summed == CHECKSUM_COMPLETE)
+-			head->csum = csum_add(head->csum, fp->csum);
+-		head->truesize += fp->truesize;
+-		fp->sk = NULL;
+-	}
+-	sub_frag_mem_limit(fq->q.net, head->truesize);
+-
+-	head->ignore_df = 1;
+-	head->next = NULL;
+-	head->dev = dev;
+-	head->tstamp = fq->q.stamp;
+-	ipv6_hdr(head)->payload_len = htons(payload_len);
+-	ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
+-	IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
++	skb_network_header(skb)[fq->nhoffset] = skb_transport_header(skb)[0];
++	memmove(skb->head + sizeof(struct frag_hdr), skb->head,
++		(skb->data - skb->head) - sizeof(struct frag_hdr));
++	skb->mac_header += sizeof(struct frag_hdr);
++	skb->network_header += sizeof(struct frag_hdr);
++
++	skb_reset_transport_header(skb);
++
++	inet_frag_reasm_finish(&fq->q, skb, reasm_data);
++
++	skb->ignore_df = 1;
++	skb->dev = dev;
++	ipv6_hdr(skb)->payload_len = htons(payload_len);
++	ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
++	IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
+
+ 	/* Yes, and fold redundant checksum back. 8) */
+-	if (head->ip_summed == CHECKSUM_COMPLETE)
+-		head->csum = csum_partial(skb_network_header(head),
+-					  skb_network_header_len(head),
+-					  head->csum);
++	if (skb->ip_summed == CHECKSUM_COMPLETE)
++		skb->csum = csum_partial(skb_network_header(skb),
++					 skb_network_header_len(skb),
++					 skb->csum);
+
+ 	fq->q.fragments = NULL;
+ 	fq->q.rb_fragments = RB_ROOT;
+ 	fq->q.fragments_tail = NULL;
++	fq->q.last_run_head = NULL;
++
++	return 0;
+
+-	return true;
++err:
++	inet_frag_kill(&fq->q);
++	return -EINVAL;
+ }
+
+ /*
+@@ -542,7 +442,6 @@ find_prev_fhdr(struct sk_buff *skb, u8 *
+ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
+ {
+ 	u16 savethdr = skb->transport_header;
+-	struct net_device *dev = skb->dev;
+ 	int fhoff, nhoff, ret;
+ 	struct frag_hdr *fhdr;
+ 	struct frag_queue *fq;
+@@ -565,10 +464,6 @@ int nf_ct_frag6_gather(struct net *net,
+ 	hdr = ipv6_hdr(skb);
+ 	fhdr = (struct frag_hdr *)skb_transport_header(skb);
+
+-	if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
+-	    fhdr->frag_off & htons(IP6_MF))
+-		return -EINVAL;
+-
+ 	skb_orphan(skb);
+ 	fq = fq_find(net, fhdr->identification, user, hdr,
+ 		     skb->dev ? skb->dev->ifindex : 0);
+@@ -580,24 +475,17 @@ int nf_ct_frag6_gather(struct net *net,
+ 	spin_lock_bh(&fq->q.lock);
+
+ 	ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff);
+-	if (ret < 0) {
+-		if (ret == -EPROTO) {
+-			skb->transport_header = savethdr;
+-			ret = 0;
+-		}
+-		goto out_unlock;
++	if (ret == -EPROTO) {
++		skb->transport_header = savethdr;
++		ret = 0;
+ 	}
+
+ 	/* after queue has assumed skb ownership, only 0 or -EINPROGRESS
+ 	 * must be returned.
+ 	 */
+-	ret = -EINPROGRESS;
+-	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+-	    fq->q.meat == fq->q.len &&
+-	    nf_ct_frag6_reasm(fq, skb, dev))
+-		ret = 0;
++	if (ret)
++		ret = -EINPROGRESS;
+
+-out_unlock:
+ 	spin_unlock_bh(&fq->q.lock);
+ 	inet_frag_put(&fq->q);
+ 	return ret;
diff --git a/queue-4.9/series b/queue-4.9/series
index 43c3b76ebe4..5790b670f1d 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -34,3 +34,8 @@ net-rds-exchange-of-8k-and-1m-pool.patch
 team-fix-possible-recursive-locking-when-add-slaves.patch
 net-stmmac-move-stmmac_check_ether_addr-to-driver-probe.patch
 ipv4-set-the-tcp_min_rtt_wlen-range-from-0-to-one-day.patch
+ipv6-frags-fix-a-lockdep-false-positive.patch
+net-ip-defrag-encapsulate-rbtree-defrag-code-into-callable-functions.patch
+ipv6-remove-dependency-of-nf_defrag_ipv6-on-ipv6-module.patch
+net-ip6-defrag-use-rbtrees-for-ipv6-defrag.patch
+net-ip6-defrag-use-rbtrees-in-nf_conntrack_reasm.c.patch
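
Note on the rbnode/dev aliasing handled in nf_ct_frag6_queue() above: in
struct sk_buff, rbnode and dev occupy overlapping storage, so once
inet_frag_queue_insert() links the skb into the queue's rbtree the device
pointer is clobbered. That is why the patched code copies skb->dev into a
local variable, with a barrier() so the compiler cannot defer the read, before
inserting. The standalone C sketch below illustrates the hazard; it is not
kernel code, and all names in it (fake_skb, fake_rb_node, net_dev_stub,
rbtree_insert) are invented for illustration only.

	#include <stdio.h>

	struct fake_rb_node { void *left, *right, *parent; };
	struct net_dev_stub { int ifindex; };

	struct fake_skb {
		union {				/* mirrors the sk_buff overlap */
			struct fake_rb_node rbnode;
			struct net_dev_stub *dev;
		};
	};

	static void rbtree_insert(struct fake_skb *skb)
	{
		/* Linking into the tree reuses the union storage and
		 * thereby clobbers skb->dev.
		 */
		skb->rbnode.left = NULL;
		skb->rbnode.right = NULL;
		skb->rbnode.parent = NULL;
	}

	int main(void)
	{
		struct net_dev_stub eth0 = { .ifindex = 2 };
		struct fake_skb skb = { .dev = &eth0 };
		struct net_dev_stub *dev;

		/* Read the device out of the union first, as
		 * nf_ct_frag6_queue() does, and keep the compiler from
		 * deferring that read past the insertion (the role
		 * barrier() plays in the patch).
		 */
		dev = skb.dev;
		__asm__ __volatile__("" ::: "memory");

		rbtree_insert(&skb);	/* skb.dev is no longer meaningful */

		if (dev)
			printf("fq->iif = %d\n", dev->ifindex);
		return 0;
	}

The same saved pointer is what lets nf_ct_frag6_reasm() restore skb->dev on
the reassembled skb after the tree has been torn back down.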