--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Mon, 21 Dec 2015 10:55:45 -0800
+Subject: addrconf: always initialize sysctl table data
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 5449a5ca9bc27dd51a462de7ca0b1cd861cd2bd0 ]
+
+When sysctl performs restricted writes, it allows writing from
+a middle position of a sysctl file, which requires us to initialize
+the table data before calling proc_dostring() for the write case.
+
+Fixes: 3d1bec99320d ("ipv6: introduce secret_stable to ipv6_devconf")
+Reported-by: Sasha Levin <sasha.levin@oracle.com>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Tested-by: Sasha Levin <sasha.levin@oracle.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -5267,13 +5267,10 @@ static int addrconf_sysctl_stable_secret
+ goto out;
+ }
+
+- if (!write) {
+- err = snprintf(str, sizeof(str), "%pI6",
+- &secret->secret);
+- if (err >= sizeof(str)) {
+- err = -EIO;
+- goto out;
+- }
++ err = snprintf(str, sizeof(str), "%pI6", &secret->secret);
++ if (err >= sizeof(str)) {
++ err = -EIO;
++ goto out;
+ }
+
+ err = proc_dostring(&lctl, write, buffer, lenp, ppos);
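+
+For reference, the resulting handler shape looks roughly like the sketch
+below: a local ctl_table points at an on-stack buffer, and that buffer is
+now filled for the read and write cases alike, because a restricted write
+may start at a non-zero file offset. The names here (my_secret,
+my_handler) are illustrative, not taken from the patch, and the real
+addrconf handler additionally parses and stores the value on write.
+
+#include <linux/inet.h>
+#include <linux/sysctl.h>
+
+static struct in6_addr my_secret;       /* illustrative stand-in */
+
+static int my_handler(struct ctl_table *ctl, int write,
+                      void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+        char str[INET6_ADDRSTRLEN];
+        struct ctl_table lctl = *ctl;
+        int err;
+
+        lctl.maxlen = sizeof(str);
+        lctl.data = str;
+
+        /* initialize unconditionally: a restricted write may begin in
+         * the middle of the buffer, so the old contents must be there
+         */
+        err = snprintf(str, sizeof(str), "%pI6", &my_secret);
+        if (err >= sizeof(str))
+                return -EIO;
+
+        return proc_dostring(&lctl, write, buffer, lenp, ppos);
+}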
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Sven Eckelmann <sven@narfation.org>
+Date: Thu, 14 Jan 2016 15:28:19 +0100
+Subject: batman-adv: Avoid recursive call_rcu for batadv_bla_claim
+
+From: Sven Eckelmann <sven@narfation.org>
+
+[ Upstream commit 63b399272294e7a939cde41792dca38c549f0484 ]
+
+The batadv_claim_free_ref function uses call_rcu to delay the free of the
+batadv_bla_claim object until no (already started) rcu_read_lock section
+is active anymore. This makes sure that no context is still trying to
+access the object which should be removed. But batadv_bla_claim also
+contains a
+reference to backbone_gw which must be removed.
+
+The reference drop of backbone_gw was done in the call_rcu function
+batadv_claim_free_rcu but should actually be done in the
+batadv_claim_release function to avoid nested call_rcu calls. This is
+important because rcu_barrier (e.g. batadv_softif_free or batadv_exit)
+will not detect the inner call_rcu as relevant for its execution.
+Otherwise this barrier will most likely be inserted in the queue before
+the callback of the first call_rcu has executed. The caller of
+rcu_barrier will therefore continue to run before the inner call_rcu
+callback has finished.
+
+Fixes: 23721387c409 ("batman-adv: add basic bridge loop avoidance code")
+Signed-off-by: Sven Eckelmann <sven@narfation.org>
+Acked-by: Simon Wunderlich <sw@simonwunderlich.de>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: Antonio Quartulli <a@unstable.cc>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/bridge_loop_avoidance.c | 10 +++-------
+ 1 file changed, 3 insertions(+), 7 deletions(-)
+
+--- a/net/batman-adv/bridge_loop_avoidance.c
++++ b/net/batman-adv/bridge_loop_avoidance.c
+@@ -112,21 +112,17 @@ batadv_backbone_gw_free_ref(struct batad
+ }
+
+ /* finally deinitialize the claim */
+-static void batadv_claim_free_rcu(struct rcu_head *rcu)
++static void batadv_claim_release(struct batadv_bla_claim *claim)
+ {
+- struct batadv_bla_claim *claim;
+-
+- claim = container_of(rcu, struct batadv_bla_claim, rcu);
+-
+ batadv_backbone_gw_free_ref(claim->backbone_gw);
+- kfree(claim);
++ kfree_rcu(claim, rcu);
+ }
+
+ /* free a claim, call claim_free_rcu if its the last reference */
+ static void batadv_claim_free_ref(struct batadv_bla_claim *claim)
+ {
+ if (atomic_dec_and_test(&claim->refcount))
+- call_rcu(&claim->rcu, batadv_claim_free_rcu);
++ batadv_claim_release(claim);
+ }
+
+ /**
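+
+The shape of the conversion, reduced to a toy pair of objects (all names
+below are illustrative; parent_put() is an assumed helper): after the
+change, the only RCU callback left is the kfree itself, queued via
+kfree_rcu(), so an rcu_barrier() at teardown really does wait for
+everything. A reference put issued from inside a call_rcu callback would
+queue a second-level callback that the barrier never sees.
+
+struct child {
+        struct parent *parent;  /* counted reference */
+        atomic_t refcount;
+        struct rcu_head rcu;
+};
+
+static void child_release(struct child *c)
+{
+        parent_put(c->parent);  /* drop the parent ref immediately */
+        kfree_rcu(c, rcu);      /* free c only after the grace period */
+}
+
+static void child_put(struct child *c)
+{
+        if (atomic_dec_and_test(&c->refcount))
+                child_release(c);
+}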
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Sven Eckelmann <sven@narfation.org>
+Date: Tue, 5 Jan 2016 12:06:19 +0100
+Subject: batman-adv: Avoid recursive call_rcu for batadv_nc_node
+
+From: Sven Eckelmann <sven@narfation.org>
+
+[ Upstream commit 44e8e7e91d6c7c7ab19688750f7257292640d1a0 ]
+
+The batadv_nc_node_free_ref function uses call_rcu to delay the free of the
+batadv_nc_node object until no (already started) rcu_read_lock section
+is active anymore. This makes sure that no context is still trying to
+access the object which should be removed. But batadv_nc_node also
+contains a
+reference to orig_node which must be removed.
+
+The reference drop of orig_node was done in the call_rcu function
+batadv_nc_node_free_rcu but should actually be done in the
+batadv_nc_node_release function to avoid nested call_rcu calls. This is
+important because rcu_barrier (e.g. batadv_softif_free or batadv_exit)
+will not detect the inner call_rcu as relevant for its execution.
+Otherwise this barrier will most likely be inserted in the queue before
+the callback of the first call_rcu has executed. The caller of
+rcu_barrier will therefore continue to run before the inner call_rcu
+callback has finished.
+
+Fixes: d56b1705e28c ("batman-adv: network coding - detect coding nodes and remove these after timeout")
+Signed-off-by: Sven Eckelmann <sven@narfation.org>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: Antonio Quartulli <a@unstable.cc>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/network-coding.c | 19 ++++++++-----------
+ 1 file changed, 8 insertions(+), 11 deletions(-)
+
+--- a/net/batman-adv/network-coding.c
++++ b/net/batman-adv/network-coding.c
+@@ -175,28 +175,25 @@ void batadv_nc_init_orig(struct batadv_o
+ }
+
+ /**
+- * batadv_nc_node_free_rcu - rcu callback to free an nc node and remove
+- * its refcount on the orig_node
+- * @rcu: rcu pointer of the nc node
++ * batadv_nc_node_release - release nc_node from lists and queue for free after
++ * rcu grace period
++ * @nc_node: the nc node to free
+ */
+-static void batadv_nc_node_free_rcu(struct rcu_head *rcu)
++static void batadv_nc_node_release(struct batadv_nc_node *nc_node)
+ {
+- struct batadv_nc_node *nc_node;
+-
+- nc_node = container_of(rcu, struct batadv_nc_node, rcu);
+ batadv_orig_node_free_ref(nc_node->orig_node);
+- kfree(nc_node);
++ kfree_rcu(nc_node, rcu);
+ }
+
+ /**
+- * batadv_nc_node_free_ref - decrements the nc node refcounter and possibly
+- * frees it
++ * batadv_nc_node_free_ref - decrement the nc node refcounter and possibly
++ * release it
+ * @nc_node: the nc node to free
+ */
+ static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node)
+ {
+ if (atomic_dec_and_test(&nc_node->refcount))
+- call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu);
++ batadv_nc_node_release(nc_node);
+ }
+
+ /**
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Sven Eckelmann <sven@narfation.org>
+Date: Tue, 5 Jan 2016 12:06:25 +0100
+Subject: batman-adv: Drop immediate batadv_hard_iface free function
+
+From: Sven Eckelmann <sven@narfation.org>
+
+[ Upstream commit b4d922cfc9c08318eeb77d53b7633740e6b0efb0 ]
+
+It is not allowed to free the memory of an object which is part of a list
+that is protected by RCU read-side critical sections without making sure
+that no other context is accessing the object anymore. This is usually done
+by removing the references to this object and then waiting until the RCU
+grace period is over and no one can (legitimately) access it anymore.
+
+But the _now functions ignore this completely. They free the object
+directly even while a different context may still be accessing it. This
+has to be avoided and thus these functions must be removed and all
+callers have to use batadv_hardif_free_ref.
+
+Fixes: 89652331c00f ("batman-adv: split tq information in neigh_node struct")
+Signed-off-by: Sven Eckelmann <sven@narfation.org>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: Antonio Quartulli <a@unstable.cc>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/hard-interface.h | 12 ------------
+ net/batman-adv/originator.c | 16 +++++++---------
+ 2 files changed, 7 insertions(+), 21 deletions(-)
+
+--- a/net/batman-adv/hard-interface.h
++++ b/net/batman-adv/hard-interface.h
+@@ -64,18 +64,6 @@ batadv_hardif_free_ref(struct batadv_har
+ call_rcu(&hard_iface->rcu, batadv_hardif_free_rcu);
+ }
+
+-/**
+- * batadv_hardif_free_ref_now - decrement the hard interface refcounter and
+- * possibly free it (without rcu callback)
+- * @hard_iface: the hard interface to free
+- */
+-static inline void
+-batadv_hardif_free_ref_now(struct batadv_hard_iface *hard_iface)
+-{
+- if (atomic_dec_and_test(&hard_iface->refcount))
+- batadv_hardif_free_rcu(&hard_iface->rcu);
+-}
+-
+ static inline struct batadv_hard_iface *
+ batadv_primary_if_get_selected(struct batadv_priv *bat_priv)
+ {
+--- a/net/batman-adv/originator.c
++++ b/net/batman-adv/originator.c
+@@ -175,24 +175,22 @@ void batadv_neigh_ifinfo_free_ref(struct
+ }
+
+ /**
+- * batadv_neigh_node_free_rcu - free the neigh_node
+- * @rcu: rcu pointer of the neigh_node
++ * batadv_neigh_node_release - release neigh_node from lists and queue for
++ * free after rcu grace period
++ * @neigh_node: neigh neighbor to free
+ */
+-static void batadv_neigh_node_free_rcu(struct rcu_head *rcu)
++static void batadv_neigh_node_release(struct batadv_neigh_node *neigh_node)
+ {
+ struct hlist_node *node_tmp;
+- struct batadv_neigh_node *neigh_node;
+ struct batadv_neigh_ifinfo *neigh_ifinfo;
+
+- neigh_node = container_of(rcu, struct batadv_neigh_node, rcu);
+-
+ hlist_for_each_entry_safe(neigh_ifinfo, node_tmp,
+ &neigh_node->ifinfo_list, list) {
+ batadv_neigh_ifinfo_free_ref(neigh_ifinfo);
+ }
+- batadv_hardif_free_ref_now(neigh_node->if_incoming);
++ batadv_hardif_free_ref(neigh_node->if_incoming);
+
+- kfree(neigh_node);
++ kfree_rcu(neigh_node, rcu);
+ }
+
+ /**
+@@ -203,7 +201,7 @@ static void batadv_neigh_node_free_rcu(s
+ void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node)
+ {
+ if (atomic_dec_and_test(&neigh_node->refcount))
+- call_rcu(&neigh_node->rcu, batadv_neigh_node_free_rcu);
++ batadv_neigh_node_release(neigh_node);
+ }
+
+ /**
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Sven Eckelmann <sven@narfation.org>
+Date: Tue, 5 Jan 2016 12:06:22 +0100
+Subject: batman-adv: Drop immediate batadv_neigh_node free function
+
+From: Sven Eckelmann <sven@narfation.org>
+
+[ Upstream commit 2baa753c276f27f8e844637561ad597867aa6fb6 ]
+
+It is not allowed to free the memory of an object which is part of a list
+that is protected by RCU read-side critical sections without making sure
+that no other context is accessing the object anymore. This is usually done
+by removing the references to this object and then waiting until the RCU
+grace period is over and no one can (legitimately) access it anymore.
+
+But the _now functions ignore this completely. They free the object
+directly even while a different context may still be accessing it. This
+has to be avoided and thus these functions must be removed and all
+callers have to use batadv_neigh_node_free_ref.
+
+Fixes: 89652331c00f ("batman-adv: split tq information in neigh_node struct")
+Signed-off-by: Sven Eckelmann <sven@narfation.org>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: Antonio Quartulli <a@unstable.cc>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/originator.c | 33 ++++++++++-----------------------
+ 1 file changed, 10 insertions(+), 23 deletions(-)
+
+--- a/net/batman-adv/originator.c
++++ b/net/batman-adv/originator.c
+@@ -210,20 +210,8 @@ static void batadv_neigh_node_free_rcu(s
+ }
+
+ /**
+- * batadv_neigh_node_free_ref_now - decrement the neighbors refcounter
+- * and possibly free it (without rcu callback)
+- * @neigh_node: neigh neighbor to free
+- */
+-static void
+-batadv_neigh_node_free_ref_now(struct batadv_neigh_node *neigh_node)
+-{
+- if (atomic_dec_and_test(&neigh_node->refcount))
+- batadv_neigh_node_free_rcu(&neigh_node->rcu);
+-}
+-
+-/**
+ * batadv_neigh_node_free_ref - decrement the neighbors refcounter
+- * and possibly free it
++ * and possibly release it
+ * @neigh_node: neigh neighbor to free
+ */
+ void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node)
+@@ -495,24 +483,23 @@ batadv_neigh_node_get(const struct batad
+ }
+
+ /**
+- * batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object
+- * @rcu: rcu pointer of the orig_ifinfo object
++ * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for
++ * free after rcu grace period
++ * @orig_ifinfo: the orig_ifinfo object to release
+ */
+-static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu)
++static void batadv_orig_ifinfo_release(struct batadv_orig_ifinfo *orig_ifinfo)
+ {
+- struct batadv_orig_ifinfo *orig_ifinfo;
+ struct batadv_neigh_node *router;
+
+- orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu);
+-
+ if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
+- batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing);
++ batadv_hardif_free_ref(orig_ifinfo->if_outgoing);
+
+ /* this is the last reference to this object */
+ router = rcu_dereference_protected(orig_ifinfo->router, true);
+ if (router)
+- batadv_neigh_node_free_ref_now(router);
+- kfree(orig_ifinfo);
++ batadv_neigh_node_free_ref(router);
++
++ kfree_rcu(orig_ifinfo, rcu);
+ }
+
+ /**
+@@ -523,7 +510,7 @@ static void batadv_orig_ifinfo_free_rcu(
+ void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo)
+ {
+ if (atomic_dec_and_test(&orig_ifinfo->refcount))
+- call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu);
++ batadv_orig_ifinfo_release(orig_ifinfo);
+ }
+
+ /**
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Sven Eckelmann <sven@narfation.org>
+Date: Tue, 5 Jan 2016 12:06:21 +0100
+Subject: batman-adv: Drop immediate batadv_orig_ifinfo free function
+
+From: Sven Eckelmann <sven@narfation.org>
+
+[ Upstream commit deed96605f5695cb945e0b3d79429581857a2b9d ]
+
+It is not allowed to free the memory of an object which is part of a list
+that is protected by RCU read-side critical sections without making sure
+that no other context is accessing the object anymore. This is usually done
+by removing the references to this object and then waiting until the RCU
+grace period is over and no one can (legitimately) access it anymore.
+
+But the _now functions ignore this completely. They free the object
+directly even while a different context may still be accessing it. This
+has to be avoided and thus these functions must be removed and all
+callers have to use batadv_orig_ifinfo_free_ref.
+
+Fixes: 7351a4822d42 ("batman-adv: split out router from orig_node")
+Signed-off-by: Sven Eckelmann <sven@narfation.org>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: Antonio Quartulli <a@unstable.cc>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/originator.c | 59 +++++++++++++++++++++++---------------------
+ 1 file changed, 31 insertions(+), 28 deletions(-)
+
+--- a/net/batman-adv/originator.c
++++ b/net/batman-adv/originator.c
+@@ -516,76 +516,79 @@ static void batadv_orig_ifinfo_free_rcu(
+ }
+
+ /**
+- * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free
+- * the orig_ifinfo (without rcu callback)
++ * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly release
++ * the orig_ifinfo
+ * @orig_ifinfo: the orig_ifinfo object to release
+ */
+-static void
+-batadv_orig_ifinfo_free_ref_now(struct batadv_orig_ifinfo *orig_ifinfo)
++void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo)
+ {
+ if (atomic_dec_and_test(&orig_ifinfo->refcount))
+- batadv_orig_ifinfo_free_rcu(&orig_ifinfo->rcu);
++ call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu);
+ }
+
+ /**
+- * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free
+- * the orig_ifinfo
+- * @orig_ifinfo: the orig_ifinfo object to release
++ * batadv_orig_node_free_rcu - free the orig_node
++ * @rcu: rcu pointer of the orig_node
+ */
+-void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo)
++static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
+ {
+- if (atomic_dec_and_test(&orig_ifinfo->refcount))
+- call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu);
++ struct batadv_orig_node *orig_node;
++
++ orig_node = container_of(rcu, struct batadv_orig_node, rcu);
++
++ batadv_mcast_purge_orig(orig_node);
++
++ batadv_frag_purge_orig(orig_node, NULL);
++
++ if (orig_node->bat_priv->bat_algo_ops->bat_orig_free)
++ orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node);
++
++ kfree(orig_node->tt_buff);
++ kfree(orig_node);
+ }
+
+-static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
++/**
++ * batadv_orig_node_release - release orig_node from lists and queue for
++ * free after rcu grace period
++ * @orig_node: the orig node to free
++ */
++static void batadv_orig_node_release(struct batadv_orig_node *orig_node)
+ {
+ struct hlist_node *node_tmp;
+ struct batadv_neigh_node *neigh_node;
+- struct batadv_orig_node *orig_node;
+ struct batadv_orig_ifinfo *orig_ifinfo;
+
+- orig_node = container_of(rcu, struct batadv_orig_node, rcu);
+-
+ spin_lock_bh(&orig_node->neigh_list_lock);
+
+ /* for all neighbors towards this originator ... */
+ hlist_for_each_entry_safe(neigh_node, node_tmp,
+ &orig_node->neigh_list, list) {
+ hlist_del_rcu(&neigh_node->list);
+- batadv_neigh_node_free_ref_now(neigh_node);
++ batadv_neigh_node_free_ref(neigh_node);
+ }
+
+ hlist_for_each_entry_safe(orig_ifinfo, node_tmp,
+ &orig_node->ifinfo_list, list) {
+ hlist_del_rcu(&orig_ifinfo->list);
+- batadv_orig_ifinfo_free_ref_now(orig_ifinfo);
++ batadv_orig_ifinfo_free_ref(orig_ifinfo);
+ }
+ spin_unlock_bh(&orig_node->neigh_list_lock);
+
+- batadv_mcast_purge_orig(orig_node);
+-
+ /* Free nc_nodes */
+ batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL);
+
+- batadv_frag_purge_orig(orig_node, NULL);
+-
+- if (orig_node->bat_priv->bat_algo_ops->bat_orig_free)
+- orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node);
+-
+- kfree(orig_node->tt_buff);
+- kfree(orig_node);
++ call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu);
+ }
+
+ /**
+ * batadv_orig_node_free_ref - decrement the orig node refcounter and possibly
+- * schedule an rcu callback for freeing it
++ * release it
+ * @orig_node: the orig node to free
+ */
+ void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node)
+ {
+ if (atomic_dec_and_test(&orig_node->refcount))
+- call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu);
++ batadv_orig_node_release(orig_node);
+ }
+
+ /**
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Sven Eckelmann <sven@narfation.org>
+Date: Tue, 5 Jan 2016 12:06:24 +0100
+Subject: batman-adv: Drop immediate neigh_ifinfo free function
+
+From: Sven Eckelmann <sven@narfation.org>
+
+[ Upstream commit ae3e1e36e3cb6c686a7a2725af20ca86aa46d62a ]
+
+It is not allowed to free the memory of an object which is part of a list
+that is protected by RCU read-side critical sections without making sure
+that no other context is accessing the object anymore. This is usually done
+by removing the references to this object and then waiting until the RCU
+grace period is over and no one can (legitimately) access it anymore.
+
+But the _now functions ignore this completely. They free the object
+directly even while a different context may still be accessing it. This
+has to be avoided and thus these functions must be removed and all
+callers have to use batadv_neigh_ifinfo_free_ref.
+
+Fixes: 89652331c00f ("batman-adv: split tq information in neigh_node struct")
+Signed-off-by: Sven Eckelmann <sven@narfation.org>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: Antonio Quartulli <a@unstable.cc>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/originator.c | 34 ++++++++++------------------------
+ 1 file changed, 10 insertions(+), 24 deletions(-)
+
+--- a/net/batman-adv/originator.c
++++ b/net/batman-adv/originator.c
+@@ -150,42 +150,28 @@ err:
+ }
+
+ /**
+- * batadv_neigh_ifinfo_free_rcu - free the neigh_ifinfo object
+- * @rcu: rcu pointer of the neigh_ifinfo object
+- */
+-static void batadv_neigh_ifinfo_free_rcu(struct rcu_head *rcu)
+-{
+- struct batadv_neigh_ifinfo *neigh_ifinfo;
+-
+- neigh_ifinfo = container_of(rcu, struct batadv_neigh_ifinfo, rcu);
+-
+- if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
+- batadv_hardif_free_ref_now(neigh_ifinfo->if_outgoing);
+-
+- kfree(neigh_ifinfo);
+-}
+-
+-/**
+- * batadv_neigh_ifinfo_free_now - decrement the refcounter and possibly free
+- * the neigh_ifinfo (without rcu callback)
++ * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for
++ * free after rcu grace period
+ * @neigh_ifinfo: the neigh_ifinfo object to release
+ */
+ static void
+-batadv_neigh_ifinfo_free_ref_now(struct batadv_neigh_ifinfo *neigh_ifinfo)
++batadv_neigh_ifinfo_release(struct batadv_neigh_ifinfo *neigh_ifinfo)
+ {
+- if (atomic_dec_and_test(&neigh_ifinfo->refcount))
+- batadv_neigh_ifinfo_free_rcu(&neigh_ifinfo->rcu);
++ if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
++ batadv_hardif_free_ref(neigh_ifinfo->if_outgoing);
++
++ kfree_rcu(neigh_ifinfo, rcu);
+ }
+
+ /**
+- * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly free
++ * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly release
+ * the neigh_ifinfo
+ * @neigh_ifinfo: the neigh_ifinfo object to release
+ */
+ void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo)
+ {
+ if (atomic_dec_and_test(&neigh_ifinfo->refcount))
+- call_rcu(&neigh_ifinfo->rcu, batadv_neigh_ifinfo_free_rcu);
++ batadv_neigh_ifinfo_release(neigh_ifinfo);
+ }
+
+ /**
+@@ -202,7 +188,7 @@ static void batadv_neigh_node_free_rcu(s
+
+ hlist_for_each_entry_safe(neigh_ifinfo, node_tmp,
+ &neigh_node->ifinfo_list, list) {
+- batadv_neigh_ifinfo_free_ref_now(neigh_ifinfo);
++ batadv_neigh_ifinfo_free_ref(neigh_ifinfo);
+ }
+ batadv_hardif_free_ref_now(neigh_node->if_incoming);
+
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Sven Eckelmann <sven@narfation.org>
+Date: Tue, 5 Jan 2016 12:06:20 +0100
+Subject: batman-adv: Drop immediate orig_node free function
+
+From: Sven Eckelmann <sven@narfation.org>
+
+[ Upstream commit 42eff6a617e23b691f8e4467f4687ed7245a92db ]
+
+It is not allowed to free the memory of an object which is part of a list
+that is protected by RCU read-side critical sections without making sure
+that no other context is accessing the object anymore. This is usually done
+by removing the references to this object and then waiting until the RCU
+grace period is over and no one can (legitimately) access it anymore.
+
+But the _now functions ignore this completely. They free the object
+directly even while a different context may still be accessing it. This
+has to be avoided and thus these functions must be removed and all
+callers have to use batadv_orig_node_free_ref.
+
+Fixes: 72822225bd41 ("batman-adv: Fix rcu_barrier() miss due to double call_rcu() in TT code")
+Signed-off-by: Sven Eckelmann <sven@narfation.org>
+Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
+Signed-off-by: Antonio Quartulli <a@unstable.cc>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/batman-adv/originator.c | 11 -----------
+ net/batman-adv/originator.h | 1 -
+ net/batman-adv/translation-table.c | 28 +++++++++++++---------------
+ 3 files changed, 13 insertions(+), 27 deletions(-)
+
+--- a/net/batman-adv/originator.c
++++ b/net/batman-adv/originator.c
+@@ -562,17 +562,6 @@ void batadv_orig_node_free_ref(struct ba
+ batadv_orig_node_release(orig_node);
+ }
+
+-/**
+- * batadv_orig_node_free_ref_now - decrement the orig node refcounter and
+- * possibly free it (without rcu callback)
+- * @orig_node: the orig node to free
+- */
+-void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node)
+-{
+- if (atomic_dec_and_test(&orig_node->refcount))
+- batadv_orig_node_free_rcu(&orig_node->rcu);
+-}
+-
+ void batadv_originator_free(struct batadv_priv *bat_priv)
+ {
+ struct batadv_hashtable *hash = bat_priv->orig_hash;
+--- a/net/batman-adv/originator.h
++++ b/net/batman-adv/originator.h
+@@ -25,7 +25,6 @@ int batadv_originator_init(struct batadv
+ void batadv_originator_free(struct batadv_priv *bat_priv);
+ void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
+ void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node);
+-void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node);
+ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
+ const uint8_t *addr);
+ struct batadv_neigh_node *
+--- a/net/batman-adv/translation-table.c
++++ b/net/batman-adv/translation-table.c
+@@ -219,20 +219,6 @@ int batadv_tt_global_hash_count(struct b
+ return count;
+ }
+
+-static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
+-{
+- struct batadv_tt_orig_list_entry *orig_entry;
+-
+- orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu);
+-
+- /* We are in an rcu callback here, therefore we cannot use
+- * batadv_orig_node_free_ref() and its call_rcu():
+- * An rcu_barrier() wouldn't wait for that to finish
+- */
+- batadv_orig_node_free_ref_now(orig_entry->orig_node);
+- kfree(orig_entry);
+-}
+-
+ /**
+ * batadv_tt_local_size_mod - change the size by v of the local table identified
+ * by vid
+@@ -328,13 +314,25 @@ static void batadv_tt_global_size_dec(st
+ batadv_tt_global_size_mod(orig_node, vid, -1);
+ }
+
++/**
++ * batadv_tt_orig_list_entry_release - release tt orig entry from lists and
++ * queue for free after rcu grace period
++ * @orig_entry: tt orig entry to be free'd
++ */
++static void
++batadv_tt_orig_list_entry_release(struct batadv_tt_orig_list_entry *orig_entry)
++{
++ batadv_orig_node_free_ref(orig_entry->orig_node);
++ kfree_rcu(orig_entry, rcu);
++}
++
+ static void
+ batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry)
+ {
+ if (!atomic_dec_and_test(&orig_entry->refcount))
+ return;
+
+- call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
++ batadv_tt_orig_list_entry_release(orig_entry);
+ }
+
+ /**
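+
+The teardown side that all of these conversions protect, as a sketch
+(remove_all_objects() is an assumed helper, not a batman-adv function):
+rcu_barrier() only waits for callbacks that were queued before it ran,
+so a callback that itself calls call_rcu() escapes the barrier and can
+fire after the module is gone.
+
+static void __exit my_module_exit(void)
+{
+        remove_all_objects();   /* drops refs, queues the final call_rcu()s */
+        rcu_barrier();          /* waits for exactly those callbacks ... */
+        /* ... but not for callbacks queued *by* those callbacks, which
+         * is why the release functions above must not drop their inner
+         * references via another call_rcu
+         */
+}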
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Karl Heiss <kheiss@gmail.com>
+Date: Mon, 11 Jan 2016 08:28:43 -0500
+Subject: bonding: Prevent IPv6 link local address on enslaved devices
+
+From: Karl Heiss <kheiss@gmail.com>
+
+[ Upstream commit 03d84a5f83a67e692af00a3d3901e7820e3e84d5 ]
+
+Commit 1f718f0f4f97 ("bonding: populate neighbour's private on enslave")
+undoes the fix provided by commit c2edacf80e15 ("bonding / ipv6: no addrconf
+for slaves separately from master") by effectively setting the slave flag
+after the slave has been opened. If the slave comes up quickly enough, it
+will go through the IPv6 addrconf before the slave flag has been set and
+will get a link-local IPv6 address.
+
+In order to ensure that addrconf knows to ignore the slave devices on state
+change, set IFF_SLAVE before dev_open() during bonding enslavement.
+
+Fixes: 1f718f0f4f97 ("bonding: populate neighbour's private on enslave")
+Signed-off-by: Karl Heiss <kheiss@gmail.com>
+Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
+Reviewed-by: Jarod Wilson <jarod@redhat.com>
+Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -1194,7 +1194,6 @@ static int bond_master_upper_dev_link(st
+ err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave);
+ if (err)
+ return err;
+- slave_dev->flags |= IFF_SLAVE;
+ rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
+ return 0;
+ }
+@@ -1452,6 +1451,9 @@ int bond_enslave(struct net_device *bond
+ }
+ }
+
++ /* set slave flag before open to prevent IPv6 addrconf */
++ slave_dev->flags |= IFF_SLAVE;
++
+ /* open the slave since the application closed it */
+ res = dev_open(slave_dev);
+ if (res) {
+@@ -1712,6 +1714,7 @@ err_close:
+ dev_close(slave_dev);
+
+ err_restore_mac:
++ slave_dev->flags &= ~IFF_SLAVE;
+ if (!bond->params.fail_over_mac ||
+ BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
+ /* XXX TODO - fom follow mode needs to change master's
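+
+The consumer of the flag is the ipv6 addrconf netdev notifier, which
+(roughly, for kernels of this vintage; the exact code may differ) skips
+slave devices on up/change events:
+
+        case NETDEV_UP:
+        case NETDEV_CHANGE:
+                if (dev->flags & IFF_SLAVE)
+                        break;  /* no separate addrconf for slaves */
+
+Setting IFF_SLAVE only after dev_open() therefore left a window in which
+NETDEV_UP was delivered for an apparently ordinary device.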
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Fri, 15 Jan 2016 19:03:54 +0100
+Subject: bridge: fix lockdep addr_list_lock false positive splat
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+[ Upstream commit c6894dec8ea9ae05747124dce98b3b5c2e69b168 ]
+
+After promisc mode management was introduced, a bridge device could do
+dev_set_promiscuity from its ndo_change_rx_flags() callback which in
+turn can be called after the bridge's addr_list_lock has been taken
+(e.g. by dev_uc_add). This causes a false positive lockdep splat because
+the port interfaces' addr_list_lock is taken when br_manage_promisc()
+runs after the bridge's addr list lock was already taken.
+To remove the false positive, introduce a custom bridge addr_list_lock
+class and set it on bridge init.
+A simple way to reproduce this is with the following:
+$ brctl addbr br0
+$ ip l add l br0 br0.100 type vlan id 100
+$ ip l set br0 up
+$ ip l set br0.100 up
+$ echo 1 > /sys/class/net/br0/bridge/vlan_filtering
+$ brctl addif br0 eth0
+Splat:
+[ 43.684325] =============================================
+[ 43.684485] [ INFO: possible recursive locking detected ]
+[ 43.684636] 4.4.0-rc8+ #54 Not tainted
+[ 43.684755] ---------------------------------------------
+[ 43.684906] brctl/1187 is trying to acquire lock:
+[ 43.685047] (_xmit_ETHER){+.....}, at: [<ffffffff8150169e>] dev_set_rx_mode+0x1e/0x40
+[ 43.685460] but task is already holding lock:
+[ 43.685618] (_xmit_ETHER){+.....}, at: [<ffffffff815072a7>] dev_uc_add+0x27/0x80
+[ 43.686015] other info that might help us debug this:
+[ 43.686316] Possible unsafe locking scenario:
+
+[ 43.686743] CPU0
+[ 43.686967] ----
+[ 43.687197] lock(_xmit_ETHER);
+[ 43.687544] lock(_xmit_ETHER);
+[ 43.687886] *** DEADLOCK ***
+
+[ 43.688438] May be due to missing lock nesting notation
+
+[ 43.688882] 2 locks held by brctl/1187:
+[ 43.689134] #0: (rtnl_mutex){+.+.+.}, at: [<ffffffff81510317>] rtnl_lock+0x17/0x20
+[ 43.689852] #1: (_xmit_ETHER){+.....}, at: [<ffffffff815072a7>] dev_uc_add+0x27/0x80
+[ 43.690575] stack backtrace:
+[ 43.690970] CPU: 0 PID: 1187 Comm: brctl Not tainted 4.4.0-rc8+ #54
+[ 43.691270] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014
+[ 43.691770] ffffffff826a25c0 ffff8800369fb8e0 ffffffff81360ceb ffffffff826a25c0
+[ 43.692425] ffff8800369fb9b8 ffffffff810d0466 ffff8800369fb968 ffffffff81537139
+[ 43.693071] ffff88003a08c880 0000000000000000 00000000ffffffff 0000000002080020
+[ 43.693709] Call Trace:
+[ 43.693931] [<ffffffff81360ceb>] dump_stack+0x4b/0x70
+[ 43.694199] [<ffffffff810d0466>] __lock_acquire+0x1e46/0x1e90
+[ 43.694483] [<ffffffff81537139>] ? netlink_broadcast_filtered+0x139/0x3e0
+[ 43.694789] [<ffffffff8153b5da>] ? nlmsg_notify+0x5a/0xc0
+[ 43.695064] [<ffffffff810d10f5>] lock_acquire+0xe5/0x1f0
+[ 43.695340] [<ffffffff8150169e>] ? dev_set_rx_mode+0x1e/0x40
+[ 43.695623] [<ffffffff815edea5>] _raw_spin_lock_bh+0x45/0x80
+[ 43.695901] [<ffffffff8150169e>] ? dev_set_rx_mode+0x1e/0x40
+[ 43.696180] [<ffffffff8150169e>] dev_set_rx_mode+0x1e/0x40
+[ 43.696460] [<ffffffff8150189c>] dev_set_promiscuity+0x3c/0x50
+[ 43.696750] [<ffffffffa0586845>] br_port_set_promisc+0x25/0x50 [bridge]
+[ 43.697052] [<ffffffffa05869aa>] br_manage_promisc+0x8a/0xe0 [bridge]
+[ 43.697348] [<ffffffffa05826ee>] br_dev_change_rx_flags+0x1e/0x20 [bridge]
+[ 43.697655] [<ffffffff81501532>] __dev_set_promiscuity+0x132/0x1f0
+[ 43.697943] [<ffffffff81501672>] __dev_set_rx_mode+0x82/0x90
+[ 43.698223] [<ffffffff815072de>] dev_uc_add+0x5e/0x80
+[ 43.698498] [<ffffffffa05b3c62>] vlan_device_event+0x542/0x650 [8021q]
+[ 43.698798] [<ffffffff8109886d>] notifier_call_chain+0x5d/0x80
+[ 43.699083] [<ffffffff810988b6>] raw_notifier_call_chain+0x16/0x20
+[ 43.699374] [<ffffffff814f456e>] call_netdevice_notifiers_info+0x6e/0x80
+[ 43.699678] [<ffffffff814f4596>] call_netdevice_notifiers+0x16/0x20
+[ 43.699973] [<ffffffffa05872be>] br_add_if+0x47e/0x4c0 [bridge]
+[ 43.700259] [<ffffffffa058801e>] add_del_if+0x6e/0x80 [bridge]
+[ 43.700548] [<ffffffffa0588b5f>] br_dev_ioctl+0xaf/0xc0 [bridge]
+[ 43.700836] [<ffffffff8151a7ac>] dev_ifsioc+0x30c/0x3c0
+[ 43.701106] [<ffffffff8151aac9>] dev_ioctl+0xf9/0x6f0
+[ 43.701379] [<ffffffff81254345>] ? mntput_no_expire+0x5/0x450
+[ 43.701665] [<ffffffff812543ee>] ? mntput_no_expire+0xae/0x450
+[ 43.701947] [<ffffffff814d7b02>] sock_do_ioctl+0x42/0x50
+[ 43.702219] [<ffffffff814d8175>] sock_ioctl+0x1e5/0x290
+[ 43.702500] [<ffffffff81242d0b>] do_vfs_ioctl+0x2cb/0x5c0
+[ 43.702771] [<ffffffff81243079>] SyS_ioctl+0x79/0x90
+[ 43.703033] [<ffffffff815eebb6>] entry_SYSCALL_64_fastpath+0x16/0x7a
+
+CC: Vlad Yasevich <vyasevic@redhat.com>
+CC: Stephen Hemminger <stephen@networkplumber.org>
+CC: Bridge list <bridge@lists.linux-foundation.org>
+CC: Andy Gospodarek <gospo@cumulusnetworks.com>
+CC: Roopa Prabhu <roopa@cumulusnetworks.com>
+Fixes: 2796d0c648c9 ("bridge: Automatically manage port promiscuous mode.")
+Reported-by: Andy Gospodarek <gospo@cumulusnetworks.com>
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_device.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/net/bridge/br_device.c
++++ b/net/bridge/br_device.c
+@@ -28,6 +28,8 @@
+ const struct nf_br_ops __rcu *nf_br_ops __read_mostly;
+ EXPORT_SYMBOL_GPL(nf_br_ops);
+
++static struct lock_class_key bridge_netdev_addr_lock_key;
++
+ /* net device transmit always called with BH disabled */
+ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+@@ -87,6 +89,11 @@ out:
+ return NETDEV_TX_OK;
+ }
+
++static void br_set_lockdep_class(struct net_device *dev)
++{
++ lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key);
++}
++
+ static int br_dev_init(struct net_device *dev)
+ {
+ struct net_bridge *br = netdev_priv(dev);
+@@ -99,6 +106,7 @@ static int br_dev_init(struct net_device
+ err = br_vlan_init(br);
+ if (err)
+ free_percpu(br->stats);
++ br_set_lockdep_class(dev);
+
+ return err;
+ }
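+
+For context, the lockdep mechanics behind the splat: every net_device's
+addr_list_lock is initialized from the same code and therefore shares a
+single lock class, so nesting the bridge's lock with a port's lock looks
+to lockdep like recursive A -> A locking. A dedicated key moves the
+bridge's lock into its own class, turning the nesting into B -> A.
+A minimal sketch of the technique on an arbitrary spinlock:
+
+static struct lock_class_key outer_lock_key;
+
+static void my_init_outer(spinlock_t *outer)
+{
+        /* give this lock its own lockdep class so that taking it while
+         * a lock of the default class is held no longer reports a
+         * false recursive-lock deadlock
+         */
+        lockdep_set_class(outer, &outer_lock_key);
+}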
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Date: Tue, 5 Jan 2016 10:46:00 +0100
+Subject: bridge: Only call /sbin/bridge-stp for the initial network namespace
+
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+
+[ Upstream commit ff62198553e43cdffa9d539f6165d3e83f8a42bc ]
+
+[I stole this patch from Eric Biederman. He wrote:]
+
+> There is no defined mechanism to pass network namespace information
+> into /sbin/bridge-stp therefore don't even try to invoke it except
+> for bridge devices in the initial network namespace.
+>
+> It is possible for unprivileged users to cause /sbin/bridge-stp to be
+> invoked for any network device name, which, if /sbin/bridge-stp does not
+> guard against unreasonable arguments or being invoked twice on the
+> same network device, could cause problems.
+
+[Hannes: changed patch using netns_eq]
+
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_stp_if.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/net/bridge/br_stp_if.c
++++ b/net/bridge/br_stp_if.c
+@@ -128,7 +128,10 @@ static void br_stp_start(struct net_brid
+ char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL };
+ char *envp[] = { NULL };
+
+- r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
++ if (net_eq(dev_net(br->dev), &init_net))
++ r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
++ else
++ r = -ENOENT;
+
+ spin_lock_bh(&br->lock);
+
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Florian Westphal <fw@strlen.de>
+Date: Thu, 31 Dec 2015 14:26:33 +0100
+Subject: connector: bump skb->users before callback invocation
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 55285bf09427c5abf43ee1d54e892f352092b1f1 ]
+
+Dmitry reports a memory leak found with a syzkaller program.
+The problem is that connector bumps the skb usecount but might not invoke
+the callback.
+
+So move skb_get to where we invoke the callback.
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/connector/connector.c | 11 +++--------
+ 1 file changed, 3 insertions(+), 8 deletions(-)
+
+--- a/drivers/connector/connector.c
++++ b/drivers/connector/connector.c
+@@ -178,26 +178,21 @@ static int cn_call_callback(struct sk_bu
+ *
+ * It checks skb, netlink header and msg sizes, and calls callback helper.
+ */
+-static void cn_rx_skb(struct sk_buff *__skb)
++static void cn_rx_skb(struct sk_buff *skb)
+ {
+ struct nlmsghdr *nlh;
+- struct sk_buff *skb;
+ int len, err;
+
+- skb = skb_get(__skb);
+-
+ if (skb->len >= NLMSG_HDRLEN) {
+ nlh = nlmsg_hdr(skb);
+ len = nlmsg_len(nlh);
+
+ if (len < (int)sizeof(struct cn_msg) ||
+ skb->len < nlh->nlmsg_len ||
+- len > CONNECTOR_MAX_MSG_SIZE) {
+- kfree_skb(skb);
++ len > CONNECTOR_MAX_MSG_SIZE)
+ return;
+- }
+
+- err = cn_call_callback(skb);
++ err = cn_call_callback(skb_get(skb));
+ if (err < 0)
+ kfree_skb(skb);
+ }
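+
+The resulting shape generalizes: take the extra reference exactly on the
+path that consumes it, so every early return needs no cleanup. A toy
+version, with valid() and deliver() as assumed helpers:
+
+static void rx(struct sk_buff *skb)
+{
+        if (!valid(skb))
+                return;         /* no extra ref taken, nothing to drop */
+
+        /* bump the usecount only when the skb is actually handed on */
+        if (deliver(skb_get(skb)) < 0)
+                kfree_skb(skb); /* drop the reference taken above */
+}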
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Date: Mon, 21 Dec 2015 12:54:45 +0300
+Subject: ipv6/addrlabel: fix ip6addrlbl_get()
+
+From: Andrey Ryabinin <aryabinin@virtuozzo.com>
+
+[ Upstream commit e459dfeeb64008b2d23bdf600f03b3605dbb8152 ]
+
+ip6addrlbl_get() has never worked. If ip6addrlbl_hold() succeeds,
+ip6addrlbl_get() exits with '-ESRCH'. If ip6addrlbl_hold() fails,
+ip6addrlbl_get() uses an about-to-be-freed ip6addrlbl_entry pointer.
+
+Fix this by inverting ip6addrlbl_hold() check.
+
+Fixes: 2a8cc6c89039 ("[IPV6] ADDRCONF: Support RFC3484 configurable address selection policy table.")
+Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Reviewed-by: Cong Wang <cwang@twopensource.com>
+Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrlabel.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/addrlabel.c
++++ b/net/ipv6/addrlabel.c
+@@ -552,7 +552,7 @@ static int ip6addrlbl_get(struct sk_buff
+
+ rcu_read_lock();
+ p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
+- if (p && ip6addrlbl_hold(p))
++ if (p && !ip6addrlbl_hold(p))
+ p = NULL;
+ lseq = ip6addrlbl_table.seq;
+ rcu_read_unlock();
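+
+The corrected code is the standard RCU lookup-and-hold idiom: the hold
+helper is atomic_inc_not_zero() underneath and returns nonzero on
+success, so it is the failed hold that must NULL the pointer. A generic
+sketch, with foo_lookup() as an assumed RCU-protected lookup:
+
+static struct foo *foo_get(int key)
+{
+        struct foo *p;
+
+        rcu_read_lock();
+        p = foo_lookup(key);
+        if (p && !atomic_inc_not_zero(&p->refcnt))
+                p = NULL;       /* refcnt already hit zero: being freed */
+        rcu_read_unlock();
+        return p;               /* caller owns a reference, or NULL */
+}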
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 8 Jan 2016 09:35:51 -0800
+Subject: ipv6: tcp: add rcu locking in tcp_v6_send_synack()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3e4006f0b86a5ae5eb0e8215f9a9e1db24506977 ]
+
+When the first SYNACK is sent, we already hold rcu_read_lock(), but this
+is not true if a SYNACK is retransmitted, as a timer (soft) interrupt
+does not hold rcu_read_lock().
+
+Fixes: 45f6fad84cc30 ("ipv6: add complete rcu protection around np->opt")
+Reported-by: Dave Jones <davej@codemonkey.org.uk>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/tcp_ipv6.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -465,8 +465,10 @@ static int tcp_v6_send_synack(struct soc
+ fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
+
+ skb_set_queue_mapping(skb, queue_mapping);
++ rcu_read_lock();
+ err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
+ np->tclass);
++ rcu_read_unlock();
+ err = net_xmit_eval(err);
+ }
+
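+
+The rule being enforced: rcu_dereference() is only legal inside an RCU
+read-side critical section. The first SYNACK is sent from a context that
+already holds rcu_read_lock(), but the retransmit timer runs in softirq
+context without it. A sketch of the timer-side pattern (the function and
+its body are illustrative, not the actual TCP timer code):
+
+static void retransmit_timer(unsigned long data)
+{
+        struct sock *sk = (struct sock *)data;
+        struct ipv6_txoptions *opt;
+
+        /* timer context holds no rcu_read_lock(), so a bare
+         * rcu_dereference() here would be a PROVE_RCU splat and a
+         * potential use-after-free; take the section locally
+         */
+        rcu_read_lock();
+        opt = rcu_dereference(inet6_sk(sk)->opt);
+        /* ... build and transmit the SYNACK using opt ... */
+        rcu_read_unlock();
+}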
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 15 Jan 2016 04:56:56 -0800
+Subject: ipv6: update skb->csum when CE mark is propagated
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 34ae6a1aa0540f0f781dd265366036355fdc8930 ]
+
+When a tunnel decapsulates the outer header, it has to comply
+with RFC 6040 and, where required, propagate the CE mark into the
+inner header.
+
+It turns out IP6_ECN_set_ce() does not correctly update skb->csum
+for CHECKSUM_COMPLETE packets, triggering infamous "hw csum failure"
+messages and stack traces.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_ecn.h | 19 ++++++++++++++++---
+ net/ipv6/xfrm6_mode_tunnel.c | 2 +-
+ 2 files changed, 17 insertions(+), 4 deletions(-)
+
+--- a/include/net/inet_ecn.h
++++ b/include/net/inet_ecn.h
+@@ -111,11 +111,24 @@ static inline void ipv4_copy_dscp(unsign
+
+ struct ipv6hdr;
+
+-static inline int IP6_ECN_set_ce(struct ipv6hdr *iph)
++/* Note:
++ * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE,
++ * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE
++ * In IPv6 case, no checksum compensates the change in IPv6 header,
++ * so we have to update skb->csum.
++ */
++static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
+ {
++ __be32 from, to;
++
+ if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph)))
+ return 0;
+- *(__be32*)iph |= htonl(INET_ECN_CE << 20);
++
++ from = *(__be32 *)iph;
++ to = from | htonl(INET_ECN_CE << 20);
++ *(__be32 *)iph = to;
++ if (skb->ip_summed == CHECKSUM_COMPLETE)
++ skb->csum = csum_add(csum_sub(skb->csum, from), to);
+ return 1;
+ }
+
+@@ -142,7 +155,7 @@ static inline int INET_ECN_set_ce(struct
+ case cpu_to_be16(ETH_P_IPV6):
+ if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
+ skb_tail_pointer(skb))
+- return IP6_ECN_set_ce(ipv6_hdr(skb));
++ return IP6_ECN_set_ce(skb, ipv6_hdr(skb));
+ break;
+ }
+
+--- a/net/ipv6/xfrm6_mode_tunnel.c
++++ b/net/ipv6/xfrm6_mode_tunnel.c
+@@ -24,7 +24,7 @@ static inline void ipip6_ecn_decapsulate
+ struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
+
+ if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
+- IP6_ECN_set_ce(inner_iph);
++ IP6_ECN_set_ce(skb, inner_iph);
+ }
+
+ /* Add encapsulation header.
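+
+Why the csum_add/csum_sub pair is sufficient, demonstrated standalone:
+under CHECKSUM_COMPLETE, skb->csum is a 16-bit one's complement sum kept
+in 32 bits, so rewriting one 32-bit word from `from` to `to` is
+compensated by subtracting `from` and adding `to` with end-around carry
+(the RFC 1624 incremental update). Plain userspace C with made-up
+values; byte order and the kernel's __wsum type are glossed over here:
+
+#include <stdint.h>
+#include <stdio.h>
+
+static uint32_t csum_add32(uint32_t csum, uint32_t addend)
+{
+        uint32_t sum = csum + addend;
+
+        return sum + (sum < addend);      /* fold the end-around carry */
+}
+
+static uint32_t csum_sub32(uint32_t csum, uint32_t addend)
+{
+        return csum_add32(csum, ~addend); /* -x is ~x in one's complement */
+}
+
+int main(void)
+{
+        uint32_t csum = 0x1234abcd;        /* pretend CHECKSUM_COMPLETE sum */
+        uint32_t from = 0x60200000;        /* first IPv6 word, ECT(0) */
+        uint32_t to   = from | (3u << 20); /* the same word with CE set */
+
+        csum = csum_add32(csum_sub32(csum, from), to);
+        printf("compensated csum: 0x%08x\n", csum);
+        return 0;
+}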
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Sun, 1 Nov 2015 16:21:24 +0000
+Subject: isdn_ppp: Add checks for allocation failure in isdn_ppp_open()
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+[ Upstream commit 0baa57d8dc32db78369d8b5176ef56c5e2e18ab3 ]
+
+Compile-tested only.
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/isdn/i4l/isdn_ppp.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/isdn/i4l/isdn_ppp.c
++++ b/drivers/isdn/i4l/isdn_ppp.c
+@@ -301,6 +301,8 @@ isdn_ppp_open(int min, struct file *file
+ is->compflags = 0;
+
+ is->reset = isdn_ppp_ccp_reset_alloc(is);
++ if (!is->reset)
++ return -ENOMEM;
+
+ is->lp = NULL;
+ is->mp_seqno = 0; /* MP sequence number */
+@@ -320,6 +322,10 @@ isdn_ppp_open(int min, struct file *file
+ * VJ header compression init
+ */
+ is->slcomp = slhc_init(16, 16); /* not necessary for 2. link in bundle */
++ if (!is->slcomp) {
++ isdn_ppp_ccp_reset_free(is);
++ return -ENOMEM;
++ }
+ #endif
+ #ifdef CONFIG_IPPP_FILTER
+ is->pass_filter = NULL;
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Rabin Vincent <rabin@rab.in>
+Date: Tue, 12 Jan 2016 20:17:08 +0100
+Subject: net: bpf: reject invalid shifts
+
+From: Rabin Vincent <rabin@rab.in>
+
+[ Upstream commit 229394e8e62a4191d592842cf67e80c62a492937 ]
+
+On ARM64, a BUG() is triggered in the eBPF JIT if a filter with a
+constant shift that can't be encoded in the immediate field of the
+UBFM/SBFM instructions is passed to the JIT. Since these shifts
+amounts, which are negative or >= regsize, are invalid, reject them in
+the eBPF verifier and the classic BPF filter checker, for all
+architectures.
+
+Signed-off-by: Rabin Vincent <rabin@rab.in>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c | 10 ++++++++++
+ net/core/filter.c | 5 +++++
+ 2 files changed, 15 insertions(+)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1019,6 +1019,16 @@ static int check_alu_op(struct reg_state
+ return -EINVAL;
+ }
+
++ if ((opcode == BPF_LSH || opcode == BPF_RSH ||
++ opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
++ int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
++
++ if (insn->imm < 0 || insn->imm >= size) {
++ verbose("invalid shift %d\n", insn->imm);
++ return -EINVAL;
++ }
++ }
++
+ /* pattern match 'bpf_add Rx, imm' instruction */
+ if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 &&
+ regs[insn->dst_reg].type == FRAME_PTR &&
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -775,6 +775,11 @@ int bpf_check_classic(const struct sock_
+ if (ftest->k == 0)
+ return -EINVAL;
+ break;
++ case BPF_ALU | BPF_LSH | BPF_K:
++ case BPF_ALU | BPF_RSH | BPF_K:
++ if (ftest->k >= 32)
++ return -EINVAL;
++ break;
+ case BPF_LD | BPF_MEM:
+ case BPF_LDX | BPF_MEM:
+ case BPF_ST:
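+
+What the classic-BPF half of the check means for userspace, as a small
+runnable sketch: after this change, a filter whose constant shift amount
+is out of range is refused at attach time with EINVAL instead of ever
+reaching a JIT. The socket type and filter contents are arbitrary.
+
+#include <linux/filter.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+int main(void)
+{
+        struct sock_filter insns[] = {
+                BPF_STMT(BPF_LD | BPF_W | BPF_LEN, 0),   /* A = skb->len */
+                BPF_STMT(BPF_ALU | BPF_LSH | BPF_K, 32), /* invalid: k >= 32 */
+                BPF_STMT(BPF_RET | BPF_K, 0xffff),
+        };
+        struct sock_fprog prog = { .len = 3, .filter = insns };
+        int fd = socket(AF_INET, SOCK_DGRAM, 0);
+
+        if (fd < 0)
+                return 1;
+        if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
+                       &prog, sizeof(prog)) < 0)
+                printf("attach rejected: %s\n", strerror(errno)); /* EINVAL */
+        return 0;
+}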
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
+Date: Wed, 23 Dec 2015 13:42:43 +0100
+Subject: net: cdc_ncm: avoid changing RX/TX buffers on MTU changes
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
+
+[ Upstream commit 1dfddff5fcd869fcab0c52fafae099dfa435a935 ]
+
+NCM buffer sizes are negotiated with the device independently of
+the network device MTU. The RX buffers are allocated by the
+usbnet framework based on the rx_urb_size value set by cdc_ncm. A
+single RX buffer can hold a number of MTU sized packets.
+
+The default usbnet change_mtu ndo only modifies rx_urb_size if it
+is equal to hard_mtu. And the cdc_ncm driver will set rx_urb_size
+and hard_mtu independently of each other, based on dwNtbInMaxSize
+and dwNtbOutMaxSize respectively. It was therefore assumed that
+usbnet_change_mtu() would never touch rx_urb_size. This failed to
+consider the case where dwNtbInMaxSize and dwNtbOutMaxSize happens
+to be equal.
+
+Fix by implementing an NCM specific change_mtu ndo, modifying the
+netdev MTU without touching the buffer size settings.
+
+Signed-off-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/cdc_mbim.c | 2 +-
+ drivers/net/usb/cdc_ncm.c | 31 +++++++++++++++++++++++++++++++
+ include/linux/usb/cdc_ncm.h | 1 +
+ 3 files changed, 33 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/usb/cdc_mbim.c
++++ b/drivers/net/usb/cdc_mbim.c
+@@ -100,7 +100,7 @@ static const struct net_device_ops cdc_m
+ .ndo_stop = usbnet_stop,
+ .ndo_start_xmit = usbnet_start_xmit,
+ .ndo_tx_timeout = usbnet_tx_timeout,
+- .ndo_change_mtu = usbnet_change_mtu,
++ .ndo_change_mtu = cdc_ncm_change_mtu,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_vlan_rx_add_vid = cdc_mbim_rx_add_vid,
+--- a/drivers/net/usb/cdc_ncm.c
++++ b/drivers/net/usb/cdc_ncm.c
+@@ -41,6 +41,7 @@
+ #include <linux/module.h>
+ #include <linux/netdevice.h>
+ #include <linux/ctype.h>
++#include <linux/etherdevice.h>
+ #include <linux/ethtool.h>
+ #include <linux/workqueue.h>
+ #include <linux/mii.h>
+@@ -687,6 +688,33 @@ static void cdc_ncm_free(struct cdc_ncm_
+ kfree(ctx);
+ }
+
++/* we need to override the usbnet change_mtu ndo for two reasons:
++ * - respect the negotiated maximum datagram size
++ * - avoid unwanted changes to rx and tx buffers
++ */
++int cdc_ncm_change_mtu(struct net_device *net, int new_mtu)
++{
++ struct usbnet *dev = netdev_priv(net);
++ struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
++ int maxmtu = ctx->max_datagram_size - cdc_ncm_eth_hlen(dev);
++
++ if (new_mtu <= 0 || new_mtu > maxmtu)
++ return -EINVAL;
++ net->mtu = new_mtu;
++ return 0;
++}
++EXPORT_SYMBOL_GPL(cdc_ncm_change_mtu);
++
++static const struct net_device_ops cdc_ncm_netdev_ops = {
++ .ndo_open = usbnet_open,
++ .ndo_stop = usbnet_stop,
++ .ndo_start_xmit = usbnet_start_xmit,
++ .ndo_tx_timeout = usbnet_tx_timeout,
++ .ndo_change_mtu = cdc_ncm_change_mtu,
++ .ndo_set_mac_address = eth_mac_addr,
++ .ndo_validate_addr = eth_validate_addr,
++};
++
+ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting)
+ {
+ const struct usb_cdc_union_desc *union_desc = NULL;
+@@ -861,6 +889,9 @@ advance:
+ /* add our sysfs attrs */
+ dev->net->sysfs_groups[0] = &cdc_ncm_sysfs_attr_group;
+
++ /* must handle MTU changes */
++ dev->net->netdev_ops = &cdc_ncm_netdev_ops;
++
+ return 0;
+
+ error2:
+--- a/include/linux/usb/cdc_ncm.h
++++ b/include/linux/usb/cdc_ncm.h
+@@ -133,6 +133,7 @@ struct cdc_ncm_ctx {
+ };
+
+ u8 cdc_ncm_select_altsetting(struct usb_interface *intf);
++int cdc_ncm_change_mtu(struct net_device *net, int new_mtu);
+ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting);
+ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf);
+ struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign);
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Rabin Vincent <rabin@rab.in>
+Date: Tue, 5 Jan 2016 16:23:07 +0100
+Subject: net: filter: make JITs zero A for SKF_AD_ALU_XOR_X
+
+From: Rabin Vincent <rabin@rab.in>
+
+[ Upstream commit 55795ef5469290f89f04e12e662ded604909e462 ]
+
+The SKF_AD_ALU_XOR_X ancillary is not like the other ancillary data
+instructions since it XORs A with X while all the others replace A with
+some loaded value. All the BPF JITs fail to clear A if this is used as
+the first instruction in a filter. This was found using american fuzzy
+lop.
+
+Add a helper to determine if A needs to be cleared given the first
+instruction in a filter, and use this in the JITs. Except for ARM, the
+rest have only been compile-tested.
+
+Fixes: 3480593131e0 ("net: filter: get rid of BPF_S_* enum")
+Signed-off-by: Rabin Vincent <rabin@rab.in>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/net/bpf_jit_32.c | 16 +---------------
+ arch/mips/net/bpf_jit.c | 16 +---------------
+ arch/powerpc/net/bpf_jit_comp.c | 13 ++-----------
+ arch/sparc/net/bpf_jit_comp.c | 17 ++---------------
+ include/linux/filter.h | 19 +++++++++++++++++++
+ 5 files changed, 25 insertions(+), 56 deletions(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -162,19 +162,6 @@ static inline int mem_words_used(struct
+ return fls(ctx->seen & SEEN_MEM);
+ }
+
+-static inline bool is_load_to_a(u16 inst)
+-{
+- switch (inst) {
+- case BPF_LD | BPF_W | BPF_LEN:
+- case BPF_LD | BPF_W | BPF_ABS:
+- case BPF_LD | BPF_H | BPF_ABS:
+- case BPF_LD | BPF_B | BPF_ABS:
+- return true;
+- default:
+- return false;
+- }
+-}
+-
+ static void jit_fill_hole(void *area, unsigned int size)
+ {
+ u32 *ptr;
+@@ -186,7 +173,6 @@ static void jit_fill_hole(void *area, un
+ static void build_prologue(struct jit_ctx *ctx)
+ {
+ u16 reg_set = saved_regs(ctx);
+- u16 first_inst = ctx->skf->insns[0].code;
+ u16 off;
+
+ #ifdef CONFIG_FRAME_POINTER
+@@ -216,7 +202,7 @@ static void build_prologue(struct jit_ct
+ emit(ARM_MOV_I(r_X, 0), ctx);
+
+ /* do not leak kernel data to userspace */
+- if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
++ if (bpf_needs_clear_a(&ctx->skf->insns[0]))
+ emit(ARM_MOV_I(r_A, 0), ctx);
+
+ /* stack space for the BPF_MEM words */
+--- a/arch/mips/net/bpf_jit.c
++++ b/arch/mips/net/bpf_jit.c
+@@ -556,19 +556,6 @@ static inline u16 align_sp(unsigned int
+ return num;
+ }
+
+-static bool is_load_to_a(u16 inst)
+-{
+- switch (inst) {
+- case BPF_LD | BPF_W | BPF_LEN:
+- case BPF_LD | BPF_W | BPF_ABS:
+- case BPF_LD | BPF_H | BPF_ABS:
+- case BPF_LD | BPF_B | BPF_ABS:
+- return true;
+- default:
+- return false;
+- }
+-}
+-
+ static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
+ {
+ int i = 0, real_off = 0;
+@@ -686,7 +673,6 @@ static unsigned int get_stack_depth(stru
+
+ static void build_prologue(struct jit_ctx *ctx)
+ {
+- u16 first_inst = ctx->skf->insns[0].code;
+ int sp_off;
+
+ /* Calculate the total offset for the stack pointer */
+@@ -700,7 +686,7 @@ static void build_prologue(struct jit_ct
+ emit_jit_reg_move(r_X, r_zero, ctx);
+
+ /* Do not leak kernel data to userspace */
+- if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
++ if (bpf_needs_clear_a(&ctx->skf->insns[0]))
+ emit_jit_reg_move(r_A, r_zero, ctx);
+ }
+
+--- a/arch/powerpc/net/bpf_jit_comp.c
++++ b/arch/powerpc/net/bpf_jit_comp.c
+@@ -78,18 +78,9 @@ static void bpf_jit_build_prologue(struc
+ PPC_LI(r_X, 0);
+ }
+
+- switch (filter[0].code) {
+- case BPF_RET | BPF_K:
+- case BPF_LD | BPF_W | BPF_LEN:
+- case BPF_LD | BPF_W | BPF_ABS:
+- case BPF_LD | BPF_H | BPF_ABS:
+- case BPF_LD | BPF_B | BPF_ABS:
+- /* first instruction sets A register (or is RET 'constant') */
+- break;
+- default:
+- /* make sure we dont leak kernel information to user */
++ /* make sure we dont leak kernel information to user */
++ if (bpf_needs_clear_a(&filter[0]))
+ PPC_LI(r_A, 0);
+- }
+ }
+
+ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+--- a/arch/sparc/net/bpf_jit_comp.c
++++ b/arch/sparc/net/bpf_jit_comp.c
+@@ -420,22 +420,9 @@ void bpf_jit_compile(struct bpf_prog *fp
+ }
+ emit_reg_move(O7, r_saved_O7);
+
+- switch (filter[0].code) {
+- case BPF_RET | BPF_K:
+- case BPF_LD | BPF_W | BPF_LEN:
+- case BPF_LD | BPF_W | BPF_ABS:
+- case BPF_LD | BPF_H | BPF_ABS:
+- case BPF_LD | BPF_B | BPF_ABS:
+- /* The first instruction sets the A register (or is
+- * a "RET 'constant'")
+- */
+- break;
+- default:
+- /* Make sure we dont leak kernel information to the
+- * user.
+- */
++ /* Make sure we dont leak kernel information to the user. */
++ if (bpf_needs_clear_a(&filter[0]))
+ emit_clear(r_A); /* A = 0 */
+- }
+
+ for (i = 0; i < flen; i++) {
+ unsigned int K = filter[i].k;
+--- a/include/linux/filter.h
++++ b/include/linux/filter.h
+@@ -428,6 +428,25 @@ static inline void bpf_jit_free(struct b
+
+ #define BPF_ANC BIT(15)
+
++static inline bool bpf_needs_clear_a(const struct sock_filter *first)
++{
++ switch (first->code) {
++ case BPF_RET | BPF_K:
++ case BPF_LD | BPF_W | BPF_LEN:
++ return false;
++
++ case BPF_LD | BPF_W | BPF_ABS:
++ case BPF_LD | BPF_H | BPF_ABS:
++ case BPF_LD | BPF_B | BPF_ABS:
++ if (first->k == SKF_AD_OFF + SKF_AD_ALU_XOR_X)
++ return true;
++ return false;
++
++ default:
++ return true;
++ }
++}
++
+ static inline u16 bpf_anc_helper(const struct sock_filter *ftest)
+ {
+ BUG_ON(ftest->code & BPF_ANC);
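+
+The filter shape that was leaking, seen from userspace (a sketch):
+SKF_AD_ALU_XOR_X is an "ancillary load" that actually computes A ^= X,
+so as the first instruction it folds whatever the JIT prologue left in
+the A register into the filter's return value.
+
+#include <linux/filter.h>
+
+static struct sock_filter leaky[] = {
+        /* A = A ^ X, but A was never initialized by this filter */
+        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_ALU_XOR_X),
+        /* return A: without the fix, leaks stale register contents */
+        BPF_STMT(BPF_RET | BPF_A, 0),
+};
+
+With bpf_needs_clear_a(), every JIT prologue now zeroes A for such
+filters instead of relying on the old, incomplete first-instruction
+switch.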
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Francesco Ruggeri <fruggeri@aristanetworks.com>
+Date: Wed, 6 Jan 2016 00:18:48 -0800
+Subject: net: possible use after free in dst_release
+
+From: Francesco Ruggeri <fruggeri@aristanetworks.com>
+
+[ Upstream commit 07a5d38453599052aff0877b16bb9c1585f08609 ]
+
+dst_release should not access dst->flags after decrementing
+__refcnt to 0. The dst_entry may be in dst_busy_list and
+dst_gc_task may dst_destroy it before dst_release gets a chance
+to access dst->flags.
+
+Fixes: d69bbf88c8d0 ("net: fix a race in dst_release()")
+Fixes: 27b75c95f10d ("net: avoid RCU for NOCACHE dst")
+Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dst.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/core/dst.c
++++ b/net/core/dst.c
+@@ -282,10 +282,11 @@ void dst_release(struct dst_entry *dst)
+ {
+ if (dst) {
+ int newrefcnt;
++ unsigned short nocache = dst->flags & DST_NOCACHE;
+
+ newrefcnt = atomic_dec_return(&dst->__refcnt);
+ WARN_ON(newrefcnt < 0);
+- if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE))
++ if (!newrefcnt && unlikely(nocache))
+ call_rcu(&dst->rcu_head, dst_destroy_rcu);
+ }
+ }
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Konstantin Khlebnikov <koct9i@gmail.com>
+Date: Fri, 8 Jan 2016 15:21:46 +0300
+Subject: net: preserve IP control block during GSO segmentation
+
+From: Konstantin Khlebnikov <koct9i@gmail.com>
+
+[ Upstream commit 9207f9d45b0ad071baa128e846d7e7ed85016df3 ]
+
+skb_gso_segment() uses the skb control block during segmentation.
+This patch adds 32 bytes of room for the previous control block, which
+will be copied into all resulting segments.
+
+This patch fixes a kernel crash when fragmenting forwarded packets:
+fragmentation requires a valid IP CB in the skb for clearing IP options.
+The patch also removes the custom save/restore in the OVS code, which
+is now redundant.
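+
+A rough sketch of the resulting skb->cb layout (illustrative struct,
+not a real kernel definition; skb->cb is 48 bytes):
+
+	struct cb_layout {
+		char protocol_cb[32];	/* IPCB/IP6CB/OVS_CB preserved
+					 * across segmentation */
+		char gso_cb[16];	/* struct skb_gso_cb now lives at
+					 * SKB_SGO_CB_OFFSET (32) */
+	};
+
+The BUILD_BUG_ON checks added below enforce both halves of this split
+at compile time.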
+
+Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com>
+Link: http://lkml.kernel.org/r/CALYGNiP-0MZ-FExV2HutTvE9U-QQtkKSoE--KN=JQE5STYsjAA@mail.gmail.com
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h | 3 ++-
+ net/core/dev.c | 5 +++++
+ net/ipv4/ip_output.c | 1 +
+ net/openvswitch/datapath.c | 5 +----
+ net/xfrm/xfrm_output.c | 2 ++
+ 5 files changed, 11 insertions(+), 5 deletions(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -3320,7 +3320,8 @@ struct skb_gso_cb {
+ int encap_level;
+ __u16 csum_start;
+ };
+-#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb)
++#define SKB_SGO_CB_OFFSET 32
++#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_SGO_CB_OFFSET))
+
+ static inline int skb_tnl_header_len(const struct sk_buff *inner_skb)
+ {
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2479,6 +2479,8 @@ static inline bool skb_needs_check(struc
+ *
+ * It may return NULL if the skb requires no segmentation. This is
+ * only possible when GSO is used for verifying header integrity.
++ *
++ * Segmentation preserves SKB_SGO_CB_OFFSET bytes of previous skb cb.
+ */
+ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+ netdev_features_t features, bool tx_path)
+@@ -2493,6 +2495,9 @@ struct sk_buff *__skb_gso_segment(struct
+ return ERR_PTR(err);
+ }
+
++ BUILD_BUG_ON(SKB_SGO_CB_OFFSET +
++ sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
++
+ SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
+ SKB_GSO_CB(skb)->encap_level = 0;
+
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -235,6 +235,7 @@ static int ip_finish_output_gso(struct s
+ * from host network stack.
+ */
+ features = netif_skb_features(skb);
++ BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+ if (IS_ERR_OR_NULL(segs)) {
+ kfree_skb(skb);
+--- a/net/openvswitch/datapath.c
++++ b/net/openvswitch/datapath.c
+@@ -337,12 +337,10 @@ static int queue_gso_packets(struct data
+ unsigned short gso_type = skb_shinfo(skb)->gso_type;
+ struct sw_flow_key later_key;
+ struct sk_buff *segs, *nskb;
+- struct ovs_skb_cb ovs_cb;
+ int err;
+
+- ovs_cb = *OVS_CB(skb);
++ BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET);
+ segs = __skb_gso_segment(skb, NETIF_F_SG, false);
+- *OVS_CB(skb) = ovs_cb;
+ if (IS_ERR(segs))
+ return PTR_ERR(segs);
+ if (segs == NULL)
+@@ -360,7 +358,6 @@ static int queue_gso_packets(struct data
+ /* Queue all of the segments. */
+ skb = segs;
+ do {
+- *OVS_CB(skb) = ovs_cb;
+ if (gso_type & SKB_GSO_UDP && skb != segs)
+ key = &later_key;
+
+--- a/net/xfrm/xfrm_output.c
++++ b/net/xfrm/xfrm_output.c
+@@ -153,6 +153,8 @@ static int xfrm_output_gso(struct sock *
+ {
+ struct sk_buff *segs;
+
++ BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
++ BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_SGO_CB_OFFSET);
+ segs = skb_gso_segment(skb, 0);
+ kfree_skb(skb);
+ if (IS_ERR(segs))
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: John Fastabend <john.fastabend@gmail.com>
+Date: Tue, 5 Jan 2016 09:11:36 -0800
+Subject: net: sched: fix missing free per cpu on qstats
+
+From: John Fastabend <john.fastabend@gmail.com>
+
+[ Upstream commit 73c20a8b7245273125cfe92c4b46e6fdb568a801 ]
+
+When a qdisc is using per-cpu stats (currently just the ingress
+qdisc), only the bstats are being freed. This patch also frees the
+qstats.
+
+Fixes: b0ab6f92752b9f9d8 ("net: sched: enable per cpu qstats")
+Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_generic.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -666,8 +666,10 @@ static void qdisc_rcu_free(struct rcu_he
+ {
+ struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
+
+- if (qdisc_is_percpu_stats(qdisc))
++ if (qdisc_is_percpu_stats(qdisc)) {
+ free_percpu(qdisc->cpu_bstats);
++ free_percpu(qdisc->cpu_qstats);
++ }
+
+ kfree((char *) qdisc - qdisc->padded);
+ }
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Sasha Levin <sasha.levin@oracle.com>
+Date: Thu, 7 Jan 2016 14:52:43 -0500
+Subject: net: sctp: prevent writes to cookie_hmac_alg from accessing invalid memory
+
+From: Sasha Levin <sasha.levin@oracle.com>
+
+[ Upstream commit 320f1a4a175e7cd5d3f006f92b4d4d3e2cbb7bb5 ]
+
+proc_dostring() needs an initialized destination string, while the one
+provided in proc_sctp_do_hmac_alg() contains stack garbage.
+
+Thus, writing to cookie_hmac_alg would strlen() that garbage and end up
+accessing invalid memory.
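+
+A minimal userspace-flavoured sketch of the underlying C rule (not
+kernel code): strlen() on an uninitialized array is undefined, while a
+zero-initialized array is always a valid empty string.
+
+	char garbage[8];	/* strlen(garbage) reads stack garbage */
+	char safe[8] = {0};	/* strlen(safe) == 0, always defined */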
+
+Fixes: 3c68198e7 ("sctp: Make hmac algorithm selection for cookie generation dynamic")
+Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/sysctl.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sctp/sysctl.c
++++ b/net/sctp/sysctl.c
+@@ -320,7 +320,7 @@ static int proc_sctp_do_hmac_alg(struct
+ struct ctl_table tbl;
+ bool changed = false;
+ char *none = "none";
+- char tmp[8];
++ char tmp[8] = {0};
+ int ret;
+
+ memset(&tbl, 0, sizeof(struct ctl_table));
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 12 Jan 2016 08:58:00 -0800
+Subject: phonet: properly unshare skbs in phonet_rcv()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7aaed57c5c2890634cfadf725173c7c68ea4cb4f ]
+
+Ivaylo Dimitrov reported a regression caused by commit 7866a621043f
+("dev: add per net_device packet type chains").
+
+skb->dev becomes NULL and we crash in __netif_receive_skb_core().
+
+Before the above commit, various kinds of bugs or corruptions could
+happen without a major crash.
+
+The root cause is that phonet_rcv() can queue an skb without checking
+whether the skb is shared.
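+
+A minimal sketch of the canonical pattern for receive handlers that
+may queue or modify the skb (assumed context, not the full phonet
+handler): skb_share_check() clones a shared skb so the handler owns
+its copy, and the packet is dropped on allocation failure.
+
+	static int example_rcv(struct sk_buff *skb)
+	{
+		skb = skb_share_check(skb, GFP_ATOMIC);
+		if (!skb)
+			return NET_RX_DROP;	/* clone failed */
+		/* ... skb is now exclusively owned; safe to queue ... */
+		return NET_RX_SUCCESS;
+	}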
+
+Many thanks to Ivaylo Dimitrov for his help, diagnosis and tests.
+
+Reported-by: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
+Tested-by: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Remi Denis-Courmont <courmisch@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/phonet/af_phonet.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/phonet/af_phonet.c
++++ b/net/phonet/af_phonet.c
+@@ -377,6 +377,10 @@ static int phonet_rcv(struct sk_buff *sk
+ struct sockaddr_pn sa;
+ u16 len;
+
++ skb = skb_share_check(skb, GFP_ATOMIC);
++ if (!skb)
++ return NET_RX_DROP;
++
+ /* check we have at least a full Phonet header */
+ if (!pskb_pull(skb, sizeof(struct phonethdr)))
+ goto out;
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Sun, 1 Nov 2015 16:22:53 +0000
+Subject: ppp, slip: Validate VJ compression slot parameters completely
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+[ Upstream commit 4ab42d78e37a294ac7bc56901d563c642e03c4ae ]
+
+Currently slhc_init() treats out-of-range values of rslots and tslots
+as equivalent to 0, except that if tslots is too large it will
+dereference a null pointer (CVE-2015-7799).
+
+Add a range-check at the top of the function and make it return an
+ERR_PTR() on error instead of NULL. Change the callers accordingly.
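+
+The ERR_PTR idiom used here, in minimal hedged form (example_init is
+illustrative): encode the errno in the returned pointer so callers can
+distinguish -EINVAL from -ENOMEM instead of seeing a bare NULL.
+
+	struct slcompress *example_init(int slots)
+	{
+		if (slots < 0 || slots > 255)
+			return ERR_PTR(-EINVAL);	/* range error */
+		/* ... allocate; return ERR_PTR(-ENOMEM) on failure ... */
+	}
+
+	/* caller */
+	comp = example_init(16);
+	if (IS_ERR(comp))
+		return PTR_ERR(comp);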
+
+Compile-tested only.
+
+Reported-by: 郭永刚 <guoyonggang@360.cn>
+References: http://article.gmane.org/gmane.comp.security.oss.general/17908
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/isdn/i4l/isdn_ppp.c | 10 ++++------
+ drivers/net/ppp/ppp_generic.c | 6 ++----
+ drivers/net/slip/slhc.c | 12 ++++++++----
+ drivers/net/slip/slip.c | 2 +-
+ 4 files changed, 15 insertions(+), 15 deletions(-)
+
+--- a/drivers/isdn/i4l/isdn_ppp.c
++++ b/drivers/isdn/i4l/isdn_ppp.c
+@@ -322,9 +322,9 @@ isdn_ppp_open(int min, struct file *file
+ * VJ header compression init
+ */
+ is->slcomp = slhc_init(16, 16); /* not necessary for 2. link in bundle */
+- if (!is->slcomp) {
++ if (IS_ERR(is->slcomp)) {
+ isdn_ppp_ccp_reset_free(is);
+- return -ENOMEM;
++ return PTR_ERR(is->slcomp);
+ }
+ #endif
+ #ifdef CONFIG_IPPP_FILTER
+@@ -573,10 +573,8 @@ isdn_ppp_ioctl(int min, struct file *fil
+ is->maxcid = val;
+ #ifdef CONFIG_ISDN_PPP_VJ
+ sltmp = slhc_init(16, val);
+- if (!sltmp) {
+- printk(KERN_ERR "ippp, can't realloc slhc struct\n");
+- return -ENOMEM;
+- }
++ if (IS_ERR(sltmp))
++ return PTR_ERR(sltmp);
+ if (is->slcomp)
+ slhc_free(is->slcomp);
+ is->slcomp = sltmp;
+--- a/drivers/net/ppp/ppp_generic.c
++++ b/drivers/net/ppp/ppp_generic.c
+@@ -715,10 +715,8 @@ static long ppp_ioctl(struct file *file,
+ val &= 0xffff;
+ }
+ vj = slhc_init(val2+1, val+1);
+- if (!vj) {
+- netdev_err(ppp->dev,
+- "PPP: no memory (VJ compressor)\n");
+- err = -ENOMEM;
++ if (IS_ERR(vj)) {
++ err = PTR_ERR(vj);
+ break;
+ }
+ ppp_lock(ppp);
+--- a/drivers/net/slip/slhc.c
++++ b/drivers/net/slip/slhc.c
+@@ -84,8 +84,9 @@ static long decode(unsigned char **cpp);
+ static unsigned char * put16(unsigned char *cp, unsigned short x);
+ static unsigned short pull16(unsigned char **cpp);
+
+-/* Initialize compression data structure
++/* Allocate compression data structure
+ * slots must be in range 0 to 255 (zero meaning no compression)
++ * Returns pointer to structure or ERR_PTR() on error.
+ */
+ struct slcompress *
+ slhc_init(int rslots, int tslots)
+@@ -94,11 +95,14 @@ slhc_init(int rslots, int tslots)
+ register struct cstate *ts;
+ struct slcompress *comp;
+
++ if (rslots < 0 || rslots > 255 || tslots < 0 || tslots > 255)
++ return ERR_PTR(-EINVAL);
++
+ comp = kzalloc(sizeof(struct slcompress), GFP_KERNEL);
+ if (! comp)
+ goto out_fail;
+
+- if ( rslots > 0 && rslots < 256 ) {
++ if (rslots > 0) {
+ size_t rsize = rslots * sizeof(struct cstate);
+ comp->rstate = kzalloc(rsize, GFP_KERNEL);
+ if (! comp->rstate)
+@@ -106,7 +110,7 @@ slhc_init(int rslots, int tslots)
+ comp->rslot_limit = rslots - 1;
+ }
+
+- if ( tslots > 0 && tslots < 256 ) {
++ if (tslots > 0) {
+ size_t tsize = tslots * sizeof(struct cstate);
+ comp->tstate = kzalloc(tsize, GFP_KERNEL);
+ if (! comp->tstate)
+@@ -141,7 +145,7 @@ out_free2:
+ out_free:
+ kfree(comp);
+ out_fail:
+- return NULL;
++ return ERR_PTR(-ENOMEM);
+ }
+
+
+--- a/drivers/net/slip/slip.c
++++ b/drivers/net/slip/slip.c
+@@ -164,7 +164,7 @@ static int sl_alloc_bufs(struct slip *sl
+ if (cbuff == NULL)
+ goto err_exit;
+ slcomp = slhc_init(16, 16);
+- if (slcomp == NULL)
++ if (IS_ERR(slcomp))
+ goto err_exit;
+ #endif
+ spin_lock_bh(&sl->lock);
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Karl Heiss <kheiss@gmail.com>
+Date: Thu, 24 Sep 2015 12:15:07 -0400
+Subject: sctp: Prevent soft lockup when sctp_accept() is called during a timeout event
+
+From: Karl Heiss <kheiss@gmail.com>
+
+[ Upstream commit 635682a14427d241bab7bbdeebb48a7d7b91638e ]
+
+A case can occur when sctp_accept() is called by the user during
+a heartbeat timeout event after the 4-way handshake. Since
+sctp_assoc_migrate() changes both assoc->base.sk and assoc->ep, the
+bh_sock_lock in sctp_generate_heartbeat_event() will be taken with
+the listening socket but released with the new association socket.
+The result is a deadlock on any future attempts to take the listening
+socket lock.
+
+Note that this race can occur with other SCTP timeouts that take
+the bh_lock_sock() in the event sctp_accept() is called.
+
+ BUG: soft lockup - CPU#9 stuck for 67s! [swapper:0]
+ ...
+ RIP: 0010:[<ffffffff8152d48e>] [<ffffffff8152d48e>] _spin_lock+0x1e/0x30
+ RSP: 0018:ffff880028323b20 EFLAGS: 00000206
+ RAX: 0000000000000002 RBX: ffff880028323b20 RCX: 0000000000000000
+ RDX: 0000000000000000 RSI: ffff880028323be0 RDI: ffff8804632c4b48
+ RBP: ffffffff8100bb93 R08: 0000000000000000 R09: 0000000000000000
+ R10: ffff880610662280 R11: 0000000000000100 R12: ffff880028323aa0
+ R13: ffff8804383c3880 R14: ffff880028323a90 R15: ffffffff81534225
+ FS: 0000000000000000(0000) GS:ffff880028320000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b
+ CR2: 00000000006df528 CR3: 0000000001a85000 CR4: 00000000000006e0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
+ Process swapper (pid: 0, threadinfo ffff880616b70000, task ffff880616b6cab0)
+ Stack:
+ ffff880028323c40 ffffffffa01c2582 ffff880614cfb020 0000000000000000
+ <d> 0100000000000000 00000014383a6c44 ffff8804383c3880 ffff880614e93c00
+ <d> ffff880614e93c00 0000000000000000 ffff8804632c4b00 ffff8804383c38b8
+ Call Trace:
+ <IRQ>
+ [<ffffffffa01c2582>] ? sctp_rcv+0x492/0xa10 [sctp]
+ [<ffffffff8148c559>] ? nf_iterate+0x69/0xb0
+ [<ffffffff814974a0>] ? ip_local_deliver_finish+0x0/0x2d0
+ [<ffffffff8148c716>] ? nf_hook_slow+0x76/0x120
+ [<ffffffff814974a0>] ? ip_local_deliver_finish+0x0/0x2d0
+ [<ffffffff8149757d>] ? ip_local_deliver_finish+0xdd/0x2d0
+ [<ffffffff81497808>] ? ip_local_deliver+0x98/0xa0
+ [<ffffffff81496ccd>] ? ip_rcv_finish+0x12d/0x440
+ [<ffffffff81497255>] ? ip_rcv+0x275/0x350
+ [<ffffffff8145cfeb>] ? __netif_receive_skb+0x4ab/0x750
+ ...
+
+With lockdep debugging:
+
+ =====================================
+ [ BUG: bad unlock balance detected! ]
+ -------------------------------------
+ CslRx/12087 is trying to release lock (slock-AF_INET) at:
+ [<ffffffffa01bcae0>] sctp_generate_timeout_event+0x40/0xe0 [sctp]
+ but there are no more locks to release!
+
+ other info that might help us debug this:
+ 2 locks held by CslRx/12087:
+ #0: (&asoc->timers[i]){+.-...}, at: [<ffffffff8108ce1f>] run_timer_softirq+0x16f/0x3e0
+ #1: (slock-AF_INET){+.-...}, at: [<ffffffffa01bcac3>] sctp_generate_timeout_event+0x23/0xe0 [sctp]
+
+Ensure the socket taken is also the same one that is released by
+saving a copy of the socket before entering the timeout event
+critical section.
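+
+A sketch of the race as a timeline (comments only, illustrative):
+
+	/*
+	 *   timer (BH context)              user context
+	 *   ------------------              ------------
+	 *   bh_lock_sock(asoc->base.sk)     ... listener locked
+	 *                                   sctp_accept() ->
+	 *                                   sctp_assoc_migrate()
+	 *                                   repoints asoc->base.sk
+	 *   bh_unlock_sock(asoc->base.sk)   ... unlocks the NEW socket
+	 *
+	 * Caching "sk = asoc->base.sk" once, before the critical
+	 * section, keeps the lock and unlock paired on one socket.
+	 */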
+
+Signed-off-by: Karl Heiss <kheiss@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/sm_sideeffect.c | 42 +++++++++++++++++++++++-------------------
+ 1 file changed, 23 insertions(+), 19 deletions(-)
+
+--- a/net/sctp/sm_sideeffect.c
++++ b/net/sctp/sm_sideeffect.c
+@@ -244,12 +244,13 @@ void sctp_generate_t3_rtx_event(unsigned
+ int error;
+ struct sctp_transport *transport = (struct sctp_transport *) peer;
+ struct sctp_association *asoc = transport->asoc;
+- struct net *net = sock_net(asoc->base.sk);
++ struct sock *sk = asoc->base.sk;
++ struct net *net = sock_net(sk);
+
+ /* Check whether a task is in the sock. */
+
+- bh_lock_sock(asoc->base.sk);
+- if (sock_owned_by_user(asoc->base.sk)) {
++ bh_lock_sock(sk);
++ if (sock_owned_by_user(sk)) {
+ pr_debug("%s: sock is busy\n", __func__);
+
+ /* Try again later. */
+@@ -272,10 +273,10 @@ void sctp_generate_t3_rtx_event(unsigned
+ transport, GFP_ATOMIC);
+
+ if (error)
+- asoc->base.sk->sk_err = -error;
++ sk->sk_err = -error;
+
+ out_unlock:
+- bh_unlock_sock(asoc->base.sk);
++ bh_unlock_sock(sk);
+ sctp_transport_put(transport);
+ }
+
+@@ -285,11 +286,12 @@ out_unlock:
+ static void sctp_generate_timeout_event(struct sctp_association *asoc,
+ sctp_event_timeout_t timeout_type)
+ {
+- struct net *net = sock_net(asoc->base.sk);
++ struct sock *sk = asoc->base.sk;
++ struct net *net = sock_net(sk);
+ int error = 0;
+
+- bh_lock_sock(asoc->base.sk);
+- if (sock_owned_by_user(asoc->base.sk)) {
++ bh_lock_sock(sk);
++ if (sock_owned_by_user(sk)) {
+ pr_debug("%s: sock is busy: timer %d\n", __func__,
+ timeout_type);
+
+@@ -312,10 +314,10 @@ static void sctp_generate_timeout_event(
+ (void *)timeout_type, GFP_ATOMIC);
+
+ if (error)
+- asoc->base.sk->sk_err = -error;
++ sk->sk_err = -error;
+
+ out_unlock:
+- bh_unlock_sock(asoc->base.sk);
++ bh_unlock_sock(sk);
+ sctp_association_put(asoc);
+ }
+
+@@ -365,10 +367,11 @@ void sctp_generate_heartbeat_event(unsig
+ int error = 0;
+ struct sctp_transport *transport = (struct sctp_transport *) data;
+ struct sctp_association *asoc = transport->asoc;
+- struct net *net = sock_net(asoc->base.sk);
++ struct sock *sk = asoc->base.sk;
++ struct net *net = sock_net(sk);
+
+- bh_lock_sock(asoc->base.sk);
+- if (sock_owned_by_user(asoc->base.sk)) {
++ bh_lock_sock(sk);
++ if (sock_owned_by_user(sk)) {
+ pr_debug("%s: sock is busy\n", __func__);
+
+ /* Try again later. */
+@@ -389,10 +392,10 @@ void sctp_generate_heartbeat_event(unsig
+ transport, GFP_ATOMIC);
+
+ if (error)
+- asoc->base.sk->sk_err = -error;
++ sk->sk_err = -error;
+
+ out_unlock:
+- bh_unlock_sock(asoc->base.sk);
++ bh_unlock_sock(sk);
+ sctp_transport_put(transport);
+ }
+
+@@ -403,10 +406,11 @@ void sctp_generate_proto_unreach_event(u
+ {
+ struct sctp_transport *transport = (struct sctp_transport *) data;
+ struct sctp_association *asoc = transport->asoc;
+- struct net *net = sock_net(asoc->base.sk);
++ struct sock *sk = asoc->base.sk;
++ struct net *net = sock_net(sk);
+
+- bh_lock_sock(asoc->base.sk);
+- if (sock_owned_by_user(asoc->base.sk)) {
++ bh_lock_sock(sk);
++ if (sock_owned_by_user(sk)) {
+ pr_debug("%s: sock is busy\n", __func__);
+
+ /* Try again later. */
+@@ -427,7 +431,7 @@ void sctp_generate_proto_unreach_event(u
+ asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC);
+
+ out_unlock:
+- bh_unlock_sock(asoc->base.sk);
++ bh_unlock_sock(sk);
+ sctp_association_put(asoc);
+ }
+
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 29 Dec 2015 17:49:25 +0800
+Subject: sctp: sctp should release assoc when sctp_make_abort_user return NULL in sctp_close
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 068d8bd338e855286aea54e70d1c101569284b21 ]
+
+In sctp_close, sctp_make_abort_user may return NULL because of a memory
+allocation failure. If this happens, it bypasses any state change and
+never frees the assoc; the assoc is kept in memory with whatever state
+it had, even after the socket is closed by sctp_close().
+
+So if sctp_make_abort_user fails to allocate memory, we should abort
+the asoc via sctp_primitive_ABORT as well, just as the annotation in
+sctp_sf_cookie_wait_prm_abort and sctp_sf_do_9_1_prm_abort says:
+"Even if we can't send the ABORT due to low memory delete the TCB.
+This is a departure from our typical NOMEM handling".
+
+But then the chunk is NULL (low memory) and the SCTP_CMD_REPLY cmd
+would dereference the chunk pointer, crashing the system. So we should
+add the SCTP_CMD_REPLY cmd only when the chunk is not NULL, just like
+the other places that add the SCTP_CMD_REPLY cmd.
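+
+A minimal sketch of the guarded pattern this patch applies (names as
+in the kernel state functions; surrounding context omitted):
+
+	abort = sctp_make_abort(asoc, NULL, 0);
+	if (abort)
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
+				SCTP_CHUNK(abort));
+	/* TCB deletion proceeds unconditionally, even on NOMEM */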
+
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/sm_statefuns.c | 6 ++++--
+ net/sctp/socket.c | 3 +--
+ 2 files changed, 5 insertions(+), 4 deletions(-)
+
+--- a/net/sctp/sm_statefuns.c
++++ b/net/sctp/sm_statefuns.c
+@@ -4829,7 +4829,8 @@ sctp_disposition_t sctp_sf_do_9_1_prm_ab
+
+ retval = SCTP_DISPOSITION_CONSUME;
+
+- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
++ if (abort)
++ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+
+ /* Even if we can't send the ABORT due to low memory delete the
+ * TCB. This is a departure from our typical NOMEM handling.
+@@ -4966,7 +4967,8 @@ sctp_disposition_t sctp_sf_cookie_wait_p
+ SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
+ retval = SCTP_DISPOSITION_CONSUME;
+
+- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
++ if (abort)
++ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+
+ sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+ SCTP_STATE(SCTP_STATE_CLOSED));
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -1513,8 +1513,7 @@ static void sctp_close(struct sock *sk,
+ struct sctp_chunk *chunk;
+
+ chunk = sctp_make_abort_user(asoc, NULL, 0);
+- if (chunk)
+- sctp_primitive_ABORT(net, asoc, chunk);
++ sctp_primitive_ABORT(net, asoc, chunk);
+ } else
+ sctp_primitive_SHUTDOWN(net, asoc, NULL);
+ }
usb-ipaq.c-fix-a-timeout-loop.patch
usb-cp210x-add-id-for-elv-marble-sound-board-1.patch
xhci-refuse-loading-if-nousb-is-used.patch
+utf-8-q-veth-20don-e2-80-99t-20modify-20ip-5fsum-utf-8-q-med-20doing-20so-20treats-20packets-20with-20bad-20checksums-utf-8-q-20as-20good.patch
+ipv6-addrlabel-fix-ip6addrlbl_get.patch
+addrconf-always-initialize-sysctl-table-data.patch
+net-cdc_ncm-avoid-changing-rx-tx-buffers-on-mtu-changes.patch
+sctp-sctp-should-release-assoc-when-sctp_make_abort_user-return-null-in-sctp_close.patch
+connector-bump-skb-users-before-callback-invocation.patch
+unix-properly-account-for-fds-passed-over-unix-sockets.patch
+bridge-only-call-sbin-bridge-stp-for-the-initial-network-namespace.patch
+net-filter-make-jits-zero-a-for-skf_ad_alu_xor_x.patch
+net-sched-fix-missing-free-per-cpu-on-qstats.patch
+net-possible-use-after-free-in-dst_release.patch
+vxlan-fix-test-which-detect-duplicate-vxlan-iface.patch
+net-sctp-prevent-writes-to-cookie_hmac_alg-from-accessing-invalid-memory.patch
+ipv6-tcp-add-rcu-locking-in-tcp_v6_send_synack.patch
+tcp_yeah-don-t-set-ssthresh-below-2.patch
+udp-disallow-ufo-for-sockets-with-so_no_check-option.patch
+net-preserve-ip-control-block-during-gso-segmentation.patch
+bonding-prevent-ipv6-link-local-address-on-enslaved-devices.patch
+phonet-properly-unshare-skbs-in-phonet_rcv.patch
+net-bpf-reject-invalid-shifts.patch
+ipv6-update-skb-csum-when-ce-mark-is-propagated.patch
+bridge-fix-lockdep-addr_list_lock-false-positive-splat.patch
+tcp-dccp-fix-timewait-races-in-timer-handling.patch
+tcp-dccp-fix-old-style-declarations.patch
+isdn_ppp-add-checks-for-allocation-failure-in-isdn_ppp_open.patch
+ppp-slip-validate-vj-compression-slot-parameters-completely.patch
+batman-adv-avoid-recursive-call_rcu-for-batadv_bla_claim.patch
+batman-adv-avoid-recursive-call_rcu-for-batadv_nc_node.patch
+batman-adv-drop-immediate-batadv_orig_ifinfo-free-function.patch
+batman-adv-drop-immediate-batadv_neigh_node-free-function.patch
+batman-adv-drop-immediate-neigh_ifinfo-free-function.patch
+batman-adv-drop-immediate-batadv_hard_iface-free-function.patch
+batman-adv-drop-immediate-orig_node-free-function.patch
+team-replace-rcu_read_lock-with-a-mutex-in-team_vlan_rx_kill_vid.patch
+sctp-prevent-soft-lockup-when-sctp_accept-is-called-during-a-timeout-event.patch
+xen-netback-respect-user-provided-max_queues.patch
+xen-netfront-respect-user-provided-max_queues.patch
+xen-netfront-update-num_queues-to-real-created.patch
+xfrm-dst_entries_init-per-net-dst_ops.patch
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Raanan Avargil <raanan.avargil@intel.com>
+Date: Thu, 1 Oct 2015 04:48:53 -0700
+Subject: tcp/dccp: fix old style declarations
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Raanan Avargil <raanan.avargil@intel.com>
+
+[ Upstream commit 8695a144da9e500a5a60fa34c06694346ec1048f ]
+
+I’m using the compilation flag -Werror=old-style-declaration, which
+requires that the “inline” keyword come at the beginning of the
+declaration.
+
+$ make drivers/net/ethernet/intel/e1000e/e1000e.ko
+...
+include/net/inet_timewait_sock.h:116:1: error: ‘inline’ is not at
+beginning of declaration [-Werror=old-style-declaration]
+static void inline inet_twsk_schedule(struct inet_timewait_sock *tw, int
+timeo)
+
+include/net/inet_timewait_sock.h:121:1: error: ‘inline’ is not at
+beginning of declaration [-Werror=old-style-declaration]
+static void inline inet_twsk_reschedule(struct inet_timewait_sock *tw,
+int timeo)
+
+Fixes: ed2e92394589 ("tcp/dccp: fix timewait races in timer handling")
+Signed-off-by: Raanan Avargil <raanan.avargil@intel.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_timewait_sock.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/include/net/inet_timewait_sock.h
++++ b/include/net/inet_timewait_sock.h
+@@ -115,12 +115,12 @@ void __inet_twsk_hashdance(struct inet_t
+ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo,
+ bool rearm);
+
+-static void inline inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo)
++static inline void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo)
+ {
+ __inet_twsk_schedule(tw, timeo, false);
+ }
+
+-static void inline inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo)
++static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo)
+ {
+ __inet_twsk_schedule(tw, timeo, true);
+ }
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 19 Sep 2015 09:08:34 -0700
+Subject: tcp/dccp: fix timewait races in timer handling
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ed2e923945892a8372ab70d2f61d364b0b6d9054 ]
+
+When creating a timewait socket, we need to arm the timer before
+allowing other cpus to find it. The signal allowing cpus to find
+the socket is setting tw_refcnt to non zero value.
+
+As we set tw_refcnt in __inet_twsk_hashdance(), we therefore need to
+call inet_twsk_schedule() first.
+
+This also means we need to remove tw_refcnt changes from
+inet_twsk_schedule() and let the caller handle it.
+
+Note that because we use mod_timer_pinned(), we have the guarantee that
+the timer won't expire before we set tw_refcnt, as we run in BH context.
+
+To make things more readable I introduced inet_twsk_reschedule() helper.
+
+When rearming the timer, we can use mod_timer_pending() to make sure
+we do not rearm a canceled timer.
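+
+A condensed sketch of the resulting publication order (illustrative;
+see the dccp/tcp hunks below for the real code):
+
+	inet_twsk_schedule(tw, timeo);		/* arm timer first */
+	__inet_twsk_hashdance(tw, sk, hashinfo);/* publish: sets tw_refcnt,
+						 * other CPUs may now find tw */
+	inet_twsk_put(tw);			/* drop our reference */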
+
+Note: This bug can possibly trigger if packets of a flow can hit
+multiple CPUs. This does not normally happen, unless flow steering
+is broken somehow. This explains why this bug was spotted ~5 months
+after its introduction.
+
+A similar fix is needed for SYN_RECV sockets in reqsk_queue_hash_req(),
+but will be provided in a separate patch for proper tracking.
+
+Fixes: 789f558cfb36 ("tcp/dccp: get rid of central timewait timer")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Ying Cai <ycai@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_timewait_sock.h | 14 +++++++++++++-
+ net/dccp/minisocks.c | 4 ++--
+ net/ipv4/inet_timewait_sock.c | 16 ++++++++++------
+ net/ipv4/tcp_minisocks.c | 13 ++++++-------
+ 4 files changed, 31 insertions(+), 16 deletions(-)
+
+--- a/include/net/inet_timewait_sock.h
++++ b/include/net/inet_timewait_sock.h
+@@ -112,7 +112,19 @@ struct inet_timewait_sock *inet_twsk_all
+ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
+ struct inet_hashinfo *hashinfo);
+
+-void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo);
++void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo,
++ bool rearm);
++
++static void inline inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo)
++{
++ __inet_twsk_schedule(tw, timeo, false);
++}
++
++static void inline inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo)
++{
++ __inet_twsk_schedule(tw, timeo, true);
++}
++
+ void inet_twsk_deschedule(struct inet_timewait_sock *tw);
+
+ void inet_twsk_purge(struct inet_hashinfo *hashinfo,
+--- a/net/dccp/minisocks.c
++++ b/net/dccp/minisocks.c
+@@ -48,8 +48,6 @@ void dccp_time_wait(struct sock *sk, int
+ tw->tw_ipv6only = sk->sk_ipv6only;
+ }
+ #endif
+- /* Linkage updates. */
+- __inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
+
+ /* Get the TIME_WAIT timeout firing. */
+ if (timeo < rto)
+@@ -60,6 +58,8 @@ void dccp_time_wait(struct sock *sk, int
+ timeo = DCCP_TIMEWAIT_LEN;
+
+ inet_twsk_schedule(tw, timeo);
++ /* Linkage updates. */
++ __inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
+ inet_twsk_put(tw);
+ } else {
+ /* Sorry, if we're out of memory, just CLOSE this
+--- a/net/ipv4/inet_timewait_sock.c
++++ b/net/ipv4/inet_timewait_sock.c
+@@ -153,13 +153,15 @@ void __inet_twsk_hashdance(struct inet_t
+ /*
+ * Step 2: Hash TW into tcp ehash chain.
+ * Notes :
+- * - tw_refcnt is set to 3 because :
++ * - tw_refcnt is set to 4 because :
+ * - We have one reference from bhash chain.
+ * - We have one reference from ehash chain.
++ * - We have one reference from timer.
++ * - One reference for ourself (our caller will release it).
+ * We can use atomic_set() because prior spin_lock()/spin_unlock()
+ * committed into memory all tw fields.
+ */
+- atomic_set(&tw->tw_refcnt, 1 + 1 + 1);
++ atomic_set(&tw->tw_refcnt, 4);
+ inet_twsk_add_node_rcu(tw, &ehead->chain);
+
+ /* Step 3: Remove SK from hash chain */
+@@ -243,7 +245,7 @@ void inet_twsk_deschedule(struct inet_ti
+ }
+ EXPORT_SYMBOL(inet_twsk_deschedule);
+
+-void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo)
++void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
+ {
+ /* timeout := RTO * 3.5
+ *
+@@ -271,12 +273,14 @@ void inet_twsk_schedule(struct inet_time
+ */
+
+ tw->tw_kill = timeo <= 4*HZ;
+- if (!mod_timer_pinned(&tw->tw_timer, jiffies + timeo)) {
+- atomic_inc(&tw->tw_refcnt);
++ if (!rearm) {
++ BUG_ON(mod_timer_pinned(&tw->tw_timer, jiffies + timeo));
+ atomic_inc(&tw->tw_dr->tw_count);
++ } else {
++ mod_timer_pending(&tw->tw_timer, jiffies + timeo);
+ }
+ }
+-EXPORT_SYMBOL_GPL(inet_twsk_schedule);
++EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
+
+ void inet_twsk_purge(struct inet_hashinfo *hashinfo,
+ struct inet_timewait_death_row *twdr, int family)
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -163,9 +163,9 @@ kill_with_rst:
+ if (tcp_death_row.sysctl_tw_recycle &&
+ tcptw->tw_ts_recent_stamp &&
+ tcp_tw_remember_stamp(tw))
+- inet_twsk_schedule(tw, tw->tw_timeout);
++ inet_twsk_reschedule(tw, tw->tw_timeout);
+ else
+- inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN);
++ inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
+ return TCP_TW_ACK;
+ }
+
+@@ -203,7 +203,7 @@ kill:
+ return TCP_TW_SUCCESS;
+ }
+ }
+- inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN);
++ inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
+
+ if (tmp_opt.saw_tstamp) {
+ tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
+@@ -253,7 +253,7 @@ kill:
+ * Do not reschedule in the last case.
+ */
+ if (paws_reject || th->ack)
+- inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN);
++ inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
+
+ return tcp_timewait_check_oow_rate_limit(
+ tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT);
+@@ -324,9 +324,6 @@ void tcp_time_wait(struct sock *sk, int
+ } while (0);
+ #endif
+
+- /* Linkage updates. */
+- __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
+-
+ /* Get the TIME_WAIT timeout firing. */
+ if (timeo < rto)
+ timeo = rto;
+@@ -340,6 +337,8 @@ void tcp_time_wait(struct sock *sk, int
+ }
+
+ inet_twsk_schedule(tw, timeo);
++ /* Linkage updates. */
++ __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
+ inet_twsk_put(tw);
+ } else {
+ /* Sorry, if we're out of memory, just CLOSE this
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Neal Cardwell <ncardwell@google.com>
+Date: Mon, 11 Jan 2016 13:42:43 -0500
+Subject: tcp_yeah: don't set ssthresh below 2
+
+From: Neal Cardwell <ncardwell@google.com>
+
+[ Upstream commit 83d15e70c4d8909d722c0d64747d8fb42e38a48f ]
+
+For tcp_yeah, use an ssthresh floor of 2, the same floor used by Reno
+and CUBIC, per RFC 5681 (equation 4).
+
+tcp_yeah_ssthresh() was sometimes returning a zero or negative ssthresh
+value if the intended reduction was as big as or bigger than the current
+cwnd. Congestion control modules should never return a zero or
+negative ssthresh. A zero ssthresh generally results in a zero cwnd,
+causing the connection to stall. A negative ssthresh value will be
+interpreted as a u32 and will set a target cwnd for PRR near 4
+billion.
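+
+A one-line sketch of the fix (as in the hunk below): evaluating in a
+signed type avoids the u32 wrap when reduction >= snd_cwnd, and the
+clamp supplies the RFC 5681 floor of 2.
+
+	return max_t(int, tp->snd_cwnd - reduction, 2);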
+
+Oleksandr Natalenko reported that a system using tcp_yeah with ECN
+could see a warning about a prior_cwnd of 0 in
+tcp_cwnd_reduction(). Testing verified that this was due to
+tcp_yeah_ssthresh() misbehaving in this way.
+
+Reported-by: Oleksandr Natalenko <oleksandr@natalenko.name>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_yeah.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_yeah.c
++++ b/net/ipv4/tcp_yeah.c
+@@ -219,7 +219,7 @@ static u32 tcp_yeah_ssthresh(struct sock
+ yeah->fast_count = 0;
+ yeah->reno_count = max(yeah->reno_count>>1, 2U);
+
+- return tp->snd_cwnd - reduction;
++ return max_t(int, tp->snd_cwnd - reduction, 2);
+ }
+
+ static struct tcp_congestion_ops tcp_yeah __read_mostly = {
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Mon, 18 Jan 2016 17:30:22 +0200
+Subject: team: Replace rcu_read_lock with a mutex in team_vlan_rx_kill_vid
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+[ Upstream commit 60a6531bfe49555581ccd65f66a350cc5693fcde ]
+
+We can't be within an RCU read-side critical section when deleting
+VLANs, as underlying drivers might sleep during the hardware operation.
+Therefore, replace the RCU critical section with a mutex. This is
+consistent with team_vlan_rx_add_vid.
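+
+A minimal sketch of the constraint (illustrative fragments): sleeping
+calls are forbidden inside an RCU read-side critical section, so any
+loop that may reach hardware must use a sleepable lock instead.
+
+	rcu_read_lock();
+	vlan_vid_del(port->dev, proto, vid);	/* may sleep: BUG */
+	rcu_read_unlock();
+
+	mutex_lock(&team->lock);		/* may sleep: fine */
+	vlan_vid_del(port->dev, proto, vid);
+	mutex_unlock(&team->lock);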
+
+Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device")
+Acked-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/team/team.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/team/team.c
++++ b/drivers/net/team/team.c
+@@ -1845,10 +1845,10 @@ static int team_vlan_rx_kill_vid(struct
+ struct team *team = netdev_priv(dev);
+ struct team_port *port;
+
+- rcu_read_lock();
+- list_for_each_entry_rcu(port, &team->port_list, list)
++ mutex_lock(&team->lock);
++ list_for_each_entry(port, &team->port_list, list)
+ vlan_vid_del(port->dev, proto, vid);
+- rcu_read_unlock();
++ mutex_unlock(&team->lock);
+
+ return 0;
+ }
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= <mkubecek@suse.cz>
+Date: Mon, 11 Jan 2016 07:50:30 +0100
+Subject: udp: disallow UFO for sockets with SO_NO_CHECK option
+
+From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= <mkubecek@suse.cz>
+
+[ Upstream commit 40ba330227ad00b8c0cdf2f425736ff9549cc423 ]
+
+Commit acf8dd0a9d0b ("udp: only allow UFO for packets from SOCK_DGRAM
+sockets") disallows UFO for packets sent from raw sockets. We need to do
+the same for SOCK_DGRAM sockets with the SO_NO_CHECK option, albeit for
+a slightly different reason: while such a socket would override the
+CHECKSUM_PARTIAL set by ip_ufo_append_data(), gso_size is still set and
+a bad-offloading-flags warning is triggered in __skb_gso_segment().
+
+In the IPv6 case, the SO_NO_CHECK option is ignored, but we need to
+disallow UFO for packets sent by sockets with the UDP_NO_CHECK6_TX
+option.
+
+Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
+Tested-by: Shannon Nelson <shannon.nelson@intel.com>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_output.c | 2 +-
+ net/ipv6/ip6_output.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -893,7 +893,7 @@ static int __ip_append_data(struct sock
+ if (((length > mtu) || (skb && skb_is_gso(skb))) &&
+ (sk->sk_protocol == IPPROTO_UDP) &&
+ (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+- (sk->sk_type == SOCK_DGRAM)) {
++ (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
+ err = ip_ufo_append_data(sk, queue, getfrag, from, length,
+ hh_len, fragheaderlen, transhdrlen,
+ maxfraglen, flags);
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1329,7 +1329,7 @@ emsgsize:
+ (skb && skb_is_gso(skb))) &&
+ (sk->sk_protocol == IPPROTO_UDP) &&
+ (rt->dst.dev->features & NETIF_F_UFO) &&
+- (sk->sk_type == SOCK_DGRAM)) {
++ (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
+ err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
+ hh_len, fragheaderlen,
+ transhdrlen, mtu, flags, rt);
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: willy tarreau <w@1wt.eu>
+Date: Sun, 10 Jan 2016 07:54:56 +0100
+Subject: unix: properly account for FDs passed over unix sockets
+
+From: willy tarreau <w@1wt.eu>
+
+[ Upstream commit 712f4aad406bb1ed67f3f98d04c044191f0ff593 ]
+
+It is possible for a process to allocate and accumulate far more FDs than
+the process' limit by sending them over a unix socket then closing them
+to keep the process' fd count low.
+
+This change addresses this problem by keeping track of the number of FDs
+in flight per user and preventing non-privileged processes from having
+more FDs in flight than their configured FD limit.
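+
+A userspace sketch of the abuse being mitigated (illustrative, error
+handling omitted): each duplicated fd sent over the socket stays "in
+flight" in the kernel, so the sender can close its copy and keep its
+own fd count low while kernel-side file references accumulate.
+
+	#include <string.h>
+	#include <sys/socket.h>
+	#include <unistd.h>
+
+	static void send_fd(int sock, int fd)
+	{
+		char data = 0;
+		struct iovec iov = { .iov_base = &data, .iov_len = 1 };
+		char ctrl[CMSG_SPACE(sizeof(int))];
+		struct msghdr msg = {
+			.msg_iov = &iov, .msg_iovlen = 1,
+			.msg_control = ctrl,
+			.msg_controllen = sizeof(ctrl),
+		};
+		struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+		memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
+		sendmsg(sock, &msg, 0);	/* fd now counts as in flight */
+		close(fd);	/* local copy gone; kernel ref remains */
+	}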
+
+Reported-by: socketpair@gmail.com
+Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Mitigates: CVE-2013-4312 (Linux 2.0+)
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/sched.h | 1 +
+ net/unix/af_unix.c | 24 ++++++++++++++++++++----
+ net/unix/garbage.c | 13 ++++++++-----
+ 3 files changed, 29 insertions(+), 9 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -802,6 +802,7 @@ struct user_struct {
+ unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */
+ #endif
+ unsigned long locked_shm; /* How many pages of mlocked shm ? */
++ unsigned long unix_inflight; /* How many files in flight in unix sockets */
+
+ #ifdef CONFIG_KEYS
+ struct key *uid_keyring; /* UID specific keyring */
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -1481,6 +1481,21 @@ static void unix_destruct_scm(struct sk_
+ sock_wfree(skb);
+ }
+
++/*
++ * The "user->unix_inflight" variable is protected by the garbage
++ * collection lock, and we just read it locklessly here. If you go
++ * over the limit, there might be a tiny race in actually noticing
++ * it across threads. Tough.
++ */
++static inline bool too_many_unix_fds(struct task_struct *p)
++{
++ struct user_struct *user = current_user();
++
++ if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
++ return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
++ return false;
++}
++
+ #define MAX_RECURSION_LEVEL 4
+
+ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+@@ -1489,6 +1504,9 @@ static int unix_attach_fds(struct scm_co
+ unsigned char max_level = 0;
+ int unix_sock_count = 0;
+
++ if (too_many_unix_fds(current))
++ return -ETOOMANYREFS;
++
+ for (i = scm->fp->count - 1; i >= 0; i--) {
+ struct sock *sk = unix_get_socket(scm->fp->fp[i]);
+
+@@ -1510,10 +1528,8 @@ static int unix_attach_fds(struct scm_co
+ if (!UNIXCB(skb).fp)
+ return -ENOMEM;
+
+- if (unix_sock_count) {
+- for (i = scm->fp->count - 1; i >= 0; i--)
+- unix_inflight(scm->fp->fp[i]);
+- }
++ for (i = scm->fp->count - 1; i >= 0; i--)
++ unix_inflight(scm->fp->fp[i]);
+ return max_level;
+ }
+
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -120,11 +120,11 @@ void unix_inflight(struct file *fp)
+ {
+ struct sock *s = unix_get_socket(fp);
+
++ spin_lock(&unix_gc_lock);
++
+ if (s) {
+ struct unix_sock *u = unix_sk(s);
+
+- spin_lock(&unix_gc_lock);
+-
+ if (atomic_long_inc_return(&u->inflight) == 1) {
+ BUG_ON(!list_empty(&u->link));
+ list_add_tail(&u->link, &gc_inflight_list);
+@@ -132,25 +132,28 @@ void unix_inflight(struct file *fp)
+ BUG_ON(list_empty(&u->link));
+ }
+ unix_tot_inflight++;
+- spin_unlock(&unix_gc_lock);
+ }
++ fp->f_cred->user->unix_inflight++;
++ spin_unlock(&unix_gc_lock);
+ }
+
+ void unix_notinflight(struct file *fp)
+ {
+ struct sock *s = unix_get_socket(fp);
+
++ spin_lock(&unix_gc_lock);
++
+ if (s) {
+ struct unix_sock *u = unix_sk(s);
+
+- spin_lock(&unix_gc_lock);
+ BUG_ON(list_empty(&u->link));
+
+ if (atomic_long_dec_and_test(&u->inflight))
+ list_del_init(&u->link);
+ unix_tot_inflight--;
+- spin_unlock(&unix_gc_lock);
+ }
++ fp->f_cred->user->unix_inflight--;
++ spin_unlock(&unix_gc_lock);
+ }
+
+ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Vijay Pandurangan <vijayp@vijayp.ca>
+Date: Fri, 18 Dec 2015 14:34:59 -0500
+Subject: =?UTF-8?q?veth:=20don=E2=80=99t=20modify=20ip=5Fsum?= =?UTF-8?q?med;=20doing=20so=20treats=20packets=20with=20bad=20checksums?= =?UTF-8?q?=20as=20good.?=
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Vijay Pandurangan <vijayp@vijayp.ca>
+
+[ Upstream commit ce8c839b74e3017996fad4e1b7ba2e2625ede82f ]
+
+Packets that arrive from real hardware devices have ip_summed ==
+CHECKSUM_UNNECESSARY if the hardware verified the checksums, or
+CHECKSUM_NONE if the packet is bad or it was unable to verify it. The
+current version of veth will replace CHECKSUM_NONE with
+CHECKSUM_UNNECESSARY, which causes corrupt packets routed from hardware to
+a veth device to be delivered to the application. This caused applications
+at Twitter to receive corrupt data when network hardware was corrupting
+packets.
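+
+For context, the ip_summed values involved (semantics per
+include/linux/skbuff.h, summarized as code comments):
+
+	/* CHECKSUM_NONE        - not verified (packet may be bad)     */
+	/* CHECKSUM_UNNECESSARY - hardware already verified the csum   */
+	/* CHECKSUM_PARTIAL     - locally generated; csum filled on tx */
+
+Promoting CHECKSUM_NONE to CHECKSUM_UNNECESSARY therefore tells the
+stack that an unverified (possibly corrupt) packet is known-good.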
+
+We believe this was added as an optimization to skip computing and
+verifying checksums for communication between containers. However, locally
+generated packets have ip_summed == CHECKSUM_PARTIAL, so the code as
+written does nothing for them. As far as we can tell, after removing this
+code, these packets are transmitted from one stack to another unmodified
+(tcpdump shows invalid checksums on both sides, as expected), and they are
+delivered correctly to applications. We didn’t test every possible network
+configuration, but we tried a few common ones such as bridging containers,
+using NAT between the host and a container, and routing from hardware
+devices to containers. We have effectively deployed this in production at
+Twitter (by disabling RX checksum offloading on veth devices).
+
+This code dates back to the first version of the driver, commit
+<e314dbdc1c0dc6a548ecf> ("[NET]: Virtual ethernet device driver"), so I
+suspect this bug occurred mostly because the driver API has evolved
+significantly since then. Commit <0b7967503dc97864f283a> ("net/veth: Fix
+packet checksumming") (in December 2010) fixed this for packets that get
+created locally and sent to hardware devices, by not changing
+CHECKSUM_PARTIAL. However, the same issue still occurs for packets coming
+in from hardware devices.
+
+Co-authored-by: Evan Jones <ej@evanjones.ca>
+Signed-off-by: Evan Jones <ej@evanjones.ca>
+Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Cc: Phil Sutter <phil@nwl.cc>
+Cc: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Cc: netdev@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Vijay Pandurangan <vijayp@vijayp.ca>
+Acked-by: Cong Wang <cwang@twopensource.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/veth.c | 6 ------
+ 1 file changed, 6 deletions(-)
+
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -117,12 +117,6 @@ static netdev_tx_t veth_xmit(struct sk_b
+ kfree_skb(skb);
+ goto drop;
+ }
+- /* don't change ip_summed == CHECKSUM_PARTIAL, as that
+- * will cause bad checksum on forwarded packets
+- */
+- if (skb->ip_summed == CHECKSUM_NONE &&
+- rcv->features & NETIF_F_RXCSUM)
+- skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
+ struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Thu, 7 Jan 2016 11:26:53 +0100
+Subject: vxlan: fix test which detect duplicate vxlan iface
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+[ Upstream commit 07b9b37c227cb8d88d478b4a9c5634fee514ede1 ]
+
+When a vxlan interface is created, the driver checks that there is no
+other vxlan interface with the same properties. To do this, it checks
+the existing vxlan UDP socket. Since commit 1c51a9159dde, the vxlan
+socket is created only when the interface is set up, which breaks that
+test.
+
+Example:
+$ ip l a vxlan10 type vxlan id 10 group 239.0.0.10 dev eth0 dstport 0
+$ ip l a vxlan11 type vxlan id 10 group 239.0.0.10 dev eth0 dstport 0
+$ ip -br l | grep vxlan
+vxlan10 DOWN f2:55:1c:6a:fb:00 <BROADCAST,MULTICAST>
+vxlan11 DOWN 7a:cb:b9:38:59:0d <BROADCAST,MULTICAST>
+
+Instead of checking sockets, let's loop over the vxlan iface list.
+
+Fixes: 1c51a9159dde ("vxlan: fix race caused by dropping rtnl_unlock")
+Reported-by: Thomas Faivre <thomas.faivre@6wind.com>
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vxlan.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -2581,7 +2581,7 @@ static int vxlan_newlink(struct net *src
+ struct nlattr *tb[], struct nlattr *data[])
+ {
+ struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
+- struct vxlan_dev *vxlan = netdev_priv(dev);
++ struct vxlan_dev *vxlan = netdev_priv(dev), *tmp;
+ struct vxlan_rdst *dst = &vxlan->default_dst;
+ __u32 vni;
+ int err;
+@@ -2714,9 +2714,13 @@ static int vxlan_newlink(struct net *src
+ if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL])
+ vxlan->flags |= VXLAN_F_REMCSUM_NOPARTIAL;
+
+- if (vxlan_find_vni(src_net, vni, use_ipv6 ? AF_INET6 : AF_INET,
+- vxlan->dst_port, vxlan->flags)) {
+- pr_info("duplicate VNI %u\n", vni);
++ list_for_each_entry(tmp, &vn->vxlan_list, next) {
++ if (tmp->default_dst.remote_vni == vni &&
++ (tmp->default_dst.remote_ip.sa.sa_family == AF_INET6 ||
++ tmp->saddr.sa.sa_family == AF_INET6) == use_ipv6 &&
++ tmp->dst_port == vxlan->dst_port &&
++ (tmp->flags & VXLAN_F_RCV_FLAGS) ==
++ (vxlan->flags & VXLAN_F_RCV_FLAGS))
+ return -EEXIST;
+ }
+
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Wei Liu <wei.liu2@citrix.com>
+Date: Thu, 10 Sep 2015 11:18:57 +0100
+Subject: xen-netback: respect user provided max_queues
+
+From: Wei Liu <wei.liu2@citrix.com>
+
+[ Upstream commit 4c82ac3c37363e8c4ded6a5fe1ec5fa756b34df3 ]
+
+Originally that parameter was always reset to num_online_cpus during
+module initialisation, which rendered it useless.
+
+The fix is to set max_queues to num_online_cpus only when the user has
+not provided a value.
+
+Reported-by: Johnny Strom <johnny.strom@linuxsolutions.fi>
+Signed-off-by: Wei Liu <wei.liu2@citrix.com>
+Reviewed-by: David Vrabel <david.vrabel@citrix.com>
+Acked-by: Ian Campbell <ian.campbell@citrix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/xen-netback/netback.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -2007,8 +2007,11 @@ static int __init netback_init(void)
+ if (!xen_domain())
+ return -ENODEV;
+
+- /* Allow as many queues as there are CPUs, by default */
+- xenvif_max_queues = num_online_cpus();
++ /* Allow as many queues as there are CPUs if user has not
++ * specified a value.
++ */
++ if (xenvif_max_queues == 0)
++ xenvif_max_queues = num_online_cpus();
+
+ if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
+ pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Wei Liu <wei.liu2@citrix.com>
+Date: Thu, 10 Sep 2015 11:18:58 +0100
+Subject: xen-netfront: respect user provided max_queues
+
+From: Wei Liu <wei.liu2@citrix.com>
+
+[ Upstream commit 32a844056fd43dda647e1c3c6b9983bdfa04d17d ]
+
+Originally that parameter was always reset to num_online_cpus during
+module initialisation, which rendered it useless.
+
+The fix is to set max_queues to num_online_cpus only when the user has
+not provided a value.
+
+Signed-off-by: Wei Liu <wei.liu2@citrix.com>
+Cc: David Vrabel <david.vrabel@citrix.com>
+Reviewed-by: David Vrabel <david.vrabel@citrix.com>
+Tested-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/xen-netfront.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/xen-netfront.c
++++ b/drivers/net/xen-netfront.c
+@@ -2140,8 +2140,11 @@ static int __init netif_init(void)
+
+ pr_info("Initialising Xen virtual ethernet driver\n");
+
+- /* Allow as many queues as there are CPUs, by default */
+- xennet_max_queues = num_online_cpus();
++ /* Allow as many queues as there are CPUs if user has not
++ * specified a value.
++ */
++ if (xennet_max_queues == 0)
++ xennet_max_queues = num_online_cpus();
+
+ return xenbus_register_frontend(&netfront_driver);
+ }
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Joe Jin <joe.jin@oracle.com>
+Date: Mon, 19 Oct 2015 13:37:17 +0800
+Subject: xen-netfront: update num_queues to real created
+
+From: Joe Jin <joe.jin@oracle.com>
+
+[ Upstream commit ca88ea1247dfee094e2467a3578eaec9bdf0833a ]
+
+Sometimes xennet_create_queues() may fail to create all of the
+requested queues; we need to update num_queues to the number actually
+created to avoid a NULL pointer dereference.
+
+Signed-off-by: Joe Jin <joe.jin@oracle.com>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Wei Liu <wei.liu2@citrix.com>
+Cc: Ian Campbell <ian.campbell@citrix.com>
+Cc: David S. Miller <davem@davemloft.net>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/xen-netfront.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/xen-netfront.c
++++ b/drivers/net/xen-netfront.c
+@@ -1710,19 +1710,19 @@ static void xennet_destroy_queues(struct
+ }
+
+ static int xennet_create_queues(struct netfront_info *info,
+- unsigned int num_queues)
++ unsigned int *num_queues)
+ {
+ unsigned int i;
+ int ret;
+
+- info->queues = kcalloc(num_queues, sizeof(struct netfront_queue),
++ info->queues = kcalloc(*num_queues, sizeof(struct netfront_queue),
+ GFP_KERNEL);
+ if (!info->queues)
+ return -ENOMEM;
+
+ rtnl_lock();
+
+- for (i = 0; i < num_queues; i++) {
++ for (i = 0; i < *num_queues; i++) {
+ struct netfront_queue *queue = &info->queues[i];
+
+ queue->id = i;
+@@ -1732,7 +1732,7 @@ static int xennet_create_queues(struct n
+ if (ret < 0) {
+ dev_warn(&info->netdev->dev,
+ "only created %d queues\n", i);
+- num_queues = i;
++ *num_queues = i;
+ break;
+ }
+
+@@ -1742,11 +1742,11 @@ static int xennet_create_queues(struct n
+ napi_enable(&queue->napi);
+ }
+
+- netif_set_real_num_tx_queues(info->netdev, num_queues);
++ netif_set_real_num_tx_queues(info->netdev, *num_queues);
+
+ rtnl_unlock();
+
+- if (num_queues == 0) {
++ if (*num_queues == 0) {
+ dev_err(&info->netdev->dev, "no queues\n");
+ return -EINVAL;
+ }
+@@ -1792,7 +1792,7 @@ static int talk_to_netback(struct xenbus
+ if (info->queues)
+ xennet_destroy_queues(info);
+
+- err = xennet_create_queues(info, num_queues);
++ err = xennet_create_queues(info, &num_queues);
+ if (err < 0)
+ goto destroy_ring;
+
--- /dev/null
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Dan Streetman <dan.streetman@canonical.com>
+Date: Thu, 29 Oct 2015 09:51:16 -0400
+Subject: xfrm: dst_entries_init() per-net dst_ops
+
+From: Dan Streetman <dan.streetman@canonical.com>
+
+[ Upstream commit a8a572a6b5f2a79280d6e302cb3c1cb1fbaeb3e8 ]
+
+Remove the dst_entries_init/destroy calls for xfrm4 and xfrm6 dst_ops
+templates; their dst_entries counters will never be used. Move the
+xfrm dst_ops initialization from the common xfrm/xfrm_policy.c to
+xfrm4/xfrm4_policy.c and xfrm6/xfrm6_policy.c, and call dst_entries_init
+and dst_entries_destroy for each net namespace.
+
+The ipv4 and ipv6 xfrms each create a dst_ops template and perform
+dst_entries_init on the templates. The template values are copied to each
+net namespace's xfrm.xfrm*_dst_ops. The problem is that the dst_ops
+pcpuc_entries field is a percpu counter and cannot be used correctly by
+simply copying it to another object.
+
+The result of this is a very subtle bug; changes to the dst entries
+counter from one net namespace may sometimes get applied to a different
+net namespace dst entries counter. This is because of how the percpu
+counter works; it has a main count field as well as a pointer to the
+percpu variables. Each net namespace maintains its own main count
+variable, but all point to one set of percpu variables. When any net
+namespace happens to change one of the percpu variables to outside its
+small batch range, its count is moved to the net namespace's main count
+variable. So with multiple net namespaces operating concurrently, the
+dst_ops entries counter can stray from the actual value that it should
+be; if counts are consistently moved from one net namespace to another
+(which my testing showed is likely), then one net namespace winds up
+with a negative dst_ops count while another winds up with a continually
+increasing count, eventually reaching its gc_thresh limit, which causes
+all new traffic on the net namespace to fail with -ENOBUFS.
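+
+A minimal sketch of the bug class and the fix (fields simplified;
+pcpuc_entries is a struct percpu_counter embedded in dst_ops):
+
+	/* Broken: both copies share one set of percpu variables, but
+	 * each keeps a private main count, so batched flushes land in
+	 * whichever copy a CPU happens to update.
+	 */
+	net->xfrm.xfrm4_dst_ops = xfrm4_dst_ops_template;
+
+	/* Fixed: copy the template, then give this net namespace its
+	 * own percpu counter state.
+	 */
+	memcpy(&net->xfrm.xfrm4_dst_ops, &xfrm4_dst_ops_template,
+	       sizeof(xfrm4_dst_ops_template));
+	ret = dst_entries_init(&net->xfrm.xfrm4_dst_ops);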
+
+Signed-off-by: Dan Streetman <dan.streetman@canonical.com>
+Signed-off-by: Dan Streetman <ddstreet@ieee.org>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/xfrm4_policy.c | 46 +++++++++++++++++++++++++++++++++--------
+ net/ipv6/xfrm6_policy.c | 53 ++++++++++++++++++++++++++++++++++--------------
+ net/xfrm/xfrm_policy.c | 38 ----------------------------------
+ 3 files changed, 75 insertions(+), 62 deletions(-)
+
+--- a/net/ipv4/xfrm4_policy.c
++++ b/net/ipv4/xfrm4_policy.c
+@@ -230,7 +230,7 @@ static void xfrm4_dst_ifdown(struct dst_
+ xfrm_dst_ifdown(dst, dev);
+ }
+
+-static struct dst_ops xfrm4_dst_ops = {
++static struct dst_ops xfrm4_dst_ops_template = {
+ .family = AF_INET,
+ .gc = xfrm4_garbage_collect,
+ .update_pmtu = xfrm4_update_pmtu,
+@@ -244,7 +244,7 @@ static struct dst_ops xfrm4_dst_ops = {
+
+ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
+ .family = AF_INET,
+- .dst_ops = &xfrm4_dst_ops,
++ .dst_ops = &xfrm4_dst_ops_template,
+ .dst_lookup = xfrm4_dst_lookup,
+ .get_saddr = xfrm4_get_saddr,
+ .decode_session = _decode_session4,
+@@ -266,7 +266,7 @@ static struct ctl_table xfrm4_policy_tab
+ { }
+ };
+
+-static int __net_init xfrm4_net_init(struct net *net)
++static int __net_init xfrm4_net_sysctl_init(struct net *net)
+ {
+ struct ctl_table *table;
+ struct ctl_table_header *hdr;
+@@ -294,7 +294,7 @@ err_alloc:
+ return -ENOMEM;
+ }
+
+-static void __net_exit xfrm4_net_exit(struct net *net)
++static void __net_exit xfrm4_net_sysctl_exit(struct net *net)
+ {
+ struct ctl_table *table;
+
+@@ -306,12 +306,44 @@ static void __net_exit xfrm4_net_exit(st
+ if (!net_eq(net, &init_net))
+ kfree(table);
+ }
++#else /* CONFIG_SYSCTL */
++static inline int xfrm4_net_sysctl_init(struct net *net)
++{
++ return 0;
++}
++
++static inline void xfrm4_net_sysctl_exit(struct net *net)
++{
++}
++#endif
++
++static int __net_init xfrm4_net_init(struct net *net)
++{
++ int ret;
++
++ memcpy(&net->xfrm.xfrm4_dst_ops, &xfrm4_dst_ops_template,
++ sizeof(xfrm4_dst_ops_template));
++ ret = dst_entries_init(&net->xfrm.xfrm4_dst_ops);
++ if (ret)
++ return ret;
++
++ ret = xfrm4_net_sysctl_init(net);
++ if (ret)
++ dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
++
++ return ret;
++}
++
++static void __net_exit xfrm4_net_exit(struct net *net)
++{
++ xfrm4_net_sysctl_exit(net);
++ dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
++}
+
+ static struct pernet_operations __net_initdata xfrm4_net_ops = {
+ .init = xfrm4_net_init,
+ .exit = xfrm4_net_exit,
+ };
+-#endif
+
+ static void __init xfrm4_policy_init(void)
+ {
+@@ -320,13 +352,9 @@ static void __init xfrm4_policy_init(voi
+
+ void __init xfrm4_init(void)
+ {
+- dst_entries_init(&xfrm4_dst_ops);
+-
+ xfrm4_state_init();
+ xfrm4_policy_init();
+ xfrm4_protocol_init();
+-#ifdef CONFIG_SYSCTL
+ register_pernet_subsys(&xfrm4_net_ops);
+-#endif
+ }
+
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -289,7 +289,7 @@ static void xfrm6_dst_ifdown(struct dst_
+ xfrm_dst_ifdown(dst, dev);
+ }
+
+-static struct dst_ops xfrm6_dst_ops = {
++static struct dst_ops xfrm6_dst_ops_template = {
+ .family = AF_INET6,
+ .gc = xfrm6_garbage_collect,
+ .update_pmtu = xfrm6_update_pmtu,
+@@ -303,7 +303,7 @@ static struct dst_ops xfrm6_dst_ops = {
+
+ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
+ .family = AF_INET6,
+- .dst_ops = &xfrm6_dst_ops,
++ .dst_ops = &xfrm6_dst_ops_template,
+ .dst_lookup = xfrm6_dst_lookup,
+ .get_saddr = xfrm6_get_saddr,
+ .decode_session = _decode_session6,
+@@ -336,7 +336,7 @@ static struct ctl_table xfrm6_policy_tab
+ { }
+ };
+
+-static int __net_init xfrm6_net_init(struct net *net)
++static int __net_init xfrm6_net_sysctl_init(struct net *net)
+ {
+ struct ctl_table *table;
+ struct ctl_table_header *hdr;
+@@ -364,7 +364,7 @@ err_alloc:
+ return -ENOMEM;
+ }
+
+-static void __net_exit xfrm6_net_exit(struct net *net)
++static void __net_exit xfrm6_net_sysctl_exit(struct net *net)
+ {
+ struct ctl_table *table;
+
+@@ -376,24 +376,52 @@ static void __net_exit xfrm6_net_exit(st
+ if (!net_eq(net, &init_net))
+ kfree(table);
+ }
++#else /* CONFIG_SYSCTL */
++static inline int xfrm6_net_sysctl_init(struct net *net)
++{
++ return 0;
++}
++
++static inline void xfrm6_net_sysctl_exit(struct net *net)
++{
++}
++#endif
++
++static int __net_init xfrm6_net_init(struct net *net)
++{
++ int ret;
++
++ memcpy(&net->xfrm.xfrm6_dst_ops, &xfrm6_dst_ops_template,
++ sizeof(xfrm6_dst_ops_template));
++ ret = dst_entries_init(&net->xfrm.xfrm6_dst_ops);
++ if (ret)
++ return ret;
++
++ ret = xfrm6_net_sysctl_init(net);
++ if (ret)
++ dst_entries_destroy(&net->xfrm.xfrm6_dst_ops);
++
++ return ret;
++}
++
++static void __net_exit xfrm6_net_exit(struct net *net)
++{
++ xfrm6_net_sysctl_exit(net);
++ dst_entries_destroy(&net->xfrm.xfrm6_dst_ops);
++}
+
+ static struct pernet_operations xfrm6_net_ops = {
+ .init = xfrm6_net_init,
+ .exit = xfrm6_net_exit,
+ };
+-#endif
+
+ int __init xfrm6_init(void)
+ {
+ int ret;
+
+- dst_entries_init(&xfrm6_dst_ops);
+-
+ ret = xfrm6_policy_init();
+- if (ret) {
+- dst_entries_destroy(&xfrm6_dst_ops);
++ if (ret)
+ goto out;
+- }
+ ret = xfrm6_state_init();
+ if (ret)
+ goto out_policy;
+@@ -402,9 +430,7 @@ int __init xfrm6_init(void)
+ if (ret)
+ goto out_state;
+
+-#ifdef CONFIG_SYSCTL
+ register_pernet_subsys(&xfrm6_net_ops);
+-#endif
+ out:
+ return ret;
+ out_state:
+@@ -416,11 +442,8 @@ out_policy:
+
+ void xfrm6_fini(void)
+ {
+-#ifdef CONFIG_SYSCTL
+ unregister_pernet_subsys(&xfrm6_net_ops);
+-#endif
+ xfrm6_protocol_fini();
+ xfrm6_policy_fini();
+ xfrm6_state_fini();
+- dst_entries_destroy(&xfrm6_dst_ops);
+ }
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -2806,7 +2806,6 @@ static struct neighbour *xfrm_neigh_look
+
+ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
+ {
+- struct net *net;
+ int err = 0;
+ if (unlikely(afinfo == NULL))
+ return -EINVAL;
+@@ -2837,26 +2836,6 @@ int xfrm_policy_register_afinfo(struct x
+ }
+ spin_unlock(&xfrm_policy_afinfo_lock);
+
+- rtnl_lock();
+- for_each_net(net) {
+- struct dst_ops *xfrm_dst_ops;
+-
+- switch (afinfo->family) {
+- case AF_INET:
+- xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
+- break;
+-#if IS_ENABLED(CONFIG_IPV6)
+- case AF_INET6:
+- xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
+- break;
+-#endif
+- default:
+- BUG();
+- }
+- *xfrm_dst_ops = *afinfo->dst_ops;
+- }
+- rtnl_unlock();
+-
+ return err;
+ }
+ EXPORT_SYMBOL(xfrm_policy_register_afinfo);
+@@ -2892,22 +2871,6 @@ int xfrm_policy_unregister_afinfo(struct
+ }
+ EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
+
+-static void __net_init xfrm_dst_ops_init(struct net *net)
+-{
+- struct xfrm_policy_afinfo *afinfo;
+-
+- rcu_read_lock();
+- afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
+- if (afinfo)
+- net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
+-#if IS_ENABLED(CONFIG_IPV6)
+- afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
+- if (afinfo)
+- net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
+-#endif
+- rcu_read_unlock();
+-}
+-
+ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+ {
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+@@ -3056,7 +3019,6 @@ static int __net_init xfrm_net_init(stru
+ rv = xfrm_policy_init(net);
+ if (rv < 0)
+ goto out_policy;
+- xfrm_dst_ops_init(net);
+ rv = xfrm_sysctl_init(net);
+ if (rv < 0)
+ goto out_sysctl;