From: Greg Kroah-Hartman Date: Wed, 27 Jan 2016 06:27:33 +0000 (-0800) Subject: 4.1-stable patches X-Git-Tag: v3.10.96~22 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8cf70caa7dc42bb075931e1374cbdef04e7b226e;p=thirdparty%2Fkernel%2Fstable-queue.git 4.1-stable patches added patches: addrconf-always-initialize-sysctl-table-data.patch batman-adv-avoid-recursive-call_rcu-for-batadv_bla_claim.patch batman-adv-avoid-recursive-call_rcu-for-batadv_nc_node.patch batman-adv-drop-immediate-batadv_hard_iface-free-function.patch batman-adv-drop-immediate-batadv_neigh_node-free-function.patch batman-adv-drop-immediate-batadv_orig_ifinfo-free-function.patch batman-adv-drop-immediate-neigh_ifinfo-free-function.patch batman-adv-drop-immediate-orig_node-free-function.patch bonding-prevent-ipv6-link-local-address-on-enslaved-devices.patch bridge-fix-lockdep-addr_list_lock-false-positive-splat.patch bridge-only-call-sbin-bridge-stp-for-the-initial-network-namespace.patch connector-bump-skb-users-before-callback-invocation.patch ipv6-addrlabel-fix-ip6addrlbl_get.patch ipv6-tcp-add-rcu-locking-in-tcp_v6_send_synack.patch ipv6-update-skb-csum-when-ce-mark-is-propagated.patch isdn_ppp-add-checks-for-allocation-failure-in-isdn_ppp_open.patch net-bpf-reject-invalid-shifts.patch net-cdc_ncm-avoid-changing-rx-tx-buffers-on-mtu-changes.patch net-filter-make-jits-zero-a-for-skf_ad_alu_xor_x.patch net-possible-use-after-free-in-dst_release.patch net-preserve-ip-control-block-during-gso-segmentation.patch net-sched-fix-missing-free-per-cpu-on-qstats.patch net-sctp-prevent-writes-to-cookie_hmac_alg-from-accessing-invalid-memory.patch phonet-properly-unshare-skbs-in-phonet_rcv.patch ppp-slip-validate-vj-compression-slot-parameters-completely.patch sctp-prevent-soft-lockup-when-sctp_accept-is-called-during-a-timeout-event.patch sctp-sctp-should-release-assoc-when-sctp_make_abort_user-return-null-in-sctp_close.patch tcp-dccp-fix-old-style-declarations.patch tcp-dccp-fix-timewait-races-in-timer-handling.patch tcp_yeah-don-t-set-ssthresh-below-2.patch team-replace-rcu_read_lock-with-a-mutex-in-team_vlan_rx_kill_vid.patch udp-disallow-ufo-for-sockets-with-so_no_check-option.patch unix-properly-account-for-fds-passed-over-unix-sockets.patch utf-8-q-veth-20don-e2-80-99t-20modify-20ip-5fsum-utf-8-q-med-20doing-20so-20treats-20packets-20with-20bad-20checksums-utf-8-q-20as-20good.patch vxlan-fix-test-which-detect-duplicate-vxlan-iface.patch xen-netback-respect-user-provided-max_queues.patch xen-netfront-respect-user-provided-max_queues.patch xen-netfront-update-num_queues-to-real-created.patch xfrm-dst_entries_init-per-net-dst_ops.patch --- diff --git a/queue-4.1/addrconf-always-initialize-sysctl-table-data.patch b/queue-4.1/addrconf-always-initialize-sysctl-table-data.patch new file mode 100644 index 00000000000..dba6e8be34d --- /dev/null +++ b/queue-4.1/addrconf-always-initialize-sysctl-table-data.patch @@ -0,0 +1,44 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: WANG Cong +Date: Mon, 21 Dec 2015 10:55:45 -0800 +Subject: addrconf: always initialize sysctl table data + +From: WANG Cong + +[ Upstream commit 5449a5ca9bc27dd51a462de7ca0b1cd861cd2bd0 ] + +When sysctl performs restrict writes, it allows to write from +a middle position of a sysctl file, which requires us to initialize +the table data before calling proc_dostring() for the write case. 
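
As a rough sketch of the resulting handler shape (illustrative names, not the
real addrconf code): the local table's data is filled in unconditionally, so
that a restricted write at a non-zero offset lets proc_dostring() operate on
the middle of the current value instead of on uninitialized memory.

static int example_string_handler(struct ctl_table *ctl, int write,
				  void __user *buffer, size_t *lenp,
				  loff_t *ppos)
{
	struct ctl_table lctl = *ctl;	/* local copy, as the real handler uses */
	char str[64];

	/* initialize for the read *and* the write case */
	snprintf(str, sizeof(str), "%s", (char *)ctl->data);
	lctl.data = str;

	return proc_dostring(&lctl, write, buffer, lenp, ppos);
}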
+ +Fixes: 3d1bec99320d ("ipv6: introduce secret_stable to ipv6_devconf") +Reported-by: Sasha Levin +Acked-by: Hannes Frederic Sowa +Tested-by: Sasha Levin +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 11 ++++------- + 1 file changed, 4 insertions(+), 7 deletions(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -5267,13 +5267,10 @@ static int addrconf_sysctl_stable_secret + goto out; + } + +- if (!write) { +- err = snprintf(str, sizeof(str), "%pI6", +- &secret->secret); +- if (err >= sizeof(str)) { +- err = -EIO; +- goto out; +- } ++ err = snprintf(str, sizeof(str), "%pI6", &secret->secret); ++ if (err >= sizeof(str)) { ++ err = -EIO; ++ goto out; + } + + err = proc_dostring(&lctl, write, buffer, lenp, ppos); diff --git a/queue-4.1/batman-adv-avoid-recursive-call_rcu-for-batadv_bla_claim.patch b/queue-4.1/batman-adv-avoid-recursive-call_rcu-for-batadv_bla_claim.patch new file mode 100644 index 00000000000..05d19a26fbb --- /dev/null +++ b/queue-4.1/batman-adv-avoid-recursive-call_rcu-for-batadv_bla_claim.patch @@ -0,0 +1,61 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Sven Eckelmann +Date: Thu, 14 Jan 2016 15:28:19 +0100 +Subject: batman-adv: Avoid recursive call_rcu for batadv_bla_claim + +From: Sven Eckelmann + +[ Upstream commit 63b399272294e7a939cde41792dca38c549f0484 ] + +The batadv_claim_free_ref function uses call_rcu to delay the free of the +batadv_bla_claim object until no (already started) rcu_read_lock is enabled +anymore. This makes sure that no context is still trying to access the +object which should be removed. But batadv_bla_claim also contains a +reference to backbone_gw which must be removed. + +The reference drop of backbone_gw was done in the call_rcu function +batadv_claim_free_rcu but should actually be done in the +batadv_claim_release function to avoid nested call_rcus. This is important +because rcu_barrier (e.g. batadv_softif_free or batadv_exit) will not +detect the inner call_rcu as relevant for its execution. Otherwise this +barrier will most likely be inserted in the queue before the callback of +the first call_rcu was executed. The caller of rcu_barrier will therefore +continue to run before the inner call_rcu callback finished. 
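
The two orderings, as a minimal hedged sketch (made-up type and helper names;
the real objects here are batadv_bla_claim and its backbone_gw):

struct inner { atomic_t refcount; struct rcu_head rcu; };
struct outer { struct inner *inner; atomic_t refcount; struct rcu_head rcu; };

static void inner_free_ref(struct inner *i);	/* may itself use call_rcu() */

/* problematic: the inner reference is dropped from inside an rcu callback,
 * so the nested call_rcu() is queued after rcu_barrier() already sampled
 * the pending callbacks */
static void outer_free_rcu(struct rcu_head *rcu)
{
	struct outer *o = container_of(rcu, struct outer, rcu);

	inner_free_ref(o->inner);
	kfree(o);
}

/* fixed: drop the inner reference right away and defer only the kfree,
 * so a single grace period covers the whole teardown */
static void outer_release(struct outer *o)
{
	inner_free_ref(o->inner);
	kfree_rcu(o, rcu);
}

The remaining batman-adv patches in this series apply the same transformation
to their respective objects.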
+ +Fixes: 23721387c409 ("batman-adv: add basic bridge loop avoidance code") +Signed-off-by: Sven Eckelmann +Acked-by: Simon Wunderlich +Signed-off-by: Marek Lindner +Signed-off-by: Antonio Quartulli +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/bridge_loop_avoidance.c | 10 +++------- + 1 file changed, 3 insertions(+), 7 deletions(-) + +--- a/net/batman-adv/bridge_loop_avoidance.c ++++ b/net/batman-adv/bridge_loop_avoidance.c +@@ -112,21 +112,17 @@ batadv_backbone_gw_free_ref(struct batad + } + + /* finally deinitialize the claim */ +-static void batadv_claim_free_rcu(struct rcu_head *rcu) ++static void batadv_claim_release(struct batadv_bla_claim *claim) + { +- struct batadv_bla_claim *claim; +- +- claim = container_of(rcu, struct batadv_bla_claim, rcu); +- + batadv_backbone_gw_free_ref(claim->backbone_gw); +- kfree(claim); ++ kfree_rcu(claim, rcu); + } + + /* free a claim, call claim_free_rcu if its the last reference */ + static void batadv_claim_free_ref(struct batadv_bla_claim *claim) + { + if (atomic_dec_and_test(&claim->refcount)) +- call_rcu(&claim->rcu, batadv_claim_free_rcu); ++ batadv_claim_release(claim); + } + + /** diff --git a/queue-4.1/batman-adv-avoid-recursive-call_rcu-for-batadv_nc_node.patch b/queue-4.1/batman-adv-avoid-recursive-call_rcu-for-batadv_nc_node.patch new file mode 100644 index 00000000000..7a7ef419984 --- /dev/null +++ b/queue-4.1/batman-adv-avoid-recursive-call_rcu-for-batadv_nc_node.patch @@ -0,0 +1,72 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Sven Eckelmann +Date: Tue, 5 Jan 2016 12:06:19 +0100 +Subject: batman-adv: Avoid recursive call_rcu for batadv_nc_node + +From: Sven Eckelmann + +[ Upstream commit 44e8e7e91d6c7c7ab19688750f7257292640d1a0 ] + +The batadv_nc_node_free_ref function uses call_rcu to delay the free of the +batadv_nc_node object until no (already started) rcu_read_lock is enabled +anymore. This makes sure that no context is still trying to access the +object which should be removed. But batadv_nc_node also contains a +reference to orig_node which must be removed. + +The reference drop of orig_node was done in the call_rcu function +batadv_nc_node_free_rcu but should actually be done in the +batadv_nc_node_release function to avoid nested call_rcus. This is +important because rcu_barrier (e.g. batadv_softif_free or batadv_exit) will +not detect the inner call_rcu as relevant for its execution. Otherwise this +barrier will most likely be inserted in the queue before the callback of +the first call_rcu was executed. The caller of rcu_barrier will therefore +continue to run before the inner call_rcu callback finished. 
+ +Fixes: d56b1705e28c ("batman-adv: network coding - detect coding nodes and remove these after timeout") +Signed-off-by: Sven Eckelmann +Signed-off-by: Marek Lindner +Signed-off-by: Antonio Quartulli +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/network-coding.c | 19 ++++++++----------- + 1 file changed, 8 insertions(+), 11 deletions(-) + +--- a/net/batman-adv/network-coding.c ++++ b/net/batman-adv/network-coding.c +@@ -175,28 +175,25 @@ void batadv_nc_init_orig(struct batadv_o + } + + /** +- * batadv_nc_node_free_rcu - rcu callback to free an nc node and remove +- * its refcount on the orig_node +- * @rcu: rcu pointer of the nc node ++ * batadv_nc_node_release - release nc_node from lists and queue for free after ++ * rcu grace period ++ * @nc_node: the nc node to free + */ +-static void batadv_nc_node_free_rcu(struct rcu_head *rcu) ++static void batadv_nc_node_release(struct batadv_nc_node *nc_node) + { +- struct batadv_nc_node *nc_node; +- +- nc_node = container_of(rcu, struct batadv_nc_node, rcu); + batadv_orig_node_free_ref(nc_node->orig_node); +- kfree(nc_node); ++ kfree_rcu(nc_node, rcu); + } + + /** +- * batadv_nc_node_free_ref - decrements the nc node refcounter and possibly +- * frees it ++ * batadv_nc_node_free_ref - decrement the nc node refcounter and possibly ++ * release it + * @nc_node: the nc node to free + */ + static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node) + { + if (atomic_dec_and_test(&nc_node->refcount)) +- call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu); ++ batadv_nc_node_release(nc_node); + } + + /** diff --git a/queue-4.1/batman-adv-drop-immediate-batadv_hard_iface-free-function.patch b/queue-4.1/batman-adv-drop-immediate-batadv_hard_iface-free-function.patch new file mode 100644 index 00000000000..81fa704cd77 --- /dev/null +++ b/queue-4.1/batman-adv-drop-immediate-batadv_hard_iface-free-function.patch @@ -0,0 +1,93 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Sven Eckelmann +Date: Tue, 5 Jan 2016 12:06:25 +0100 +Subject: batman-adv: Drop immediate batadv_hard_iface free function + +From: Sven Eckelmann + +[ Upstream commit b4d922cfc9c08318eeb77d53b7633740e6b0efb0 ] + +It is not allowed to free the memory of an object which is part of a list +which is protected by rcu-read-side-critical sections without making sure +that no other context is accessing the object anymore. This usually happens +by removing the references to this object and then waiting until the rcu +grace period is over and no one (allowedly) accesses it anymore. + +But the _now functions ignore this completely. They free the object +directly even when a different context still tries to access it. This has +to be avoided and thus these functions must be removed and all functions +have to use batadv_hardif_free_ref. 
+ +Fixes: 89652331c00f ("batman-adv: split tq information in neigh_node struct") +Signed-off-by: Sven Eckelmann +Signed-off-by: Marek Lindner +Signed-off-by: Antonio Quartulli +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/hard-interface.h | 12 ------------ + net/batman-adv/originator.c | 16 +++++++--------- + 2 files changed, 7 insertions(+), 21 deletions(-) + +--- a/net/batman-adv/hard-interface.h ++++ b/net/batman-adv/hard-interface.h +@@ -64,18 +64,6 @@ batadv_hardif_free_ref(struct batadv_har + call_rcu(&hard_iface->rcu, batadv_hardif_free_rcu); + } + +-/** +- * batadv_hardif_free_ref_now - decrement the hard interface refcounter and +- * possibly free it (without rcu callback) +- * @hard_iface: the hard interface to free +- */ +-static inline void +-batadv_hardif_free_ref_now(struct batadv_hard_iface *hard_iface) +-{ +- if (atomic_dec_and_test(&hard_iface->refcount)) +- batadv_hardif_free_rcu(&hard_iface->rcu); +-} +- + static inline struct batadv_hard_iface * + batadv_primary_if_get_selected(struct batadv_priv *bat_priv) + { +--- a/net/batman-adv/originator.c ++++ b/net/batman-adv/originator.c +@@ -175,24 +175,22 @@ void batadv_neigh_ifinfo_free_ref(struct + } + + /** +- * batadv_neigh_node_free_rcu - free the neigh_node +- * @rcu: rcu pointer of the neigh_node ++ * batadv_neigh_node_release - release neigh_node from lists and queue for ++ * free after rcu grace period ++ * @neigh_node: neigh neighbor to free + */ +-static void batadv_neigh_node_free_rcu(struct rcu_head *rcu) ++static void batadv_neigh_node_release(struct batadv_neigh_node *neigh_node) + { + struct hlist_node *node_tmp; +- struct batadv_neigh_node *neigh_node; + struct batadv_neigh_ifinfo *neigh_ifinfo; + +- neigh_node = container_of(rcu, struct batadv_neigh_node, rcu); +- + hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, + &neigh_node->ifinfo_list, list) { + batadv_neigh_ifinfo_free_ref(neigh_ifinfo); + } +- batadv_hardif_free_ref_now(neigh_node->if_incoming); ++ batadv_hardif_free_ref(neigh_node->if_incoming); + +- kfree(neigh_node); ++ kfree_rcu(neigh_node, rcu); + } + + /** +@@ -203,7 +201,7 @@ static void batadv_neigh_node_free_rcu(s + void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node) + { + if (atomic_dec_and_test(&neigh_node->refcount)) +- call_rcu(&neigh_node->rcu, batadv_neigh_node_free_rcu); ++ batadv_neigh_node_release(neigh_node); + } + + /** diff --git a/queue-4.1/batman-adv-drop-immediate-batadv_neigh_node-free-function.patch b/queue-4.1/batman-adv-drop-immediate-batadv_neigh_node-free-function.patch new file mode 100644 index 00000000000..b7763d56f6c --- /dev/null +++ b/queue-4.1/batman-adv-drop-immediate-batadv_neigh_node-free-function.patch @@ -0,0 +1,95 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Sven Eckelmann +Date: Tue, 5 Jan 2016 12:06:22 +0100 +Subject: batman-adv: Drop immediate batadv_neigh_node free function + +From: Sven Eckelmann + +[ Upstream commit 2baa753c276f27f8e844637561ad597867aa6fb6 ] + +It is not allowed to free the memory of an object which is part of a list +which is protected by rcu-read-side-critical sections without making sure +that no other context is accessing the object anymore. This usually happens +by removing the references to this object and then waiting until the rcu +grace period is over and no one (allowedly) accesses it anymore. + +But the _now functions ignore this completely. They free the object +directly even when a different context still tries to access it. 
This has +to be avoided and thus these functions must be removed and all functions +have to use batadv_neigh_node_free_ref. + +Fixes: 89652331c00f ("batman-adv: split tq information in neigh_node struct") +Signed-off-by: Sven Eckelmann +Signed-off-by: Marek Lindner +Signed-off-by: Antonio Quartulli +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/originator.c | 33 ++++++++++----------------------- + 1 file changed, 10 insertions(+), 23 deletions(-) + +--- a/net/batman-adv/originator.c ++++ b/net/batman-adv/originator.c +@@ -210,20 +210,8 @@ static void batadv_neigh_node_free_rcu(s + } + + /** +- * batadv_neigh_node_free_ref_now - decrement the neighbors refcounter +- * and possibly free it (without rcu callback) +- * @neigh_node: neigh neighbor to free +- */ +-static void +-batadv_neigh_node_free_ref_now(struct batadv_neigh_node *neigh_node) +-{ +- if (atomic_dec_and_test(&neigh_node->refcount)) +- batadv_neigh_node_free_rcu(&neigh_node->rcu); +-} +- +-/** + * batadv_neigh_node_free_ref - decrement the neighbors refcounter +- * and possibly free it ++ * and possibly release it + * @neigh_node: neigh neighbor to free + */ + void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node) +@@ -495,24 +483,23 @@ batadv_neigh_node_get(const struct batad + } + + /** +- * batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object +- * @rcu: rcu pointer of the orig_ifinfo object ++ * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for ++ * free after rcu grace period ++ * @orig_ifinfo: the orig_ifinfo object to release + */ +-static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu) ++static void batadv_orig_ifinfo_release(struct batadv_orig_ifinfo *orig_ifinfo) + { +- struct batadv_orig_ifinfo *orig_ifinfo; + struct batadv_neigh_node *router; + +- orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu); +- + if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT) +- batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing); ++ batadv_hardif_free_ref(orig_ifinfo->if_outgoing); + + /* this is the last reference to this object */ + router = rcu_dereference_protected(orig_ifinfo->router, true); + if (router) +- batadv_neigh_node_free_ref_now(router); +- kfree(orig_ifinfo); ++ batadv_neigh_node_free_ref(router); ++ ++ kfree_rcu(orig_ifinfo, rcu); + } + + /** +@@ -523,7 +510,7 @@ static void batadv_orig_ifinfo_free_rcu( + void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo) + { + if (atomic_dec_and_test(&orig_ifinfo->refcount)) +- call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu); ++ batadv_orig_ifinfo_release(orig_ifinfo); + } + + /** diff --git a/queue-4.1/batman-adv-drop-immediate-batadv_orig_ifinfo-free-function.patch b/queue-4.1/batman-adv-drop-immediate-batadv_orig_ifinfo-free-function.patch new file mode 100644 index 00000000000..495a763eecc --- /dev/null +++ b/queue-4.1/batman-adv-drop-immediate-batadv_orig_ifinfo-free-function.patch @@ -0,0 +1,139 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Sven Eckelmann +Date: Tue, 5 Jan 2016 12:06:21 +0100 +Subject: batman-adv: Drop immediate batadv_orig_ifinfo free function + +From: Sven Eckelmann + +[ Upstream commit deed96605f5695cb945e0b3d79429581857a2b9d ] + +It is not allowed to free the memory of an object which is part of a list +which is protected by rcu-read-side-critical sections without making sure +that no other context is accessing the object anymore. 
This usually happens +by removing the references to this object and then waiting until the rcu +grace period is over and no one (allowedly) accesses it anymore. + +But the _now functions ignore this completely. They free the object +directly even when a different context still tries to access it. This has +to be avoided and thus these functions must be removed and all functions +have to use batadv_orig_ifinfo_free_ref. + +Fixes: 7351a4822d42 ("batman-adv: split out router from orig_node") +Signed-off-by: Sven Eckelmann +Signed-off-by: Marek Lindner +Signed-off-by: Antonio Quartulli +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/originator.c | 59 +++++++++++++++++++++++--------------------- + 1 file changed, 31 insertions(+), 28 deletions(-) + +--- a/net/batman-adv/originator.c ++++ b/net/batman-adv/originator.c +@@ -516,76 +516,79 @@ static void batadv_orig_ifinfo_free_rcu( + } + + /** +- * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free +- * the orig_ifinfo (without rcu callback) ++ * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly release ++ * the orig_ifinfo + * @orig_ifinfo: the orig_ifinfo object to release + */ +-static void +-batadv_orig_ifinfo_free_ref_now(struct batadv_orig_ifinfo *orig_ifinfo) ++void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo) + { + if (atomic_dec_and_test(&orig_ifinfo->refcount)) +- batadv_orig_ifinfo_free_rcu(&orig_ifinfo->rcu); ++ call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu); + } + + /** +- * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly free +- * the orig_ifinfo +- * @orig_ifinfo: the orig_ifinfo object to release ++ * batadv_orig_node_free_rcu - free the orig_node ++ * @rcu: rcu pointer of the orig_node + */ +-void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo) ++static void batadv_orig_node_free_rcu(struct rcu_head *rcu) + { +- if (atomic_dec_and_test(&orig_ifinfo->refcount)) +- call_rcu(&orig_ifinfo->rcu, batadv_orig_ifinfo_free_rcu); ++ struct batadv_orig_node *orig_node; ++ ++ orig_node = container_of(rcu, struct batadv_orig_node, rcu); ++ ++ batadv_mcast_purge_orig(orig_node); ++ ++ batadv_frag_purge_orig(orig_node, NULL); ++ ++ if (orig_node->bat_priv->bat_algo_ops->bat_orig_free) ++ orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node); ++ ++ kfree(orig_node->tt_buff); ++ kfree(orig_node); + } + +-static void batadv_orig_node_free_rcu(struct rcu_head *rcu) ++/** ++ * batadv_orig_node_release - release orig_node from lists and queue for ++ * free after rcu grace period ++ * @orig_node: the orig node to free ++ */ ++static void batadv_orig_node_release(struct batadv_orig_node *orig_node) + { + struct hlist_node *node_tmp; + struct batadv_neigh_node *neigh_node; +- struct batadv_orig_node *orig_node; + struct batadv_orig_ifinfo *orig_ifinfo; + +- orig_node = container_of(rcu, struct batadv_orig_node, rcu); +- + spin_lock_bh(&orig_node->neigh_list_lock); + + /* for all neighbors towards this originator ... 
*/ + hlist_for_each_entry_safe(neigh_node, node_tmp, + &orig_node->neigh_list, list) { + hlist_del_rcu(&neigh_node->list); +- batadv_neigh_node_free_ref_now(neigh_node); ++ batadv_neigh_node_free_ref(neigh_node); + } + + hlist_for_each_entry_safe(orig_ifinfo, node_tmp, + &orig_node->ifinfo_list, list) { + hlist_del_rcu(&orig_ifinfo->list); +- batadv_orig_ifinfo_free_ref_now(orig_ifinfo); ++ batadv_orig_ifinfo_free_ref(orig_ifinfo); + } + spin_unlock_bh(&orig_node->neigh_list_lock); + +- batadv_mcast_purge_orig(orig_node); +- + /* Free nc_nodes */ + batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL); + +- batadv_frag_purge_orig(orig_node, NULL); +- +- if (orig_node->bat_priv->bat_algo_ops->bat_orig_free) +- orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node); +- +- kfree(orig_node->tt_buff); +- kfree(orig_node); ++ call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu); + } + + /** + * batadv_orig_node_free_ref - decrement the orig node refcounter and possibly +- * schedule an rcu callback for freeing it ++ * release it + * @orig_node: the orig node to free + */ + void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node) + { + if (atomic_dec_and_test(&orig_node->refcount)) +- call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu); ++ batadv_orig_node_release(orig_node); + } + + /** diff --git a/queue-4.1/batman-adv-drop-immediate-neigh_ifinfo-free-function.patch b/queue-4.1/batman-adv-drop-immediate-neigh_ifinfo-free-function.patch new file mode 100644 index 00000000000..9754d5a93ed --- /dev/null +++ b/queue-4.1/batman-adv-drop-immediate-neigh_ifinfo-free-function.patch @@ -0,0 +1,92 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Sven Eckelmann +Date: Tue, 5 Jan 2016 12:06:24 +0100 +Subject: batman-adv: Drop immediate neigh_ifinfo free function + +From: Sven Eckelmann + +[ Upstream commit ae3e1e36e3cb6c686a7a2725af20ca86aa46d62a ] + +It is not allowed to free the memory of an object which is part of a list +which is protected by rcu-read-side-critical sections without making sure +that no other context is accessing the object anymore. This usually happens +by removing the references to this object and then waiting until the rcu +grace period is over and no one (allowedly) accesses it anymore. + +But the _now functions ignore this completely. They free the object +directly even when a different context still tries to access it. This has +to be avoided and thus these functions must be removed and all functions +have to use batadv_neigh_ifinfo_free_ref. 
+ +Fixes: 89652331c00f ("batman-adv: split tq information in neigh_node struct") +Signed-off-by: Sven Eckelmann +Signed-off-by: Marek Lindner +Signed-off-by: Antonio Quartulli +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/originator.c | 34 ++++++++++------------------------ + 1 file changed, 10 insertions(+), 24 deletions(-) + +--- a/net/batman-adv/originator.c ++++ b/net/batman-adv/originator.c +@@ -150,42 +150,28 @@ err: + } + + /** +- * batadv_neigh_ifinfo_free_rcu - free the neigh_ifinfo object +- * @rcu: rcu pointer of the neigh_ifinfo object +- */ +-static void batadv_neigh_ifinfo_free_rcu(struct rcu_head *rcu) +-{ +- struct batadv_neigh_ifinfo *neigh_ifinfo; +- +- neigh_ifinfo = container_of(rcu, struct batadv_neigh_ifinfo, rcu); +- +- if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT) +- batadv_hardif_free_ref_now(neigh_ifinfo->if_outgoing); +- +- kfree(neigh_ifinfo); +-} +- +-/** +- * batadv_neigh_ifinfo_free_now - decrement the refcounter and possibly free +- * the neigh_ifinfo (without rcu callback) ++ * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for ++ * free after rcu grace period + * @neigh_ifinfo: the neigh_ifinfo object to release + */ + static void +-batadv_neigh_ifinfo_free_ref_now(struct batadv_neigh_ifinfo *neigh_ifinfo) ++batadv_neigh_ifinfo_release(struct batadv_neigh_ifinfo *neigh_ifinfo) + { +- if (atomic_dec_and_test(&neigh_ifinfo->refcount)) +- batadv_neigh_ifinfo_free_rcu(&neigh_ifinfo->rcu); ++ if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT) ++ batadv_hardif_free_ref(neigh_ifinfo->if_outgoing); ++ ++ kfree_rcu(neigh_ifinfo, rcu); + } + + /** +- * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly free ++ * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly release + * the neigh_ifinfo + * @neigh_ifinfo: the neigh_ifinfo object to release + */ + void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo) + { + if (atomic_dec_and_test(&neigh_ifinfo->refcount)) +- call_rcu(&neigh_ifinfo->rcu, batadv_neigh_ifinfo_free_rcu); ++ batadv_neigh_ifinfo_release(neigh_ifinfo); + } + + /** +@@ -202,7 +188,7 @@ static void batadv_neigh_node_free_rcu(s + + hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, + &neigh_node->ifinfo_list, list) { +- batadv_neigh_ifinfo_free_ref_now(neigh_ifinfo); ++ batadv_neigh_ifinfo_free_ref(neigh_ifinfo); + } + batadv_hardif_free_ref_now(neigh_node->if_incoming); + diff --git a/queue-4.1/batman-adv-drop-immediate-orig_node-free-function.patch b/queue-4.1/batman-adv-drop-immediate-orig_node-free-function.patch new file mode 100644 index 00000000000..b5429dad17e --- /dev/null +++ b/queue-4.1/batman-adv-drop-immediate-orig_node-free-function.patch @@ -0,0 +1,111 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Sven Eckelmann +Date: Tue, 5 Jan 2016 12:06:20 +0100 +Subject: batman-adv: Drop immediate orig_node free function + +From: Sven Eckelmann + +[ Upstream commit 42eff6a617e23b691f8e4467f4687ed7245a92db ] + +It is not allowed to free the memory of an object which is part of a list +which is protected by rcu-read-side-critical sections without making sure +that no other context is accessing the object anymore. This usually happens +by removing the references to this object and then waiting until the rcu +grace period is over and no one (allowedly) accesses it anymore. + +But the _now functions ignore this completely. They free the object +directly even when a different context still tries to access it. 
This has +to be avoided and thus these functions must be removed and all functions +have to use batadv_orig_node_free_ref. + +Fixes: 72822225bd41 ("batman-adv: Fix rcu_barrier() miss due to double call_rcu() in TT code") +Signed-off-by: Sven Eckelmann +Signed-off-by: Marek Lindner +Signed-off-by: Antonio Quartulli +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/originator.c | 11 ----------- + net/batman-adv/originator.h | 1 - + net/batman-adv/translation-table.c | 28 +++++++++++++--------------- + 3 files changed, 13 insertions(+), 27 deletions(-) + +--- a/net/batman-adv/originator.c ++++ b/net/batman-adv/originator.c +@@ -562,17 +562,6 @@ void batadv_orig_node_free_ref(struct ba + batadv_orig_node_release(orig_node); + } + +-/** +- * batadv_orig_node_free_ref_now - decrement the orig node refcounter and +- * possibly free it (without rcu callback) +- * @orig_node: the orig node to free +- */ +-void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node) +-{ +- if (atomic_dec_and_test(&orig_node->refcount)) +- batadv_orig_node_free_rcu(&orig_node->rcu); +-} +- + void batadv_originator_free(struct batadv_priv *bat_priv) + { + struct batadv_hashtable *hash = bat_priv->orig_hash; +--- a/net/batman-adv/originator.h ++++ b/net/batman-adv/originator.h +@@ -25,7 +25,6 @@ int batadv_originator_init(struct batadv + void batadv_originator_free(struct batadv_priv *bat_priv); + void batadv_purge_orig_ref(struct batadv_priv *bat_priv); + void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node); +-void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node); + struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, + const uint8_t *addr); + struct batadv_neigh_node * +--- a/net/batman-adv/translation-table.c ++++ b/net/batman-adv/translation-table.c +@@ -219,20 +219,6 @@ int batadv_tt_global_hash_count(struct b + return count; + } + +-static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) +-{ +- struct batadv_tt_orig_list_entry *orig_entry; +- +- orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu); +- +- /* We are in an rcu callback here, therefore we cannot use +- * batadv_orig_node_free_ref() and its call_rcu(): +- * An rcu_barrier() wouldn't wait for that to finish +- */ +- batadv_orig_node_free_ref_now(orig_entry->orig_node); +- kfree(orig_entry); +-} +- + /** + * batadv_tt_local_size_mod - change the size by v of the local table identified + * by vid +@@ -328,13 +314,25 @@ static void batadv_tt_global_size_dec(st + batadv_tt_global_size_mod(orig_node, vid, -1); + } + ++/** ++ * batadv_tt_orig_list_entry_release - release tt orig entry from lists and ++ * queue for free after rcu grace period ++ * @orig_entry: tt orig entry to be free'd ++ */ ++static void ++batadv_tt_orig_list_entry_release(struct batadv_tt_orig_list_entry *orig_entry) ++{ ++ batadv_orig_node_free_ref(orig_entry->orig_node); ++ kfree_rcu(orig_entry, rcu); ++} ++ + static void + batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) + { + if (!atomic_dec_and_test(&orig_entry->refcount)) + return; + +- call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); ++ batadv_tt_orig_list_entry_release(orig_entry); + } + + /** diff --git a/queue-4.1/bonding-prevent-ipv6-link-local-address-on-enslaved-devices.patch b/queue-4.1/bonding-prevent-ipv6-link-local-address-on-enslaved-devices.patch new file mode 100644 index 00000000000..053abc6786a --- /dev/null +++ 
b/queue-4.1/bonding-prevent-ipv6-link-local-address-on-enslaved-devices.patch @@ -0,0 +1,58 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Karl Heiss +Date: Mon, 11 Jan 2016 08:28:43 -0500 +Subject: bonding: Prevent IPv6 link local address on enslaved devices + +From: Karl Heiss + +[ Upstream commit 03d84a5f83a67e692af00a3d3901e7820e3e84d5 ] + +Commit 1f718f0f4f97 ("bonding: populate neighbour's private on enslave") +undoes the fix provided by commit c2edacf80e15 ("bonding / ipv6: no addrconf +for slaves separately from master") by effectively setting the slave flag +after the slave has been opened. If the slave comes up quickly enough, it +will go through the IPv6 addrconf before the slave flag has been set and +will get a link local IPv6 address. + +In order to ensure that addrconf knows to ignore the slave devices on state +change, set IFF_SLAVE before dev_open() during bonding enslavement. + +Fixes: 1f718f0f4f97 ("bonding: populate neighbour's private on enslave") +Signed-off-by: Karl Heiss +Signed-off-by: Jay Vosburgh +Reviewed-by: Jarod Wilson +Signed-off-by: Andy Gospodarek +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -1194,7 +1194,6 @@ static int bond_master_upper_dev_link(st + err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave); + if (err) + return err; +- slave_dev->flags |= IFF_SLAVE; + rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); + return 0; + } +@@ -1452,6 +1451,9 @@ int bond_enslave(struct net_device *bond + } + } + ++ /* set slave flag before open to prevent IPv6 addrconf */ ++ slave_dev->flags |= IFF_SLAVE; ++ + /* open the slave since the application closed it */ + res = dev_open(slave_dev); + if (res) { +@@ -1712,6 +1714,7 @@ err_close: + dev_close(slave_dev); + + err_restore_mac: ++ slave_dev->flags &= ~IFF_SLAVE; + if (!bond->params.fail_over_mac || + BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { + /* XXX TODO - fom follow mode needs to change master's diff --git a/queue-4.1/bridge-fix-lockdep-addr_list_lock-false-positive-splat.patch b/queue-4.1/bridge-fix-lockdep-addr_list_lock-false-positive-splat.patch new file mode 100644 index 00000000000..e65d27fb29e --- /dev/null +++ b/queue-4.1/bridge-fix-lockdep-addr_list_lock-false-positive-splat.patch @@ -0,0 +1,133 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Nikolay Aleksandrov +Date: Fri, 15 Jan 2016 19:03:54 +0100 +Subject: bridge: fix lockdep addr_list_lock false positive splat + +From: Nikolay Aleksandrov + +[ Upstream commit c6894dec8ea9ae05747124dce98b3b5c2e69b168 ] + +After promisc mode management was introduced a bridge device could do +dev_set_promiscuity from its ndo_change_rx_flags() callback which in +turn can be called after the bridge's addr_list_lock has been taken +(e.g. by dev_uc_add). This causes a false positive lockdep splat because +the port interfaces' addr_list_lock is taken when br_manage_promisc() +runs after the bridge's addr list lock was already taken. +To remove the false positive introduce a custom bridge addr_list_lock +class and set it on bridge init. 
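
The chain lockdep sees, sketched with hedged names (both locks come from the
same default _xmit_ETHER class, so the nesting is reported as recursion):

/*
 *   dev_uc_add(br_dev)                  -> takes br_dev->addr_list_lock
 *     __dev_set_rx_mode(br_dev)
 *       br_dev_change_rx_flags()           (the bridge ndo_change_rx_flags)
 *         dev_set_promiscuity(port_dev) -> takes port_dev->addr_list_lock
 */
static struct lock_class_key br_addr_lock_key;	/* the distinct class */

static void example_set_lockdep_class(struct net_device *br_dev)
{
	lockdep_set_class(&br_dev->addr_list_lock, &br_addr_lock_key);
}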
+A simple way to reproduce this is with the following: +$ brctl addbr br0 +$ ip l add l br0 br0.100 type vlan id 100 +$ ip l set br0 up +$ ip l set br0.100 up +$ echo 1 > /sys/class/net/br0/bridge/vlan_filtering +$ brctl addif br0 eth0 +Splat: +[ 43.684325] ============================================= +[ 43.684485] [ INFO: possible recursive locking detected ] +[ 43.684636] 4.4.0-rc8+ #54 Not tainted +[ 43.684755] --------------------------------------------- +[ 43.684906] brctl/1187 is trying to acquire lock: +[ 43.685047] (_xmit_ETHER){+.....}, at: [] dev_set_rx_mode+0x1e/0x40 +[ 43.685460] but task is already holding lock: +[ 43.685618] (_xmit_ETHER){+.....}, at: [] dev_uc_add+0x27/0x80 +[ 43.686015] other info that might help us debug this: +[ 43.686316] Possible unsafe locking scenario: + +[ 43.686743] CPU0 +[ 43.686967] ---- +[ 43.687197] lock(_xmit_ETHER); +[ 43.687544] lock(_xmit_ETHER); +[ 43.687886] *** DEADLOCK *** + +[ 43.688438] May be due to missing lock nesting notation + +[ 43.688882] 2 locks held by brctl/1187: +[ 43.689134] #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x17/0x20 +[ 43.689852] #1: (_xmit_ETHER){+.....}, at: [] dev_uc_add+0x27/0x80 +[ 43.690575] stack backtrace: +[ 43.690970] CPU: 0 PID: 1187 Comm: brctl Not tainted 4.4.0-rc8+ #54 +[ 43.691270] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014 +[ 43.691770] ffffffff826a25c0 ffff8800369fb8e0 ffffffff81360ceb ffffffff826a25c0 +[ 43.692425] ffff8800369fb9b8 ffffffff810d0466 ffff8800369fb968 ffffffff81537139 +[ 43.693071] ffff88003a08c880 0000000000000000 00000000ffffffff 0000000002080020 +[ 43.693709] Call Trace: +[ 43.693931] [] dump_stack+0x4b/0x70 +[ 43.694199] [] __lock_acquire+0x1e46/0x1e90 +[ 43.694483] [] ? netlink_broadcast_filtered+0x139/0x3e0 +[ 43.694789] [] ? nlmsg_notify+0x5a/0xc0 +[ 43.695064] [] lock_acquire+0xe5/0x1f0 +[ 43.695340] [] ? dev_set_rx_mode+0x1e/0x40 +[ 43.695623] [] _raw_spin_lock_bh+0x45/0x80 +[ 43.695901] [] ? dev_set_rx_mode+0x1e/0x40 +[ 43.696180] [] dev_set_rx_mode+0x1e/0x40 +[ 43.696460] [] dev_set_promiscuity+0x3c/0x50 +[ 43.696750] [] br_port_set_promisc+0x25/0x50 [bridge] +[ 43.697052] [] br_manage_promisc+0x8a/0xe0 [bridge] +[ 43.697348] [] br_dev_change_rx_flags+0x1e/0x20 [bridge] +[ 43.697655] [] __dev_set_promiscuity+0x132/0x1f0 +[ 43.697943] [] __dev_set_rx_mode+0x82/0x90 +[ 43.698223] [] dev_uc_add+0x5e/0x80 +[ 43.698498] [] vlan_device_event+0x542/0x650 [8021q] +[ 43.698798] [] notifier_call_chain+0x5d/0x80 +[ 43.699083] [] raw_notifier_call_chain+0x16/0x20 +[ 43.699374] [] call_netdevice_notifiers_info+0x6e/0x80 +[ 43.699678] [] call_netdevice_notifiers+0x16/0x20 +[ 43.699973] [] br_add_if+0x47e/0x4c0 [bridge] +[ 43.700259] [] add_del_if+0x6e/0x80 [bridge] +[ 43.700548] [] br_dev_ioctl+0xaf/0xc0 [bridge] +[ 43.700836] [] dev_ifsioc+0x30c/0x3c0 +[ 43.701106] [] dev_ioctl+0xf9/0x6f0 +[ 43.701379] [] ? mntput_no_expire+0x5/0x450 +[ 43.701665] [] ? mntput_no_expire+0xae/0x450 +[ 43.701947] [] sock_do_ioctl+0x42/0x50 +[ 43.702219] [] sock_ioctl+0x1e5/0x290 +[ 43.702500] [] do_vfs_ioctl+0x2cb/0x5c0 +[ 43.702771] [] SyS_ioctl+0x79/0x90 +[ 43.703033] [] entry_SYSCALL_64_fastpath+0x16/0x7a + +CC: Vlad Yasevich +CC: Stephen Hemminger +CC: Bridge list +CC: Andy Gospodarek +CC: Roopa Prabhu +Fixes: 2796d0c648c9 ("bridge: Automatically manage port promiscuous mode.") +Reported-by: Andy Gospodarek +Signed-off-by: Nikolay Aleksandrov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_device.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/net/bridge/br_device.c ++++ b/net/bridge/br_device.c +@@ -28,6 +28,8 @@ + const struct nf_br_ops __rcu *nf_br_ops __read_mostly; + EXPORT_SYMBOL_GPL(nf_br_ops); + ++static struct lock_class_key bridge_netdev_addr_lock_key; ++ + /* net device transmit always called with BH disabled */ + netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) + { +@@ -87,6 +89,11 @@ out: + return NETDEV_TX_OK; + } + ++static void br_set_lockdep_class(struct net_device *dev) ++{ ++ lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key); ++} ++ + static int br_dev_init(struct net_device *dev) + { + struct net_bridge *br = netdev_priv(dev); +@@ -99,6 +106,7 @@ static int br_dev_init(struct net_device + err = br_vlan_init(br); + if (err) + free_percpu(br->stats); ++ br_set_lockdep_class(dev); + + return err; + } diff --git a/queue-4.1/bridge-only-call-sbin-bridge-stp-for-the-initial-network-namespace.patch b/queue-4.1/bridge-only-call-sbin-bridge-stp-for-the-initial-network-namespace.patch new file mode 100644 index 00000000000..8d1a9d7a10c --- /dev/null +++ b/queue-4.1/bridge-only-call-sbin-bridge-stp-for-the-initial-network-namespace.patch @@ -0,0 +1,45 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Hannes Frederic Sowa +Date: Tue, 5 Jan 2016 10:46:00 +0100 +Subject: bridge: Only call /sbin/bridge-stp for the initial network namespace + +From: Hannes Frederic Sowa + +[ Upstream commit ff62198553e43cdffa9d539f6165d3e83f8a42bc ] + +[I stole this patch from Eric Biederman. He wrote:] + +> There is no defined mechanism to pass network namespace information +> into /sbin/bridge-stp therefore don't even try to invoke it except +> for bridge devices in the initial network namespace. +> +> It is possible for unprivileged users to cause /sbin/bridge-stp to be +> invoked for any network device name which if /sbin/bridge-stp does not +> guard against unreasonable arguments or being invoked twice on the +> same network device could cause problems. + +[Hannes: changed patch using netns_eq] + +Cc: Eric W. Biederman +Signed-off-by: Eric W. Biederman +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_stp_if.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/bridge/br_stp_if.c ++++ b/net/bridge/br_stp_if.c +@@ -128,7 +128,10 @@ static void br_stp_start(struct net_brid + char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; + char *envp[] = { NULL }; + +- r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); ++ if (net_eq(dev_net(br->dev), &init_net)) ++ r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); ++ else ++ r = -ENOENT; + + spin_lock_bh(&br->lock); + diff --git a/queue-4.1/connector-bump-skb-users-before-callback-invocation.patch b/queue-4.1/connector-bump-skb-users-before-callback-invocation.patch new file mode 100644 index 00000000000..cd2f5d6c5d7 --- /dev/null +++ b/queue-4.1/connector-bump-skb-users-before-callback-invocation.patch @@ -0,0 +1,54 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Florian Westphal +Date: Thu, 31 Dec 2015 14:26:33 +0100 +Subject: connector: bump skb->users before callback invocation + +From: Florian Westphal + +[ Upstream commit 55285bf09427c5abf43ee1d54e892f352092b1f1 ] + +Dmitry reports memleak with syskaller program. +Problem is that connector bumps skb usecount but might not invoke callback. 
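
The leak, as a hedged sketch condensed from the pre-fix cn_rx_skb() shown in
the diff below:

static void buggy_rx(struct sk_buff *__skb)
{
	struct sk_buff *skb = skb_get(__skb);	/* users++ before validation */

	if (skb->len < NLMSG_HDRLEN)
		return;		/* short message: extra reference leaked */

	/* ... header checks and callback invocation ... */
}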
+ +So move skb_get to where we invoke the callback. + +Reported-by: Dmitry Vyukov +Signed-off-by: Florian Westphal +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/connector/connector.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +--- a/drivers/connector/connector.c ++++ b/drivers/connector/connector.c +@@ -178,26 +178,21 @@ static int cn_call_callback(struct sk_bu + * + * It checks skb, netlink header and msg sizes, and calls callback helper. + */ +-static void cn_rx_skb(struct sk_buff *__skb) ++static void cn_rx_skb(struct sk_buff *skb) + { + struct nlmsghdr *nlh; +- struct sk_buff *skb; + int len, err; + +- skb = skb_get(__skb); +- + if (skb->len >= NLMSG_HDRLEN) { + nlh = nlmsg_hdr(skb); + len = nlmsg_len(nlh); + + if (len < (int)sizeof(struct cn_msg) || + skb->len < nlh->nlmsg_len || +- len > CONNECTOR_MAX_MSG_SIZE) { +- kfree_skb(skb); ++ len > CONNECTOR_MAX_MSG_SIZE) + return; +- } + +- err = cn_call_callback(skb); ++ err = cn_call_callback(skb_get(skb)); + if (err < 0) + kfree_skb(skb); + } diff --git a/queue-4.1/ipv6-addrlabel-fix-ip6addrlbl_get.patch b/queue-4.1/ipv6-addrlabel-fix-ip6addrlbl_get.patch new file mode 100644 index 00000000000..9d8146cece3 --- /dev/null +++ b/queue-4.1/ipv6-addrlabel-fix-ip6addrlbl_get.patch @@ -0,0 +1,36 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Andrey Ryabinin +Date: Mon, 21 Dec 2015 12:54:45 +0300 +Subject: ipv6/addrlabel: fix ip6addrlbl_get() + +From: Andrey Ryabinin + +[ Upstream commit e459dfeeb64008b2d23bdf600f03b3605dbb8152 ] + +ip6addrlbl_get() has never worked. If ip6addrlbl_hold() succeeded, +ip6addrlbl_get() will exit with '-ESRCH'. If ip6addrlbl_hold() failed, +ip6addrlbl_get() will use about to be free ip6addrlbl_entry pointer. + +Fix this by inverting ip6addrlbl_hold() check. + +Fixes: 2a8cc6c89039 ("[IPV6] ADDRCONF: Support RFC3484 configurable address selection policy table.") +Signed-off-by: Andrey Ryabinin +Reviewed-by: Cong Wang +Acked-by: YOSHIFUJI Hideaki +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrlabel.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/addrlabel.c ++++ b/net/ipv6/addrlabel.c +@@ -552,7 +552,7 @@ static int ip6addrlbl_get(struct sk_buff + + rcu_read_lock(); + p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); +- if (p && ip6addrlbl_hold(p)) ++ if (p && !ip6addrlbl_hold(p)) + p = NULL; + lseq = ip6addrlbl_table.seq; + rcu_read_unlock(); diff --git a/queue-4.1/ipv6-tcp-add-rcu-locking-in-tcp_v6_send_synack.patch b/queue-4.1/ipv6-tcp-add-rcu-locking-in-tcp_v6_send_synack.patch new file mode 100644 index 00000000000..506d359fd46 --- /dev/null +++ b/queue-4.1/ipv6-tcp-add-rcu-locking-in-tcp_v6_send_synack.patch @@ -0,0 +1,35 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Eric Dumazet +Date: Fri, 8 Jan 2016 09:35:51 -0800 +Subject: ipv6: tcp: add rcu locking in tcp_v6_send_synack() + +From: Eric Dumazet + +[ Upstream commit 3e4006f0b86a5ae5eb0e8215f9a9e1db24506977 ] + +When first SYNACK is sent, we already hold rcu_read_lock(), but this +is not true if a SYNACK is retransmitted, as a timer (soft) interrupt +does not hold rcu_read_lock() + +Fixes: 45f6fad84cc30 ("ipv6: add complete rcu protection around np->opt") +Reported-by: Dave Jones +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/tcp_ipv6.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -465,8 +465,10 @@ static int tcp_v6_send_synack(struct soc + fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); + + skb_set_queue_mapping(skb, queue_mapping); ++ rcu_read_lock(); + err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), + np->tclass); ++ rcu_read_unlock(); + err = net_xmit_eval(err); + } + diff --git a/queue-4.1/ipv6-update-skb-csum-when-ce-mark-is-propagated.patch b/queue-4.1/ipv6-update-skb-csum-when-ce-mark-is-propagated.patch new file mode 100644 index 00000000000..db10bf6a4fe --- /dev/null +++ b/queue-4.1/ipv6-update-skb-csum-when-ce-mark-is-propagated.patch @@ -0,0 +1,74 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Eric Dumazet +Date: Fri, 15 Jan 2016 04:56:56 -0800 +Subject: ipv6: update skb->csum when CE mark is propagated + +From: Eric Dumazet + +[ Upstream commit 34ae6a1aa0540f0f781dd265366036355fdc8930 ] + +When a tunnel decapsulates the outer header, it has to comply +with RFC 6080 and eventually propagate CE mark into inner header. + +It turns out IP6_ECN_set_ce() does not correctly update skb->csum +for CHECKSUM_COMPLETE packets, triggering infamous "hw csum failure" +messages and stack traces. + +Signed-off-by: Eric Dumazet +Acked-by: Herbert Xu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_ecn.h | 19 ++++++++++++++++--- + net/ipv6/xfrm6_mode_tunnel.c | 2 +- + 2 files changed, 17 insertions(+), 4 deletions(-) + +--- a/include/net/inet_ecn.h ++++ b/include/net/inet_ecn.h +@@ -111,11 +111,24 @@ static inline void ipv4_copy_dscp(unsign + + struct ipv6hdr; + +-static inline int IP6_ECN_set_ce(struct ipv6hdr *iph) ++/* Note: ++ * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE, ++ * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE ++ * In IPv6 case, no checksum compensates the change in IPv6 header, ++ * so we have to update skb->csum. ++ */ ++static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) + { ++ __be32 from, to; ++ + if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph))) + return 0; +- *(__be32*)iph |= htonl(INET_ECN_CE << 20); ++ ++ from = *(__be32 *)iph; ++ to = from | htonl(INET_ECN_CE << 20); ++ *(__be32 *)iph = to; ++ if (skb->ip_summed == CHECKSUM_COMPLETE) ++ skb->csum = csum_add(csum_sub(skb->csum, from), to); + return 1; + } + +@@ -142,7 +155,7 @@ static inline int INET_ECN_set_ce(struct + case cpu_to_be16(ETH_P_IPV6): + if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= + skb_tail_pointer(skb)) +- return IP6_ECN_set_ce(ipv6_hdr(skb)); ++ return IP6_ECN_set_ce(skb, ipv6_hdr(skb)); + break; + } + +--- a/net/ipv6/xfrm6_mode_tunnel.c ++++ b/net/ipv6/xfrm6_mode_tunnel.c +@@ -24,7 +24,7 @@ static inline void ipip6_ecn_decapsulate + struct ipv6hdr *inner_iph = ipipv6_hdr(skb); + + if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph))) +- IP6_ECN_set_ce(inner_iph); ++ IP6_ECN_set_ce(skb, inner_iph); + } + + /* Add encapsulation header. 
diff --git a/queue-4.1/isdn_ppp-add-checks-for-allocation-failure-in-isdn_ppp_open.patch b/queue-4.1/isdn_ppp-add-checks-for-allocation-failure-in-isdn_ppp_open.patch new file mode 100644 index 00000000000..d01c3eff5e8 --- /dev/null +++ b/queue-4.1/isdn_ppp-add-checks-for-allocation-failure-in-isdn_ppp_open.patch @@ -0,0 +1,40 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Ben Hutchings +Date: Sun, 1 Nov 2015 16:21:24 +0000 +Subject: isdn_ppp: Add checks for allocation failure in isdn_ppp_open() + +From: Ben Hutchings + +[ Upstream commit 0baa57d8dc32db78369d8b5176ef56c5e2e18ab3 ] + +Compile-tested only. + +Signed-off-by: Ben Hutchings +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/isdn/i4l/isdn_ppp.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/isdn/i4l/isdn_ppp.c ++++ b/drivers/isdn/i4l/isdn_ppp.c +@@ -301,6 +301,8 @@ isdn_ppp_open(int min, struct file *file + is->compflags = 0; + + is->reset = isdn_ppp_ccp_reset_alloc(is); ++ if (!is->reset) ++ return -ENOMEM; + + is->lp = NULL; + is->mp_seqno = 0; /* MP sequence number */ +@@ -320,6 +322,10 @@ isdn_ppp_open(int min, struct file *file + * VJ header compression init + */ + is->slcomp = slhc_init(16, 16); /* not necessary for 2. link in bundle */ ++ if (!is->slcomp) { ++ isdn_ppp_ccp_reset_free(is); ++ return -ENOMEM; ++ } + #endif + #ifdef CONFIG_IPPP_FILTER + is->pass_filter = NULL; diff --git a/queue-4.1/net-bpf-reject-invalid-shifts.patch b/queue-4.1/net-bpf-reject-invalid-shifts.patch new file mode 100644 index 00000000000..28167dbd539 --- /dev/null +++ b/queue-4.1/net-bpf-reject-invalid-shifts.patch @@ -0,0 +1,59 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Rabin Vincent +Date: Tue, 12 Jan 2016 20:17:08 +0100 +Subject: net: bpf: reject invalid shifts + +From: Rabin Vincent + +[ Upstream commit 229394e8e62a4191d592842cf67e80c62a492937 ] + +On ARM64, a BUG() is triggered in the eBPF JIT if a filter with a +constant shift that can't be encoded in the immediate field of the +UBFM/SBFM instructions is passed to the JIT. Since these shifts +amounts, which are negative or >= regsize, are invalid, reject them in +the eBPF verifier and the classic BPF filter checker, for all +architectures. + +Signed-off-by: Rabin Vincent +Acked-by: Alexei Starovoitov +Acked-by: Daniel Borkmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 10 ++++++++++ + net/core/filter.c | 5 +++++ + 2 files changed, 15 insertions(+) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1019,6 +1019,16 @@ static int check_alu_op(struct reg_state + return -EINVAL; + } + ++ if ((opcode == BPF_LSH || opcode == BPF_RSH || ++ opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { ++ int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 
64 : 32; ++ ++ if (insn->imm < 0 || insn->imm >= size) { ++ verbose("invalid shift %d\n", insn->imm); ++ return -EINVAL; ++ } ++ } ++ + /* pattern match 'bpf_add Rx, imm' instruction */ + if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && + regs[insn->dst_reg].type == FRAME_PTR && +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -775,6 +775,11 @@ int bpf_check_classic(const struct sock_ + if (ftest->k == 0) + return -EINVAL; + break; ++ case BPF_ALU | BPF_LSH | BPF_K: ++ case BPF_ALU | BPF_RSH | BPF_K: ++ if (ftest->k >= 32) ++ return -EINVAL; ++ break; + case BPF_LD | BPF_MEM: + case BPF_LDX | BPF_MEM: + case BPF_ST: diff --git a/queue-4.1/net-cdc_ncm-avoid-changing-rx-tx-buffers-on-mtu-changes.patch b/queue-4.1/net-cdc_ncm-avoid-changing-rx-tx-buffers-on-mtu-changes.patch new file mode 100644 index 00000000000..cecc8c683e7 --- /dev/null +++ b/queue-4.1/net-cdc_ncm-avoid-changing-rx-tx-buffers-on-mtu-changes.patch @@ -0,0 +1,112 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= +Date: Wed, 23 Dec 2015 13:42:43 +0100 +Subject: net: cdc_ncm: avoid changing RX/TX buffers on MTU changes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= + +[ Upstream commit 1dfddff5fcd869fcab0c52fafae099dfa435a935 ] + +NCM buffer sizes are negotiated with the device independently of +the network device MTU. The RX buffers are allocated by the +usbnet framework based on the rx_urb_size value set by cdc_ncm. A +single RX buffer can hold a number of MTU sized packets. + +The default usbnet change_mtu ndo only modifies rx_urb_size if it +is equal to hard_mtu. And the cdc_ncm driver will set rx_urb_size +and hard_mtu independently of each other, based on dwNtbInMaxSize +and dwNtbOutMaxSize respectively. It was therefore assumed that +usbnet_change_mtu() would never touch rx_urb_size. This failed to +consider the case where dwNtbInMaxSize and dwNtbOutMaxSize happens +to be equal. + +Fix by implementing an NCM specific change_mtu ndo, modifying the +netdev MTU without touching the buffer size settings. + +Signed-off-by: Bjørn Mork +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/cdc_mbim.c | 2 +- + drivers/net/usb/cdc_ncm.c | 31 +++++++++++++++++++++++++++++++ + include/linux/usb/cdc_ncm.h | 1 + + 3 files changed, 33 insertions(+), 1 deletion(-) + +--- a/drivers/net/usb/cdc_mbim.c ++++ b/drivers/net/usb/cdc_mbim.c +@@ -100,7 +100,7 @@ static const struct net_device_ops cdc_m + .ndo_stop = usbnet_stop, + .ndo_start_xmit = usbnet_start_xmit, + .ndo_tx_timeout = usbnet_tx_timeout, +- .ndo_change_mtu = usbnet_change_mtu, ++ .ndo_change_mtu = cdc_ncm_change_mtu, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_vlan_rx_add_vid = cdc_mbim_rx_add_vid, +--- a/drivers/net/usb/cdc_ncm.c ++++ b/drivers/net/usb/cdc_ncm.c +@@ -41,6 +41,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -687,6 +688,33 @@ static void cdc_ncm_free(struct cdc_ncm_ + kfree(ctx); + } + ++/* we need to override the usbnet change_mtu ndo for two reasons: ++ * - respect the negotiated maximum datagram size ++ * - avoid unwanted changes to rx and tx buffers ++ */ ++int cdc_ncm_change_mtu(struct net_device *net, int new_mtu) ++{ ++ struct usbnet *dev = netdev_priv(net); ++ struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; ++ int maxmtu = ctx->max_datagram_size - cdc_ncm_eth_hlen(dev); ++ ++ if (new_mtu <= 0 || new_mtu > maxmtu) ++ return -EINVAL; ++ net->mtu = new_mtu; ++ return 0; ++} ++EXPORT_SYMBOL_GPL(cdc_ncm_change_mtu); ++ ++static const struct net_device_ops cdc_ncm_netdev_ops = { ++ .ndo_open = usbnet_open, ++ .ndo_stop = usbnet_stop, ++ .ndo_start_xmit = usbnet_start_xmit, ++ .ndo_tx_timeout = usbnet_tx_timeout, ++ .ndo_change_mtu = cdc_ncm_change_mtu, ++ .ndo_set_mac_address = eth_mac_addr, ++ .ndo_validate_addr = eth_validate_addr, ++}; ++ + int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting) + { + const struct usb_cdc_union_desc *union_desc = NULL; +@@ -861,6 +889,9 @@ advance: + /* add our sysfs attrs */ + dev->net->sysfs_groups[0] = &cdc_ncm_sysfs_attr_group; + ++ /* must handle MTU changes */ ++ dev->net->netdev_ops = &cdc_ncm_netdev_ops; ++ + return 0; + + error2: +--- a/include/linux/usb/cdc_ncm.h ++++ b/include/linux/usb/cdc_ncm.h +@@ -133,6 +133,7 @@ struct cdc_ncm_ctx { + }; + + u8 cdc_ncm_select_altsetting(struct usb_interface *intf); ++int cdc_ncm_change_mtu(struct net_device *net, int new_mtu); + int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); + void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); + struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); diff --git a/queue-4.1/net-filter-make-jits-zero-a-for-skf_ad_alu_xor_x.patch b/queue-4.1/net-filter-make-jits-zero-a-for-skf_ad_alu_xor_x.patch new file mode 100644 index 00000000000..4364533d46c --- /dev/null +++ b/queue-4.1/net-filter-make-jits-zero-a-for-skf_ad_alu_xor_x.patch @@ -0,0 +1,189 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Rabin Vincent +Date: Tue, 5 Jan 2016 16:23:07 +0100 +Subject: net: filter: make JITs zero A for SKF_AD_ALU_XOR_X + +From: Rabin Vincent + +[ Upstream commit 55795ef5469290f89f04e12e662ded604909e462 ] + +The SKF_AD_ALU_XOR_X ancillary is not like the other ancillary data +instructions since it XORs A with X while all the others replace A with +some loaded value. All the BPF JITs fail to clear A if this is used as +the first instruction in a filter. This was found using american fuzzy +lop. 
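
A classic-BPF program that hits this, using the uapi macros (the array name is
illustrative):

#include <linux/filter.h>

/* the first instruction is the XOR ancillary, so it consumes A without
 * loading it; a JIT that skips the A-clearing prologue leaks whatever
 * was left in A's register or stack slot */
static struct sock_filter xor_first[] = {
	BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_ALU_XOR_X),
	BPF_STMT(BPF_RET | BPF_A, 0),	/* return the (stale) value of A */
};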
+ +Add a helper to determine if A needs to be cleared given the first +instruction in a filter, and use this in the JITs. Except for ARM, the +rest have only been compile-tested. + +Fixes: 3480593131e0 ("net: filter: get rid of BPF_S_* enum") +Signed-off-by: Rabin Vincent +Acked-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/net/bpf_jit_32.c | 16 +--------------- + arch/mips/net/bpf_jit.c | 16 +--------------- + arch/powerpc/net/bpf_jit_comp.c | 13 ++----------- + arch/sparc/net/bpf_jit_comp.c | 17 ++--------------- + include/linux/filter.h | 19 +++++++++++++++++++ + 5 files changed, 25 insertions(+), 56 deletions(-) + +--- a/arch/arm/net/bpf_jit_32.c ++++ b/arch/arm/net/bpf_jit_32.c +@@ -162,19 +162,6 @@ static inline int mem_words_used(struct + return fls(ctx->seen & SEEN_MEM); + } + +-static inline bool is_load_to_a(u16 inst) +-{ +- switch (inst) { +- case BPF_LD | BPF_W | BPF_LEN: +- case BPF_LD | BPF_W | BPF_ABS: +- case BPF_LD | BPF_H | BPF_ABS: +- case BPF_LD | BPF_B | BPF_ABS: +- return true; +- default: +- return false; +- } +-} +- + static void jit_fill_hole(void *area, unsigned int size) + { + u32 *ptr; +@@ -186,7 +173,6 @@ static void jit_fill_hole(void *area, un + static void build_prologue(struct jit_ctx *ctx) + { + u16 reg_set = saved_regs(ctx); +- u16 first_inst = ctx->skf->insns[0].code; + u16 off; + + #ifdef CONFIG_FRAME_POINTER +@@ -216,7 +202,7 @@ static void build_prologue(struct jit_ct + emit(ARM_MOV_I(r_X, 0), ctx); + + /* do not leak kernel data to userspace */ +- if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst))) ++ if (bpf_needs_clear_a(&ctx->skf->insns[0])) + emit(ARM_MOV_I(r_A, 0), ctx); + + /* stack space for the BPF_MEM words */ +--- a/arch/mips/net/bpf_jit.c ++++ b/arch/mips/net/bpf_jit.c +@@ -556,19 +556,6 @@ static inline u16 align_sp(unsigned int + return num; + } + +-static bool is_load_to_a(u16 inst) +-{ +- switch (inst) { +- case BPF_LD | BPF_W | BPF_LEN: +- case BPF_LD | BPF_W | BPF_ABS: +- case BPF_LD | BPF_H | BPF_ABS: +- case BPF_LD | BPF_B | BPF_ABS: +- return true; +- default: +- return false; +- } +-} +- + static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset) + { + int i = 0, real_off = 0; +@@ -686,7 +673,6 @@ static unsigned int get_stack_depth(stru + + static void build_prologue(struct jit_ctx *ctx) + { +- u16 first_inst = ctx->skf->insns[0].code; + int sp_off; + + /* Calculate the total offset for the stack pointer */ +@@ -700,7 +686,7 @@ static void build_prologue(struct jit_ct + emit_jit_reg_move(r_X, r_zero, ctx); + + /* Do not leak kernel data to userspace */ +- if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst))) ++ if (bpf_needs_clear_a(&ctx->skf->insns[0])) + emit_jit_reg_move(r_A, r_zero, ctx); + } + +--- a/arch/powerpc/net/bpf_jit_comp.c ++++ b/arch/powerpc/net/bpf_jit_comp.c +@@ -78,18 +78,9 @@ static void bpf_jit_build_prologue(struc + PPC_LI(r_X, 0); + } + +- switch (filter[0].code) { +- case BPF_RET | BPF_K: +- case BPF_LD | BPF_W | BPF_LEN: +- case BPF_LD | BPF_W | BPF_ABS: +- case BPF_LD | BPF_H | BPF_ABS: +- case BPF_LD | BPF_B | BPF_ABS: +- /* first instruction sets A register (or is RET 'constant') */ +- break; +- default: +- /* make sure we dont leak kernel information to user */ ++ /* make sure we dont leak kernel information to user */ ++ if (bpf_needs_clear_a(&filter[0])) + PPC_LI(r_A, 0); +- } + } + + static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +--- 
a/arch/sparc/net/bpf_jit_comp.c ++++ b/arch/sparc/net/bpf_jit_comp.c +@@ -420,22 +420,9 @@ void bpf_jit_compile(struct bpf_prog *fp + } + emit_reg_move(O7, r_saved_O7); + +- switch (filter[0].code) { +- case BPF_RET | BPF_K: +- case BPF_LD | BPF_W | BPF_LEN: +- case BPF_LD | BPF_W | BPF_ABS: +- case BPF_LD | BPF_H | BPF_ABS: +- case BPF_LD | BPF_B | BPF_ABS: +- /* The first instruction sets the A register (or is +- * a "RET 'constant'") +- */ +- break; +- default: +- /* Make sure we dont leak kernel information to the +- * user. +- */ ++ /* Make sure we dont leak kernel information to the user. */ ++ if (bpf_needs_clear_a(&filter[0])) + emit_clear(r_A); /* A = 0 */ +- } + + for (i = 0; i < flen; i++) { + unsigned int K = filter[i].k; +--- a/include/linux/filter.h ++++ b/include/linux/filter.h +@@ -428,6 +428,25 @@ static inline void bpf_jit_free(struct b + + #define BPF_ANC BIT(15) + ++static inline bool bpf_needs_clear_a(const struct sock_filter *first) ++{ ++ switch (first->code) { ++ case BPF_RET | BPF_K: ++ case BPF_LD | BPF_W | BPF_LEN: ++ return false; ++ ++ case BPF_LD | BPF_W | BPF_ABS: ++ case BPF_LD | BPF_H | BPF_ABS: ++ case BPF_LD | BPF_B | BPF_ABS: ++ if (first->k == SKF_AD_OFF + SKF_AD_ALU_XOR_X) ++ return true; ++ return false; ++ ++ default: ++ return true; ++ } ++} ++ + static inline u16 bpf_anc_helper(const struct sock_filter *ftest) + { + BUG_ON(ftest->code & BPF_ANC); diff --git a/queue-4.1/net-possible-use-after-free-in-dst_release.patch b/queue-4.1/net-possible-use-after-free-in-dst_release.patch new file mode 100644 index 00000000000..e7573b062de --- /dev/null +++ b/queue-4.1/net-possible-use-after-free-in-dst_release.patch @@ -0,0 +1,39 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Francesco Ruggeri +Date: Wed, 6 Jan 2016 00:18:48 -0800 +Subject: net: possible use after free in dst_release + +From: Francesco Ruggeri + +[ Upstream commit 07a5d38453599052aff0877b16bb9c1585f08609 ] + +dst_release should not access dst->flags after decrementing +__refcnt to 0. The dst_entry may be in dst_busy_list and +dst_gc_task may dst_destroy it before dst_release gets a chance +to access dst->flags. + +Fixes: d69bbf88c8d0 ("net: fix a race in dst_release()") +Fixes: 27b75c95f10d ("net: avoid RCU for NOCACHE dst") +Signed-off-by: Francesco Ruggeri +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dst.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/core/dst.c ++++ b/net/core/dst.c +@@ -282,10 +282,11 @@ void dst_release(struct dst_entry *dst) + { + if (dst) { + int newrefcnt; ++ unsigned short nocache = dst->flags & DST_NOCACHE; + + newrefcnt = atomic_dec_return(&dst->__refcnt); + WARN_ON(newrefcnt < 0); +- if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE)) ++ if (!newrefcnt && unlikely(nocache)) + call_rcu(&dst->rcu_head, dst_destroy_rcu); + } + } diff --git a/queue-4.1/net-preserve-ip-control-block-during-gso-segmentation.patch b/queue-4.1/net-preserve-ip-control-block-during-gso-segmentation.patch new file mode 100644 index 00000000000..a929c056a08 --- /dev/null +++ b/queue-4.1/net-preserve-ip-control-block-during-gso-segmentation.patch @@ -0,0 +1,107 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Konstantin Khlebnikov +Date: Fri, 8 Jan 2016 15:21:46 +0300 +Subject: net: preserve IP control block during GSO segmentation + +From: Konstantin Khlebnikov + +[ Upstream commit 9207f9d45b0ad071baa128e846d7e7ed85016df3 ] + +Skb_gso_segment() uses skb control block during segmentation. 
+This patch adds 32-bytes room for previous control block which +will be copied into all resulting segments. + +This patch fixes kernel crash during fragmenting forwarded packets. +Fragmentation requires valid IP CB in skb for clearing ip options. +Also patch removes custom save/restore in ovs code, now it's redundant. + +Signed-off-by: Konstantin Khlebnikov +Link: http://lkml.kernel.org/r/CALYGNiP-0MZ-FExV2HutTvE9U-QQtkKSoE--KN=JQE5STYsjAA@mail.gmail.com +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 3 ++- + net/core/dev.c | 5 +++++ + net/ipv4/ip_output.c | 1 + + net/openvswitch/datapath.c | 5 +---- + net/xfrm/xfrm_output.c | 2 ++ + 5 files changed, 11 insertions(+), 5 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -3320,7 +3320,8 @@ struct skb_gso_cb { + int encap_level; + __u16 csum_start; + }; +-#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb) ++#define SKB_SGO_CB_OFFSET 32 ++#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_SGO_CB_OFFSET)) + + static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) + { +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -2479,6 +2479,8 @@ static inline bool skb_needs_check(struc + * + * It may return NULL if the skb requires no segmentation. This is + * only possible when GSO is used for verifying header integrity. ++ * ++ * Segmentation preserves SKB_SGO_CB_OFFSET bytes of previous skb cb. + */ + struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path) +@@ -2493,6 +2495,9 @@ struct sk_buff *__skb_gso_segment(struct + return ERR_PTR(err); + } + ++ BUILD_BUG_ON(SKB_SGO_CB_OFFSET + ++ sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); ++ + SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); + SKB_GSO_CB(skb)->encap_level = 0; + +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -235,6 +235,7 @@ static int ip_finish_output_gso(struct s + * from host network stack. + */ + features = netif_skb_features(skb); ++ BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + if (IS_ERR_OR_NULL(segs)) { + kfree_skb(skb); +--- a/net/openvswitch/datapath.c ++++ b/net/openvswitch/datapath.c +@@ -337,12 +337,10 @@ static int queue_gso_packets(struct data + unsigned short gso_type = skb_shinfo(skb)->gso_type; + struct sw_flow_key later_key; + struct sk_buff *segs, *nskb; +- struct ovs_skb_cb ovs_cb; + int err; + +- ovs_cb = *OVS_CB(skb); ++ BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET); + segs = __skb_gso_segment(skb, NETIF_F_SG, false); +- *OVS_CB(skb) = ovs_cb; + if (IS_ERR(segs)) + return PTR_ERR(segs); + if (segs == NULL) +@@ -360,7 +358,6 @@ static int queue_gso_packets(struct data + /* Queue all of the segments. 
*/ + skb = segs; + do { +- *OVS_CB(skb) = ovs_cb; + if (gso_type & SKB_GSO_UDP && skb != segs) + key = &later_key; + +--- a/net/xfrm/xfrm_output.c ++++ b/net/xfrm/xfrm_output.c +@@ -153,6 +153,8 @@ static int xfrm_output_gso(struct sock * + { + struct sk_buff *segs; + ++ BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); ++ BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_SGO_CB_OFFSET); + segs = skb_gso_segment(skb, 0); + kfree_skb(skb); + if (IS_ERR(segs)) diff --git a/queue-4.1/net-sched-fix-missing-free-per-cpu-on-qstats.patch b/queue-4.1/net-sched-fix-missing-free-per-cpu-on-qstats.patch new file mode 100644 index 00000000000..2e868c3b3de --- /dev/null +++ b/queue-4.1/net-sched-fix-missing-free-per-cpu-on-qstats.patch @@ -0,0 +1,36 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: John Fastabend +Date: Tue, 5 Jan 2016 09:11:36 -0800 +Subject: net: sched: fix missing free per cpu on qstats + +From: John Fastabend + +[ Upstream commit 73c20a8b7245273125cfe92c4b46e6fdb568a801 ] + +When a qdisc is using per cpu stats (currently just the ingress +qdisc) only the bstats are being freed. This also free's the qstats. + +Fixes: b0ab6f92752b9f9d8 ("net: sched: enable per cpu qstats") +Signed-off-by: John Fastabend +Acked-by: Eric Dumazet +Acked-by: Daniel Borkmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_generic.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/sched/sch_generic.c ++++ b/net/sched/sch_generic.c +@@ -666,8 +666,10 @@ static void qdisc_rcu_free(struct rcu_he + { + struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head); + +- if (qdisc_is_percpu_stats(qdisc)) ++ if (qdisc_is_percpu_stats(qdisc)) { + free_percpu(qdisc->cpu_bstats); ++ free_percpu(qdisc->cpu_qstats); ++ } + + kfree((char *) qdisc - qdisc->padded); + } diff --git a/queue-4.1/net-sctp-prevent-writes-to-cookie_hmac_alg-from-accessing-invalid-memory.patch b/queue-4.1/net-sctp-prevent-writes-to-cookie_hmac_alg-from-accessing-invalid-memory.patch new file mode 100644 index 00000000000..16c7065f2a9 --- /dev/null +++ b/queue-4.1/net-sctp-prevent-writes-to-cookie_hmac_alg-from-accessing-invalid-memory.patch @@ -0,0 +1,34 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Sasha Levin +Date: Thu, 7 Jan 2016 14:52:43 -0500 +Subject: net: sctp: prevent writes to cookie_hmac_alg from accessing invalid memory + +From: Sasha Levin + +[ Upstream commit 320f1a4a175e7cd5d3f006f92b4d4d3e2cbb7bb5 ] + +proc_dostring() needs an initialized destination string, while the one +provided in proc_sctp_do_hmac_alg() contains stack garbage. + +Thus, writing to cookie_hmac_alg would strlen() that garbage and end up +accessing invalid memory. + +Fixes: 3c68198e7 ("sctp: Make hmac algorithm selection for cookie generation dynamic") +Signed-off-by: Sasha Levin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/sysctl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sctp/sysctl.c ++++ b/net/sctp/sysctl.c +@@ -320,7 +320,7 @@ static int proc_sctp_do_hmac_alg(struct + struct ctl_table tbl; + bool changed = false; + char *none = "none"; +- char tmp[8]; ++ char tmp[8] = {0}; + int ret; + + memset(&tbl, 0, sizeof(struct ctl_table)); diff --git a/queue-4.1/phonet-properly-unshare-skbs-in-phonet_rcv.patch b/queue-4.1/phonet-properly-unshare-skbs-in-phonet_rcv.patch new file mode 100644 index 00000000000..7415010f467 --- /dev/null +++ b/queue-4.1/phonet-properly-unshare-skbs-in-phonet_rcv.patch @@ -0,0 +1,45 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Eric Dumazet +Date: Tue, 12 Jan 2016 08:58:00 -0800 +Subject: phonet: properly unshare skbs in phonet_rcv() + +From: Eric Dumazet + +[ Upstream commit 7aaed57c5c2890634cfadf725173c7c68ea4cb4f ] + +Ivaylo Dimitrov reported a regression caused by commit 7866a621043f +("dev: add per net_device packet type chains"). + +skb->dev becomes NULL and we crash in __netif_receive_skb_core(). + +Before above commit, different kind of bugs or corruptions could happen +without major crash. + +But the root cause is that phonet_rcv() can queue skb without checking +if skb is shared or not. + +Many thanks to Ivaylo Dimitrov for his help, diagnosis and tests. + +Reported-by: Ivaylo Dimitrov +Tested-by: Ivaylo Dimitrov +Signed-off-by: Eric Dumazet +Cc: Remi Denis-Courmont +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/phonet/af_phonet.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/phonet/af_phonet.c ++++ b/net/phonet/af_phonet.c +@@ -377,6 +377,10 @@ static int phonet_rcv(struct sk_buff *sk + struct sockaddr_pn sa; + u16 len; + ++ skb = skb_share_check(skb, GFP_ATOMIC); ++ if (!skb) ++ return NET_RX_DROP; ++ + /* check we have at least a full Phonet header */ + if (!pskb_pull(skb, sizeof(struct phonethdr))) + goto out; diff --git a/queue-4.1/ppp-slip-validate-vj-compression-slot-parameters-completely.patch b/queue-4.1/ppp-slip-validate-vj-compression-slot-parameters-completely.patch new file mode 100644 index 00000000000..f18a8b9d128 --- /dev/null +++ b/queue-4.1/ppp-slip-validate-vj-compression-slot-parameters-completely.patch @@ -0,0 +1,133 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Ben Hutchings +Date: Sun, 1 Nov 2015 16:22:53 +0000 +Subject: ppp, slip: Validate VJ compression slot parameters completely +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ben Hutchings + +[ Upstream commit 4ab42d78e37a294ac7bc56901d563c642e03c4ae ] + +Currently slhc_init() treats out-of-range values of rslots and tslots +as equivalent to 0, except that if tslots is too large it will +dereference a null pointer (CVE-2015-7799). + +Add a range-check at the top of the function and make it return an +ERR_PTR() on error instead of NULL. Change the callers accordingly. + +Compile-tested only. + +Reported-by: 郭永刚 +References: http://article.gmane.org/gmane.comp.security.oss.general/17908 +Signed-off-by: Ben Hutchings +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/isdn/i4l/isdn_ppp.c | 10 ++++------ + drivers/net/ppp/ppp_generic.c | 6 ++---- + drivers/net/slip/slhc.c | 12 ++++++++---- + drivers/net/slip/slip.c | 2 +- + 4 files changed, 15 insertions(+), 15 deletions(-) + +--- a/drivers/isdn/i4l/isdn_ppp.c ++++ b/drivers/isdn/i4l/isdn_ppp.c +@@ -322,9 +322,9 @@ isdn_ppp_open(int min, struct file *file + * VJ header compression init + */ + is->slcomp = slhc_init(16, 16); /* not necessary for 2. link in bundle */ +- if (!is->slcomp) { ++ if (IS_ERR(is->slcomp)) { + isdn_ppp_ccp_reset_free(is); +- return -ENOMEM; ++ return PTR_ERR(is->slcomp); + } + #endif + #ifdef CONFIG_IPPP_FILTER +@@ -573,10 +573,8 @@ isdn_ppp_ioctl(int min, struct file *fil + is->maxcid = val; + #ifdef CONFIG_ISDN_PPP_VJ + sltmp = slhc_init(16, val); +- if (!sltmp) { +- printk(KERN_ERR "ippp, can't realloc slhc struct\n"); +- return -ENOMEM; +- } ++ if (IS_ERR(sltmp)) ++ return PTR_ERR(sltmp); + if (is->slcomp) + slhc_free(is->slcomp); + is->slcomp = sltmp; +--- a/drivers/net/ppp/ppp_generic.c ++++ b/drivers/net/ppp/ppp_generic.c +@@ -715,10 +715,8 @@ static long ppp_ioctl(struct file *file, + val &= 0xffff; + } + vj = slhc_init(val2+1, val+1); +- if (!vj) { +- netdev_err(ppp->dev, +- "PPP: no memory (VJ compressor)\n"); +- err = -ENOMEM; ++ if (IS_ERR(vj)) { ++ err = PTR_ERR(vj); + break; + } + ppp_lock(ppp); +--- a/drivers/net/slip/slhc.c ++++ b/drivers/net/slip/slhc.c +@@ -84,8 +84,9 @@ static long decode(unsigned char **cpp); + static unsigned char * put16(unsigned char *cp, unsigned short x); + static unsigned short pull16(unsigned char **cpp); + +-/* Initialize compression data structure ++/* Allocate compression data structure + * slots must be in range 0 to 255 (zero meaning no compression) ++ * Returns pointer to structure or ERR_PTR() on error. + */ + struct slcompress * + slhc_init(int rslots, int tslots) +@@ -94,11 +95,14 @@ slhc_init(int rslots, int tslots) + register struct cstate *ts; + struct slcompress *comp; + ++ if (rslots < 0 || rslots > 255 || tslots < 0 || tslots > 255) ++ return ERR_PTR(-EINVAL); ++ + comp = kzalloc(sizeof(struct slcompress), GFP_KERNEL); + if (! comp) + goto out_fail; + +- if ( rslots > 0 && rslots < 256 ) { ++ if (rslots > 0) { + size_t rsize = rslots * sizeof(struct cstate); + comp->rstate = kzalloc(rsize, GFP_KERNEL); + if (! comp->rstate) +@@ -106,7 +110,7 @@ slhc_init(int rslots, int tslots) + comp->rslot_limit = rslots - 1; + } + +- if ( tslots > 0 && tslots < 256 ) { ++ if (tslots > 0) { + size_t tsize = tslots * sizeof(struct cstate); + comp->tstate = kzalloc(tsize, GFP_KERNEL); + if (! 
comp->tstate) +@@ -141,7 +145,7 @@ out_free2: + out_free: + kfree(comp); + out_fail: +- return NULL; ++ return ERR_PTR(-ENOMEM); + } + + +--- a/drivers/net/slip/slip.c ++++ b/drivers/net/slip/slip.c +@@ -164,7 +164,7 @@ static int sl_alloc_bufs(struct slip *sl + if (cbuff == NULL) + goto err_exit; + slcomp = slhc_init(16, 16); +- if (slcomp == NULL) ++ if (IS_ERR(slcomp)) + goto err_exit; + #endif + spin_lock_bh(&sl->lock); diff --git a/queue-4.1/sctp-prevent-soft-lockup-when-sctp_accept-is-called-during-a-timeout-event.patch b/queue-4.1/sctp-prevent-soft-lockup-when-sctp_accept-is-called-during-a-timeout-event.patch new file mode 100644 index 00000000000..2dfa5a06c75 --- /dev/null +++ b/queue-4.1/sctp-prevent-soft-lockup-when-sctp_accept-is-called-during-a-timeout-event.patch @@ -0,0 +1,191 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Karl Heiss +Date: Thu, 24 Sep 2015 12:15:07 -0400 +Subject: sctp: Prevent soft lockup when sctp_accept() is called during a timeout event + +From: Karl Heiss + +[ Upstream commit 635682a14427d241bab7bbdeebb48a7d7b91638e ] + +A case can occur when sctp_accept() is called by the user during +a heartbeat timeout event after the 4-way handshake. Since +sctp_assoc_migrate() changes both assoc->base.sk and assoc->ep, the +bh_sock_lock in sctp_generate_heartbeat_event() will be taken with +the listening socket but released with the new association socket. +The result is a deadlock on any future attempts to take the listening +socket lock. + +Note that this race can occur with other SCTP timeouts that take +the bh_lock_sock() in the event sctp_accept() is called. + + BUG: soft lockup - CPU#9 stuck for 67s! [swapper:0] + ... + RIP: 0010:[] [] _spin_lock+0x1e/0x30 + RSP: 0018:ffff880028323b20 EFLAGS: 00000206 + RAX: 0000000000000002 RBX: ffff880028323b20 RCX: 0000000000000000 + RDX: 0000000000000000 RSI: ffff880028323be0 RDI: ffff8804632c4b48 + RBP: ffffffff8100bb93 R08: 0000000000000000 R09: 0000000000000000 + R10: ffff880610662280 R11: 0000000000000100 R12: ffff880028323aa0 + R13: ffff8804383c3880 R14: ffff880028323a90 R15: ffffffff81534225 + FS: 0000000000000000(0000) GS:ffff880028320000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b + CR2: 00000000006df528 CR3: 0000000001a85000 CR4: 00000000000006e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 + Process swapper (pid: 0, threadinfo ffff880616b70000, task ffff880616b6cab0) + Stack: + ffff880028323c40 ffffffffa01c2582 ffff880614cfb020 0000000000000000 + 0100000000000000 00000014383a6c44 ffff8804383c3880 ffff880614e93c00 + ffff880614e93c00 0000000000000000 ffff8804632c4b00 ffff8804383c38b8 + Call Trace: + + [] ? sctp_rcv+0x492/0xa10 [sctp] + [] ? nf_iterate+0x69/0xb0 + [] ? ip_local_deliver_finish+0x0/0x2d0 + [] ? nf_hook_slow+0x76/0x120 + [] ? ip_local_deliver_finish+0x0/0x2d0 + [] ? ip_local_deliver_finish+0xdd/0x2d0 + [] ? ip_local_deliver+0x98/0xa0 + [] ? ip_rcv_finish+0x12d/0x440 + [] ? ip_rcv+0x275/0x350 + [] ? __netif_receive_skb+0x4ab/0x750 + ... + +With lockdep debugging: + + ===================================== + [ BUG: bad unlock balance detected! ] + ------------------------------------- + CslRx/12087 is trying to release lock (slock-AF_INET) at: + [] sctp_generate_timeout_event+0x40/0xe0 [sctp] + but there are no more locks to release! 
+ + other info that might help us debug this: + 2 locks held by CslRx/12087: + #0: (&asoc->timers[i]){+.-...}, at: [] run_timer_softirq+0x16f/0x3e0 + #1: (slock-AF_INET){+.-...}, at: [] sctp_generate_timeout_event+0x23/0xe0 [sctp] + +Ensure the socket taken is also the same one that is released by +saving a copy of the socket before entering the timeout event +critical section. + +Signed-off-by: Karl Heiss +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/sm_sideeffect.c | 42 +++++++++++++++++++++++------------------- + 1 file changed, 23 insertions(+), 19 deletions(-) + +--- a/net/sctp/sm_sideeffect.c ++++ b/net/sctp/sm_sideeffect.c +@@ -244,12 +244,13 @@ void sctp_generate_t3_rtx_event(unsigned + int error; + struct sctp_transport *transport = (struct sctp_transport *) peer; + struct sctp_association *asoc = transport->asoc; +- struct net *net = sock_net(asoc->base.sk); ++ struct sock *sk = asoc->base.sk; ++ struct net *net = sock_net(sk); + + /* Check whether a task is in the sock. */ + +- bh_lock_sock(asoc->base.sk); +- if (sock_owned_by_user(asoc->base.sk)) { ++ bh_lock_sock(sk); ++ if (sock_owned_by_user(sk)) { + pr_debug("%s: sock is busy\n", __func__); + + /* Try again later. */ +@@ -272,10 +273,10 @@ void sctp_generate_t3_rtx_event(unsigned + transport, GFP_ATOMIC); + + if (error) +- asoc->base.sk->sk_err = -error; ++ sk->sk_err = -error; + + out_unlock: +- bh_unlock_sock(asoc->base.sk); ++ bh_unlock_sock(sk); + sctp_transport_put(transport); + } + +@@ -285,11 +286,12 @@ out_unlock: + static void sctp_generate_timeout_event(struct sctp_association *asoc, + sctp_event_timeout_t timeout_type) + { +- struct net *net = sock_net(asoc->base.sk); ++ struct sock *sk = asoc->base.sk; ++ struct net *net = sock_net(sk); + int error = 0; + +- bh_lock_sock(asoc->base.sk); +- if (sock_owned_by_user(asoc->base.sk)) { ++ bh_lock_sock(sk); ++ if (sock_owned_by_user(sk)) { + pr_debug("%s: sock is busy: timer %d\n", __func__, + timeout_type); + +@@ -312,10 +314,10 @@ static void sctp_generate_timeout_event( + (void *)timeout_type, GFP_ATOMIC); + + if (error) +- asoc->base.sk->sk_err = -error; ++ sk->sk_err = -error; + + out_unlock: +- bh_unlock_sock(asoc->base.sk); ++ bh_unlock_sock(sk); + sctp_association_put(asoc); + } + +@@ -365,10 +367,11 @@ void sctp_generate_heartbeat_event(unsig + int error = 0; + struct sctp_transport *transport = (struct sctp_transport *) data; + struct sctp_association *asoc = transport->asoc; +- struct net *net = sock_net(asoc->base.sk); ++ struct sock *sk = asoc->base.sk; ++ struct net *net = sock_net(sk); + +- bh_lock_sock(asoc->base.sk); +- if (sock_owned_by_user(asoc->base.sk)) { ++ bh_lock_sock(sk); ++ if (sock_owned_by_user(sk)) { + pr_debug("%s: sock is busy\n", __func__); + + /* Try again later. 
*/ +@@ -389,10 +392,10 @@ void sctp_generate_heartbeat_event(unsig + transport, GFP_ATOMIC); + + if (error) +- asoc->base.sk->sk_err = -error; ++ sk->sk_err = -error; + + out_unlock: +- bh_unlock_sock(asoc->base.sk); ++ bh_unlock_sock(sk); + sctp_transport_put(transport); + } + +@@ -403,10 +406,11 @@ void sctp_generate_proto_unreach_event(u + { + struct sctp_transport *transport = (struct sctp_transport *) data; + struct sctp_association *asoc = transport->asoc; +- struct net *net = sock_net(asoc->base.sk); ++ struct sock *sk = asoc->base.sk; ++ struct net *net = sock_net(sk); + +- bh_lock_sock(asoc->base.sk); +- if (sock_owned_by_user(asoc->base.sk)) { ++ bh_lock_sock(sk); ++ if (sock_owned_by_user(sk)) { + pr_debug("%s: sock is busy\n", __func__); + + /* Try again later. */ +@@ -427,7 +431,7 @@ void sctp_generate_proto_unreach_event(u + asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); + + out_unlock: +- bh_unlock_sock(asoc->base.sk); ++ bh_unlock_sock(sk); + sctp_association_put(asoc); + } + diff --git a/queue-4.1/sctp-sctp-should-release-assoc-when-sctp_make_abort_user-return-null-in-sctp_close.patch b/queue-4.1/sctp-sctp-should-release-assoc-when-sctp_make_abort_user-return-null-in-sctp_close.patch new file mode 100644 index 00000000000..7811c367ef8 --- /dev/null +++ b/queue-4.1/sctp-sctp-should-release-assoc-when-sctp_make_abort_user-return-null-in-sctp_close.patch @@ -0,0 +1,69 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Xin Long +Date: Tue, 29 Dec 2015 17:49:25 +0800 +Subject: sctp: sctp should release assoc when sctp_make_abort_user return NULL in sctp_close + +From: Xin Long + +[ Upstream commit 068d8bd338e855286aea54e70d1c101569284b21 ] + +In sctp_close, sctp_make_abort_user may return NULL because of memory +allocation failure. If this happens, it will bypass any state change +and never free the assoc. The assoc has no chance to be freed and it +will be kept in memory with the state it had even after the socket is +closed by sctp_close(). + +So if sctp_make_abort_user fails to allocate memory, we should abort +the asoc via sctp_primitive_ABORT as well. Just like the annotation in +sctp_sf_cookie_wait_prm_abort and sctp_sf_do_9_1_prm_abort said, +"Even if we can't send the ABORT due to low memory delete the TCB. +This is a departure from our typical NOMEM handling". + +But then the chunk is NULL (low memory) and the SCTP_CMD_REPLY cmd would +dereference the chunk pointer, and system crash. So we should add +SCTP_CMD_REPLY cmd only when the chunk is not NULL, just like other +places where it adds SCTP_CMD_REPLY cmd. + +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/sm_statefuns.c | 6 ++++-- + net/sctp/socket.c | 3 +-- + 2 files changed, 5 insertions(+), 4 deletions(-) + +--- a/net/sctp/sm_statefuns.c ++++ b/net/sctp/sm_statefuns.c +@@ -4829,7 +4829,8 @@ sctp_disposition_t sctp_sf_do_9_1_prm_ab + + retval = SCTP_DISPOSITION_CONSUME; + +- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); ++ if (abort) ++ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + + /* Even if we can't send the ABORT due to low memory delete the + * TCB. This is a departure from our typical NOMEM handling. 
+@@ -4966,7 +4967,8 @@ sctp_disposition_t sctp_sf_cookie_wait_p + SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); + retval = SCTP_DISPOSITION_CONSUME; + +- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); ++ if (abort) ++ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, + SCTP_STATE(SCTP_STATE_CLOSED)); +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -1513,8 +1513,7 @@ static void sctp_close(struct sock *sk, + struct sctp_chunk *chunk; + + chunk = sctp_make_abort_user(asoc, NULL, 0); +- if (chunk) +- sctp_primitive_ABORT(net, asoc, chunk); ++ sctp_primitive_ABORT(net, asoc, chunk); + } else + sctp_primitive_SHUTDOWN(net, asoc, NULL); + } diff --git a/queue-4.1/series b/queue-4.1/series index c872ee79a4d..2c6f1d8afb4 100644 --- a/queue-4.1/series +++ b/queue-4.1/series @@ -59,3 +59,42 @@ usb-fix-invalid-memory-access-in-hub_activate.patch usb-ipaq.c-fix-a-timeout-loop.patch usb-cp210x-add-id-for-elv-marble-sound-board-1.patch xhci-refuse-loading-if-nousb-is-used.patch +utf-8-q-veth-20don-e2-80-99t-20modify-20ip-5fsum-utf-8-q-med-20doing-20so-20treats-20packets-20with-20bad-20checksums-utf-8-q-20as-20good.patch +ipv6-addrlabel-fix-ip6addrlbl_get.patch +addrconf-always-initialize-sysctl-table-data.patch +net-cdc_ncm-avoid-changing-rx-tx-buffers-on-mtu-changes.patch +sctp-sctp-should-release-assoc-when-sctp_make_abort_user-return-null-in-sctp_close.patch +connector-bump-skb-users-before-callback-invocation.patch +unix-properly-account-for-fds-passed-over-unix-sockets.patch +bridge-only-call-sbin-bridge-stp-for-the-initial-network-namespace.patch +net-filter-make-jits-zero-a-for-skf_ad_alu_xor_x.patch +net-sched-fix-missing-free-per-cpu-on-qstats.patch +net-possible-use-after-free-in-dst_release.patch +vxlan-fix-test-which-detect-duplicate-vxlan-iface.patch +net-sctp-prevent-writes-to-cookie_hmac_alg-from-accessing-invalid-memory.patch +ipv6-tcp-add-rcu-locking-in-tcp_v6_send_synack.patch +tcp_yeah-don-t-set-ssthresh-below-2.patch +udp-disallow-ufo-for-sockets-with-so_no_check-option.patch +net-preserve-ip-control-block-during-gso-segmentation.patch +bonding-prevent-ipv6-link-local-address-on-enslaved-devices.patch +phonet-properly-unshare-skbs-in-phonet_rcv.patch +net-bpf-reject-invalid-shifts.patch +ipv6-update-skb-csum-when-ce-mark-is-propagated.patch +bridge-fix-lockdep-addr_list_lock-false-positive-splat.patch +tcp-dccp-fix-timewait-races-in-timer-handling.patch +tcp-dccp-fix-old-style-declarations.patch +isdn_ppp-add-checks-for-allocation-failure-in-isdn_ppp_open.patch +ppp-slip-validate-vj-compression-slot-parameters-completely.patch +batman-adv-avoid-recursive-call_rcu-for-batadv_bla_claim.patch +batman-adv-avoid-recursive-call_rcu-for-batadv_nc_node.patch +batman-adv-drop-immediate-batadv_orig_ifinfo-free-function.patch +batman-adv-drop-immediate-batadv_neigh_node-free-function.patch +batman-adv-drop-immediate-neigh_ifinfo-free-function.patch +batman-adv-drop-immediate-batadv_hard_iface-free-function.patch +batman-adv-drop-immediate-orig_node-free-function.patch +team-replace-rcu_read_lock-with-a-mutex-in-team_vlan_rx_kill_vid.patch +sctp-prevent-soft-lockup-when-sctp_accept-is-called-during-a-timeout-event.patch +xen-netback-respect-user-provided-max_queues.patch +xen-netfront-respect-user-provided-max_queues.patch +xen-netfront-update-num_queues-to-real-created.patch +xfrm-dst_entries_init-per-net-dst_ops.patch diff --git a/queue-4.1/tcp-dccp-fix-old-style-declarations.patch 
b/queue-4.1/tcp-dccp-fix-old-style-declarations.patch new file mode 100644 index 00000000000..4675244498c --- /dev/null +++ b/queue-4.1/tcp-dccp-fix-old-style-declarations.patch @@ -0,0 +1,54 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Raanan Avargil +Date: Thu, 1 Oct 2015 04:48:53 -0700 +Subject: tcp/dccp: fix old style declarations +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Raanan Avargil + +[ Upstream commit 8695a144da9e500a5a60fa34c06694346ec1048f ] + +I’m using the compilation flag -Werror=old-style-declaration, which +requires that the “inline” word would come at the beginning of the code +line. + +$ make drivers/net/ethernet/intel/e1000e/e1000e.ko +... +include/net/inet_timewait_sock.h:116:1: error: ‘inline’ is not at +beginning of declaration [-Werror=old-style-declaration] +static void inline inet_twsk_schedule(struct inet_timewait_sock *tw, int +timeo) + +include/net/inet_timewait_sock.h:121:1: error: ‘inline’ is not at +beginning of declaration [-Werror=old-style-declaration] +static void inline inet_twsk_reschedule(struct inet_timewait_sock *tw, +int timeo) + +Fixes: ed2e92394589 ("tcp/dccp: fix timewait races in timer handling") +Signed-off-by: Raanan Avargil +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_timewait_sock.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/include/net/inet_timewait_sock.h ++++ b/include/net/inet_timewait_sock.h +@@ -115,12 +115,12 @@ void __inet_twsk_hashdance(struct inet_t + void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, + bool rearm); + +-static void inline inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) ++static inline void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) + { + __inet_twsk_schedule(tw, timeo, false); + } + +-static void inline inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) ++static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) + { + __inet_twsk_schedule(tw, timeo, true); + } diff --git a/queue-4.1/tcp-dccp-fix-timewait-races-in-timer-handling.patch b/queue-4.1/tcp-dccp-fix-timewait-races-in-timer-handling.patch new file mode 100644 index 00000000000..6bb37db294a --- /dev/null +++ b/queue-4.1/tcp-dccp-fix-timewait-races-in-timer-handling.patch @@ -0,0 +1,188 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Eric Dumazet +Date: Sat, 19 Sep 2015 09:08:34 -0700 +Subject: tcp/dccp: fix timewait races in timer handling + +From: Eric Dumazet + +[ Upstream commit ed2e923945892a8372ab70d2f61d364b0b6d9054 ] + +When creating a timewait socket, we need to arm the timer before +allowing other cpus to find it. The signal allowing cpus to find +the socket is setting tw_refcnt to non zero value. + +As we set tw_refcnt in __inet_twsk_hashdance(), we therefore need to +call inet_twsk_schedule() first. + +This also means we need to remove tw_refcnt changes from +inet_twsk_schedule() and let the caller handle it. + +Note that because we use mod_timer_pinned(), we have the guarantee +the timer wont expire before we set tw_refcnt as we run in BH context. + +To make things more readable I introduced inet_twsk_reschedule() helper. + +When rearming the timer, we can use mod_timer_pending() to make sure +we do not rearm a canceled timer. + +Note: This bug can possibly trigger if packets of a flow can hit +multiple cpus. This does not normally happen, unless flow steering +is broken somehow. 
This explains this bug was spotted ~5 months after +its introduction. + +A similar fix is needed for SYN_RECV sockets in reqsk_queue_hash_req(), +but will be provided in a separate patch for proper tracking. + +Fixes: 789f558cfb36 ("tcp/dccp: get rid of central timewait timer") +Signed-off-by: Eric Dumazet +Reported-by: Ying Cai +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_timewait_sock.h | 14 +++++++++++++- + net/dccp/minisocks.c | 4 ++-- + net/ipv4/inet_timewait_sock.c | 16 ++++++++++------ + net/ipv4/tcp_minisocks.c | 13 ++++++------- + 4 files changed, 31 insertions(+), 16 deletions(-) + +--- a/include/net/inet_timewait_sock.h ++++ b/include/net/inet_timewait_sock.h +@@ -112,7 +112,19 @@ struct inet_timewait_sock *inet_twsk_all + void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, + struct inet_hashinfo *hashinfo); + +-void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo); ++void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, ++ bool rearm); ++ ++static void inline inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) ++{ ++ __inet_twsk_schedule(tw, timeo, false); ++} ++ ++static void inline inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) ++{ ++ __inet_twsk_schedule(tw, timeo, true); ++} ++ + void inet_twsk_deschedule(struct inet_timewait_sock *tw); + + void inet_twsk_purge(struct inet_hashinfo *hashinfo, +--- a/net/dccp/minisocks.c ++++ b/net/dccp/minisocks.c +@@ -48,8 +48,6 @@ void dccp_time_wait(struct sock *sk, int + tw->tw_ipv6only = sk->sk_ipv6only; + } + #endif +- /* Linkage updates. */ +- __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); + + /* Get the TIME_WAIT timeout firing. */ + if (timeo < rto) +@@ -60,6 +58,8 @@ void dccp_time_wait(struct sock *sk, int + timeo = DCCP_TIMEWAIT_LEN; + + inet_twsk_schedule(tw, timeo); ++ /* Linkage updates. */ ++ __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); + inet_twsk_put(tw); + } else { + /* Sorry, if we're out of memory, just CLOSE this +--- a/net/ipv4/inet_timewait_sock.c ++++ b/net/ipv4/inet_timewait_sock.c +@@ -153,13 +153,15 @@ void __inet_twsk_hashdance(struct inet_t + /* + * Step 2: Hash TW into tcp ehash chain. + * Notes : +- * - tw_refcnt is set to 3 because : ++ * - tw_refcnt is set to 4 because : + * - We have one reference from bhash chain. + * - We have one reference from ehash chain. ++ * - We have one reference from timer. ++ * - One reference for ourself (our caller will release it). + * We can use atomic_set() because prior spin_lock()/spin_unlock() + * committed into memory all tw fields. 
+ */ +- atomic_set(&tw->tw_refcnt, 1 + 1 + 1); ++ atomic_set(&tw->tw_refcnt, 4); + inet_twsk_add_node_rcu(tw, &ehead->chain); + + /* Step 3: Remove SK from hash chain */ +@@ -243,7 +245,7 @@ void inet_twsk_deschedule(struct inet_ti + } + EXPORT_SYMBOL(inet_twsk_deschedule); + +-void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo) ++void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) + { + /* timeout := RTO * 3.5 + * +@@ -271,12 +273,14 @@ void inet_twsk_schedule(struct inet_time + */ + + tw->tw_kill = timeo <= 4*HZ; +- if (!mod_timer_pinned(&tw->tw_timer, jiffies + timeo)) { +- atomic_inc(&tw->tw_refcnt); ++ if (!rearm) { ++ BUG_ON(mod_timer_pinned(&tw->tw_timer, jiffies + timeo)); + atomic_inc(&tw->tw_dr->tw_count); ++ } else { ++ mod_timer_pending(&tw->tw_timer, jiffies + timeo); + } + } +-EXPORT_SYMBOL_GPL(inet_twsk_schedule); ++EXPORT_SYMBOL_GPL(__inet_twsk_schedule); + + void inet_twsk_purge(struct inet_hashinfo *hashinfo, + struct inet_timewait_death_row *twdr, int family) +--- a/net/ipv4/tcp_minisocks.c ++++ b/net/ipv4/tcp_minisocks.c +@@ -163,9 +163,9 @@ kill_with_rst: + if (tcp_death_row.sysctl_tw_recycle && + tcptw->tw_ts_recent_stamp && + tcp_tw_remember_stamp(tw)) +- inet_twsk_schedule(tw, tw->tw_timeout); ++ inet_twsk_reschedule(tw, tw->tw_timeout); + else +- inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); ++ inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); + return TCP_TW_ACK; + } + +@@ -203,7 +203,7 @@ kill: + return TCP_TW_SUCCESS; + } + } +- inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); ++ inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); + + if (tmp_opt.saw_tstamp) { + tcptw->tw_ts_recent = tmp_opt.rcv_tsval; +@@ -253,7 +253,7 @@ kill: + * Do not reschedule in the last case. + */ + if (paws_reject || th->ack) +- inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); ++ inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); + + return tcp_timewait_check_oow_rate_limit( + tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); +@@ -324,9 +324,6 @@ void tcp_time_wait(struct sock *sk, int + } while (0); + #endif + +- /* Linkage updates. */ +- __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); +- + /* Get the TIME_WAIT timeout firing. */ + if (timeo < rto) + timeo = rto; +@@ -340,6 +337,8 @@ void tcp_time_wait(struct sock *sk, int + } + + inet_twsk_schedule(tw, timeo); ++ /* Linkage updates. */ ++ __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); + inet_twsk_put(tw); + } else { + /* Sorry, if we're out of memory, just CLOSE this diff --git a/queue-4.1/tcp_yeah-don-t-set-ssthresh-below-2.patch b/queue-4.1/tcp_yeah-don-t-set-ssthresh-below-2.patch new file mode 100644 index 00000000000..8d2010598c4 --- /dev/null +++ b/queue-4.1/tcp_yeah-don-t-set-ssthresh-below-2.patch @@ -0,0 +1,46 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Neal Cardwell +Date: Mon, 11 Jan 2016 13:42:43 -0500 +Subject: tcp_yeah: don't set ssthresh below 2 + +From: Neal Cardwell + +[ Upstream commit 83d15e70c4d8909d722c0d64747d8fb42e38a48f ] + +For tcp_yeah, use an ssthresh floor of 2, the same floor used by Reno +and CUBIC, per RFC 5681 (equation 4). + +tcp_yeah_ssthresh() was sometimes returning a 0 or negative ssthresh +value if the intended reduction is as big or bigger than the current +cwnd. Congestion control modules should never return a zero or +negative ssthresh. A zero ssthresh generally results in a zero cwnd, +causing the connection to stall. A negative ssthresh value will be +interpreted as a u32 and will set a target cwnd for PRR near 4 +billion. 
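+
+A hedged arithmetic sketch of the failure (values invented for
+illustration): with tp->snd_cwnd = 10 and a computed reduction of 12,
+the old "tp->snd_cwnd - reduction" yields -2, which read back as a u32
+ssthresh is 4294967294, the "near 4 billion" PRR target above. The fix
+below clamps the same expression to the RFC 5681 floor of 2:
+
+	return max_t(int, tp->snd_cwnd - reduction, 2);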
+ +Oleksandr Natalenko reported that a system using tcp_yeah with ECN +could see a warning about a prior_cwnd of 0 in +tcp_cwnd_reduction(). Testing verified that this was due to +tcp_yeah_ssthresh() misbehaving in this way. + +Reported-by: Oleksandr Natalenko +Signed-off-by: Neal Cardwell +Signed-off-by: Yuchung Cheng +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_yeah.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp_yeah.c ++++ b/net/ipv4/tcp_yeah.c +@@ -219,7 +219,7 @@ static u32 tcp_yeah_ssthresh(struct sock + yeah->fast_count = 0; + yeah->reno_count = max(yeah->reno_count>>1, 2U); + +- return tp->snd_cwnd - reduction; ++ return max_t(int, tp->snd_cwnd - reduction, 2); + } + + static struct tcp_congestion_ops tcp_yeah __read_mostly = { diff --git a/queue-4.1/team-replace-rcu_read_lock-with-a-mutex-in-team_vlan_rx_kill_vid.patch b/queue-4.1/team-replace-rcu_read_lock-with-a-mutex-in-team_vlan_rx_kill_vid.patch new file mode 100644 index 00000000000..e573f03dbd1 --- /dev/null +++ b/queue-4.1/team-replace-rcu_read_lock-with-a-mutex-in-team_vlan_rx_kill_vid.patch @@ -0,0 +1,39 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Ido Schimmel +Date: Mon, 18 Jan 2016 17:30:22 +0200 +Subject: team: Replace rcu_read_lock with a mutex in team_vlan_rx_kill_vid + +From: Ido Schimmel + +[ Upstream commit 60a6531bfe49555581ccd65f66a350cc5693fcde ] + +We can't be within an RCU read-side critical section when deleting +VLANs, as underlying drivers might sleep during the hardware operation. +Therefore, replace the RCU critical section with a mutex. This is +consistent with team_vlan_rx_add_vid. + +Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") +Acked-by: Jiri Pirko +Signed-off-by: Ido Schimmel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/team/team.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -1845,10 +1845,10 @@ static int team_vlan_rx_kill_vid(struct + struct team *team = netdev_priv(dev); + struct team_port *port; + +- rcu_read_lock(); +- list_for_each_entry_rcu(port, &team->port_list, list) ++ mutex_lock(&team->lock); ++ list_for_each_entry(port, &team->port_list, list) + vlan_vid_del(port->dev, proto, vid); +- rcu_read_unlock(); ++ mutex_unlock(&team->lock); + + return 0; + } diff --git a/queue-4.1/udp-disallow-ufo-for-sockets-with-so_no_check-option.patch b/queue-4.1/udp-disallow-ufo-for-sockets-with-so_no_check-option.patch new file mode 100644 index 00000000000..c0ab882797f --- /dev/null +++ b/queue-4.1/udp-disallow-ufo-for-sockets-with-so_no_check-option.patch @@ -0,0 +1,51 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= +Date: Mon, 11 Jan 2016 07:50:30 +0100 +Subject: udp: disallow UFO for sockets with SO_NO_CHECK option + +From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= + +[ Upstream commit 40ba330227ad00b8c0cdf2f425736ff9549cc423 ] + +Commit acf8dd0a9d0b ("udp: only allow UFO for packets from SOCK_DGRAM +sockets") disallows UFO for packets sent from raw sockets. We need to do +the same also for SOCK_DGRAM sockets with SO_NO_CHECK options, even if +for a bit different reason: while such socket would override the +CHECKSUM_PARTIAL set by ip_ufo_append_data(), gso_size is still set and +bad offloading flags warning is triggered in __skb_gso_segment(). 
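+
+For context, a hypothetical userspace sequence that creates the
+affected kind of socket (an illustrative sketch, not taken from this
+patch):
+
+	int one = 1;
+	int fd = socket(AF_INET, SOCK_DGRAM, 0);
+	/* disable UDP transmit checksums; combined with UFO this
+	 * left gso_size set while CHECKSUM_PARTIAL was overridden
+	 */
+	setsockopt(fd, SOL_SOCKET, SO_NO_CHECK, &one, sizeof(one));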
+ +In the IPv6 case, SO_NO_CHECK option is ignored but we need to disallow +UFO for packets sent by sockets with UDP_NO_CHECK6_TX option. + +Signed-off-by: Michal Kubecek +Tested-by: Shannon Nelson +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_output.c | 2 +- + net/ipv6/ip6_output.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -893,7 +893,7 @@ static int __ip_append_data(struct sock + if (((length > mtu) || (skb && skb_is_gso(skb))) && + (sk->sk_protocol == IPPROTO_UDP) && + (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && +- (sk->sk_type == SOCK_DGRAM)) { ++ (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { + err = ip_ufo_append_data(sk, queue, getfrag, from, length, + hh_len, fragheaderlen, transhdrlen, + maxfraglen, flags); +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1329,7 +1329,7 @@ emsgsize: + (skb && skb_is_gso(skb))) && + (sk->sk_protocol == IPPROTO_UDP) && + (rt->dst.dev->features & NETIF_F_UFO) && +- (sk->sk_type == SOCK_DGRAM)) { ++ (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { + err = ip6_ufo_append_data(sk, queue, getfrag, from, length, + hh_len, fragheaderlen, + transhdrlen, mtu, flags, rt); diff --git a/queue-4.1/unix-properly-account-for-fds-passed-over-unix-sockets.patch b/queue-4.1/unix-properly-account-for-fds-passed-over-unix-sockets.patch new file mode 100644 index 00000000000..7c55d3d4445 --- /dev/null +++ b/queue-4.1/unix-properly-account-for-fds-passed-over-unix-sockets.patch @@ -0,0 +1,136 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: willy tarreau +Date: Sun, 10 Jan 2016 07:54:56 +0100 +Subject: unix: properly account for FDs passed over unix sockets + +From: willy tarreau + +[ Upstream commit 712f4aad406bb1ed67f3f98d04c044191f0ff593 ] + +It is possible for a process to allocate and accumulate far more FDs than +the process' limit by sending them over a unix socket then closing them +to keep the process' fd count low. + +This change addresses this problem by keeping track of the number of FDs +in flight per user and preventing non-privileged processes from having +more FDs in flight than their configured FD limit. + +Reported-by: socketpair@gmail.com +Reported-by: Tetsuo Handa +Mitigates: CVE-2013-4312 (Linux 2.0+) +Suggested-by: Linus Torvalds +Acked-by: Hannes Frederic Sowa +Signed-off-by: Willy Tarreau +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/sched.h | 1 + + net/unix/af_unix.c | 24 ++++++++++++++++++++---- + net/unix/garbage.c | 13 ++++++++----- + 3 files changed, 29 insertions(+), 9 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -802,6 +802,7 @@ struct user_struct { + unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ + #endif + unsigned long locked_shm; /* How many pages of mlocked shm ? */ ++ unsigned long unix_inflight; /* How many files in flight in unix sockets */ + + #ifdef CONFIG_KEYS + struct key *uid_keyring; /* UID specific keyring */ +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -1481,6 +1481,21 @@ static void unix_destruct_scm(struct sk_ + sock_wfree(skb); + } + ++/* ++ * The "user->unix_inflight" variable is protected by the garbage ++ * collection lock, and we just read it locklessly here. If you go ++ * over the limit, there might be a tiny race in actually noticing ++ * it across threads. Tough. 
++ */ ++static inline bool too_many_unix_fds(struct task_struct *p) ++{ ++ struct user_struct *user = current_user(); ++ ++ if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) ++ return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); ++ return false; ++} ++ + #define MAX_RECURSION_LEVEL 4 + + static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) +@@ -1489,6 +1504,9 @@ static int unix_attach_fds(struct scm_co + unsigned char max_level = 0; + int unix_sock_count = 0; + ++ if (too_many_unix_fds(current)) ++ return -ETOOMANYREFS; ++ + for (i = scm->fp->count - 1; i >= 0; i--) { + struct sock *sk = unix_get_socket(scm->fp->fp[i]); + +@@ -1510,10 +1528,8 @@ static int unix_attach_fds(struct scm_co + if (!UNIXCB(skb).fp) + return -ENOMEM; + +- if (unix_sock_count) { +- for (i = scm->fp->count - 1; i >= 0; i--) +- unix_inflight(scm->fp->fp[i]); +- } ++ for (i = scm->fp->count - 1; i >= 0; i--) ++ unix_inflight(scm->fp->fp[i]); + return max_level; + } + +--- a/net/unix/garbage.c ++++ b/net/unix/garbage.c +@@ -120,11 +120,11 @@ void unix_inflight(struct file *fp) + { + struct sock *s = unix_get_socket(fp); + ++ spin_lock(&unix_gc_lock); ++ + if (s) { + struct unix_sock *u = unix_sk(s); + +- spin_lock(&unix_gc_lock); +- + if (atomic_long_inc_return(&u->inflight) == 1) { + BUG_ON(!list_empty(&u->link)); + list_add_tail(&u->link, &gc_inflight_list); +@@ -132,25 +132,28 @@ void unix_inflight(struct file *fp) + BUG_ON(list_empty(&u->link)); + } + unix_tot_inflight++; +- spin_unlock(&unix_gc_lock); + } ++ fp->f_cred->user->unix_inflight++; ++ spin_unlock(&unix_gc_lock); + } + + void unix_notinflight(struct file *fp) + { + struct sock *s = unix_get_socket(fp); + ++ spin_lock(&unix_gc_lock); ++ + if (s) { + struct unix_sock *u = unix_sk(s); + +- spin_lock(&unix_gc_lock); + BUG_ON(list_empty(&u->link)); + + if (atomic_long_dec_and_test(&u->inflight)) + list_del_init(&u->link); + unix_tot_inflight--; +- spin_unlock(&unix_gc_lock); + } ++ fp->f_cred->user->unix_inflight--; ++ spin_unlock(&unix_gc_lock); + } + + static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), diff --git a/queue-4.1/utf-8-q-veth-20don-e2-80-99t-20modify-20ip-5fsum-utf-8-q-med-20doing-20so-20treats-20packets-20with-20bad-20checksums-utf-8-q-20as-20good.patch b/queue-4.1/utf-8-q-veth-20don-e2-80-99t-20modify-20ip-5fsum-utf-8-q-med-20doing-20so-20treats-20packets-20with-20bad-20checksums-utf-8-q-20as-20good.patch new file mode 100644 index 00000000000..446272e0a20 --- /dev/null +++ b/queue-4.1/utf-8-q-veth-20don-e2-80-99t-20modify-20ip-5fsum-utf-8-q-med-20doing-20so-20treats-20packets-20with-20bad-20checksums-utf-8-q-20as-20good.patch @@ -0,0 +1,72 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Vijay Pandurangan +Date: Fri, 18 Dec 2015 14:34:59 -0500 +Subject: =?UTF-8?q?veth:=20don=E2=80=99t=20modify=20ip=5Fsum?= =?UTF-8?q?med;=20doing=20so=20treats=20packets=20with=20bad=20checksums?= =?UTF-8?q?=20as=20good.?= +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Vijay Pandurangan + +[ Upstream commit ce8c839b74e3017996fad4e1b7ba2e2625ede82f ] + +Packets that arrive from real hardware devices have ip_summed == +CHECKSUM_UNNECESSARY if the hardware verified the checksums, or +CHECKSUM_NONE if the packet is bad or it was unable to verify it. The +current version of veth will replace CHECKSUM_NONE with +CHECKSUM_UNNECESSARY, which causes corrupt packets routed from hardware to +a veth device to be delivered to the application. 
This caused applications +at Twitter to receive corrupt data when network hardware was corrupting +packets. + +We believe this was added as an optimization to skip computing and +verifying checksums for communication between containers. However, locally +generated packets have ip_summed == CHECKSUM_PARTIAL, so the code as +written does nothing for them. As far as we can tell, after removing this +code, these packets are transmitted from one stack to another unmodified +(tcpdump shows invalid checksums on both sides, as expected), and they are +delivered correctly to applications. We didn’t test every possible network +configuration, but we tried a few common ones such as bridging containers, +using NAT between the host and a container, and routing from hardware +devices to containers. We have effectively deployed this in production at +Twitter (by disabling RX checksum offloading on veth devices). + +This code dates back to the first version of the driver, commit + ("[NET]: Virtual ethernet device driver"), so I +suspect this bug occurred mostly because the driver API has evolved +significantly since then. Commit <0b7967503dc97864f283a> ("net/veth: Fix +packet checksumming") (in December 2010) fixed this for packets that get +created locally and sent to hardware devices, by not changing +CHECKSUM_PARTIAL. However, the same issue still occurs for packets coming +in from hardware devices. + +Co-authored-by: Evan Jones +Signed-off-by: Evan Jones +Cc: Nicolas Dichtel +Cc: Phil Sutter +Cc: Toshiaki Makita +Cc: netdev@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Vijay Pandurangan +Acked-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/veth.c | 6 ------ + 1 file changed, 6 deletions(-) + +--- a/drivers/net/veth.c ++++ b/drivers/net/veth.c +@@ -117,12 +117,6 @@ static netdev_tx_t veth_xmit(struct sk_b + kfree_skb(skb); + goto drop; + } +- /* don't change ip_summed == CHECKSUM_PARTIAL, as that +- * will cause bad checksum on forwarded packets +- */ +- if (skb->ip_summed == CHECKSUM_NONE && +- rcv->features & NETIF_F_RXCSUM) +- skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { + struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); diff --git a/queue-4.1/vxlan-fix-test-which-detect-duplicate-vxlan-iface.patch b/queue-4.1/vxlan-fix-test-which-detect-duplicate-vxlan-iface.patch new file mode 100644 index 00000000000..4a64215a04f --- /dev/null +++ b/queue-4.1/vxlan-fix-test-which-detect-duplicate-vxlan-iface.patch @@ -0,0 +1,61 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Nicolas Dichtel +Date: Thu, 7 Jan 2016 11:26:53 +0100 +Subject: vxlan: fix test which detect duplicate vxlan iface + +From: Nicolas Dichtel + +[ Upstream commit 07b9b37c227cb8d88d478b4a9c5634fee514ede1 ] + +When a vxlan interface is created, the driver checks that there is not +another vxlan interface with the same properties. To do this, it checks +the existing vxlan udp socket. Since commit 1c51a9159dde, the creation of +the vxlan socket is done only when the interface is set up, thus it breaks +that test. + +Example: +$ ip l a vxlan10 type vxlan id 10 group 239.0.0.10 dev eth0 dstport 0 +$ ip l a vxlan11 type vxlan id 10 group 239.0.0.10 dev eth0 dstport 0 +$ ip -br l | grep vxlan +vxlan10 DOWN f2:55:1c:6a:fb:00 +vxlan11 DOWN 7a:cb:b9:38:59:0d + +Instead of checking sockets, let's loop over the vxlan iface list. 
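+
+With this change, repeating the second create from the example above
+is expected to fail with EEXIST instead of silently succeeding
+(illustrative output):
+
+$ ip l a vxlan11 type vxlan id 10 group 239.0.0.10 dev eth0 dstport 0
+RTNETLINK answers: File exists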
+ +Fixes: 1c51a9159dde ("vxlan: fix race caused by dropping rtnl_unlock") +Reported-by: Thomas Faivre +Signed-off-by: Nicolas Dichtel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -2581,7 +2581,7 @@ static int vxlan_newlink(struct net *src + struct nlattr *tb[], struct nlattr *data[]) + { + struct vxlan_net *vn = net_generic(src_net, vxlan_net_id); +- struct vxlan_dev *vxlan = netdev_priv(dev); ++ struct vxlan_dev *vxlan = netdev_priv(dev), *tmp; + struct vxlan_rdst *dst = &vxlan->default_dst; + __u32 vni; + int err; +@@ -2714,9 +2714,13 @@ static int vxlan_newlink(struct net *src + if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) + vxlan->flags |= VXLAN_F_REMCSUM_NOPARTIAL; + +- if (vxlan_find_vni(src_net, vni, use_ipv6 ? AF_INET6 : AF_INET, +- vxlan->dst_port, vxlan->flags)) { +- pr_info("duplicate VNI %u\n", vni); ++ list_for_each_entry(tmp, &vn->vxlan_list, next) { ++ if (tmp->default_dst.remote_vni == vni && ++ (tmp->default_dst.remote_ip.sa.sa_family == AF_INET6 || ++ tmp->saddr.sa.sa_family == AF_INET6) == use_ipv6 && ++ tmp->dst_port == vxlan->dst_port && ++ (tmp->flags & VXLAN_F_RCV_FLAGS) == ++ (vxlan->flags & VXLAN_F_RCV_FLAGS)) + return -EEXIST; + } + diff --git a/queue-4.1/xen-netback-respect-user-provided-max_queues.patch b/queue-4.1/xen-netback-respect-user-provided-max_queues.patch new file mode 100644 index 00000000000..1357797f9e3 --- /dev/null +++ b/queue-4.1/xen-netback-respect-user-provided-max_queues.patch @@ -0,0 +1,41 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Wei Liu +Date: Thu, 10 Sep 2015 11:18:57 +0100 +Subject: xen-netback: respect user provided max_queues + +From: Wei Liu + +[ Upstream commit 4c82ac3c37363e8c4ded6a5fe1ec5fa756b34df3 ] + +Originally that parameter was always reset to num_online_cpus during +module initialisation, which renders it useless. + +The fix is to only set max_queues to num_online_cpus when user has not +provided a value. + +Reported-by: Johnny Strom +Signed-off-by: Wei Liu +Reviewed-by: David Vrabel +Acked-by: Ian Campbell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netback/netback.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -2007,8 +2007,11 @@ static int __init netback_init(void) + if (!xen_domain()) + return -ENODEV; + +- /* Allow as many queues as there are CPUs, by default */ +- xenvif_max_queues = num_online_cpus(); ++ /* Allow as many queues as there are CPUs if user has not ++ * specified a value. 
++ */ ++ if (xenvif_max_queues == 0) ++ xenvif_max_queues = num_online_cpus(); + + if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) { + pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", diff --git a/queue-4.1/xen-netfront-respect-user-provided-max_queues.patch b/queue-4.1/xen-netfront-respect-user-provided-max_queues.patch new file mode 100644 index 00000000000..9ef47d0fb9e --- /dev/null +++ b/queue-4.1/xen-netfront-respect-user-provided-max_queues.patch @@ -0,0 +1,41 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Wei Liu +Date: Thu, 10 Sep 2015 11:18:58 +0100 +Subject: xen-netfront: respect user provided max_queues + +From: Wei Liu + +[ Upstream commit 32a844056fd43dda647e1c3c6b9983bdfa04d17d ] + +Originally that parameter was always reset to num_online_cpus during +module initialisation, which renders it useless. + +The fix is to only set max_queues to num_online_cpus when user has not +provided a value. + +Signed-off-by: Wei Liu +Cc: David Vrabel +Reviewed-by: David Vrabel +Tested-by: David Vrabel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netfront.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -2140,8 +2140,11 @@ static int __init netif_init(void) + + pr_info("Initialising Xen virtual ethernet driver\n"); + +- /* Allow as many queues as there are CPUs, by default */ +- xennet_max_queues = num_online_cpus(); ++ /* Allow as many queues as there are CPUs if user has not ++ * specified a value. ++ */ ++ if (xennet_max_queues == 0) ++ xennet_max_queues = num_online_cpus(); + + return xenbus_register_frontend(&netfront_driver); + } diff --git a/queue-4.1/xen-netfront-update-num_queues-to-real-created.patch b/queue-4.1/xen-netfront-update-num_queues-to-real-created.patch new file mode 100644 index 00000000000..206354aede8 --- /dev/null +++ b/queue-4.1/xen-netfront-update-num_queues-to-real-created.patch @@ -0,0 +1,83 @@ +From foo@baz Tue Jan 26 21:37:04 PST 2016 +From: Joe Jin +Date: Mon, 19 Oct 2015 13:37:17 +0800 +Subject: xen-netfront: update num_queues to real created + +From: Joe Jin + +[ Upstream commit ca88ea1247dfee094e2467a3578eaec9bdf0833a ] + +Sometimes xennet_create_queues() may failed to created all requested +queues, we need to update num_queues to real created to avoid NULL +pointer dereference. + +Signed-off-by: Joe Jin +Cc: Boris Ostrovsky +Cc: Konrad Rzeszutek Wilk +Cc: Wei Liu +Cc: Ian Campbell +Cc: David S. Miller +Reviewed-by: Boris Ostrovsky +Signed-off-by: David S. 
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/xen-netfront.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/xen-netfront.c
++++ b/drivers/net/xen-netfront.c
+@@ -1710,19 +1710,19 @@ static void xennet_destroy_queues(struct
+ }
+
+ static int xennet_create_queues(struct netfront_info *info,
+- unsigned int num_queues)
++ unsigned int *num_queues)
+ {
+ unsigned int i;
+ int ret;
+
+- info->queues = kcalloc(num_queues, sizeof(struct netfront_queue),
++ info->queues = kcalloc(*num_queues, sizeof(struct netfront_queue),
+ GFP_KERNEL);
+ if (!info->queues)
+ return -ENOMEM;
+
+ rtnl_lock();
+
+- for (i = 0; i < num_queues; i++) {
++ for (i = 0; i < *num_queues; i++) {
+ struct netfront_queue *queue = &info->queues[i];
+
+ queue->id = i;
+@@ -1732,7 +1732,7 @@ static int xennet_create_queues(struct n
+ if (ret < 0) {
+ dev_warn(&info->netdev->dev,
+ "only created %d queues\n", i);
+- num_queues = i;
++ *num_queues = i;
+ break;
+ }
+
+@@ -1742,11 +1742,11 @@ static int xennet_create_queues(struct n
+ napi_enable(&queue->napi);
+ }
+
+- netif_set_real_num_tx_queues(info->netdev, num_queues);
++ netif_set_real_num_tx_queues(info->netdev, *num_queues);
+
+ rtnl_unlock();
+
+- if (num_queues == 0) {
++ if (*num_queues == 0) {
+ dev_err(&info->netdev->dev, "no queues\n");
+ return -EINVAL;
+ }
+@@ -1792,7 +1792,7 @@ static int talk_to_netback(struct xenbus
+ if (info->queues)
+ xennet_destroy_queues(info);
+
+- err = xennet_create_queues(info, num_queues);
++ err = xennet_create_queues(info, &num_queues);
+ if (err < 0)
+ goto destroy_ring;
+
diff --git a/queue-4.1/xfrm-dst_entries_init-per-net-dst_ops.patch b/queue-4.1/xfrm-dst_entries_init-per-net-dst_ops.patch
new file mode 100644
index 00000000000..e8cd13163ed
--- /dev/null
+++ b/queue-4.1/xfrm-dst_entries_init-per-net-dst_ops.patch
@@ -0,0 +1,332 @@
+From foo@baz Tue Jan 26 21:37:04 PST 2016
+From: Dan Streetman
+Date: Thu, 29 Oct 2015 09:51:16 -0400
+Subject: xfrm: dst_entries_init() per-net dst_ops
+
+From: Dan Streetman
+
+[ Upstream commit a8a572a6b5f2a79280d6e302cb3c1cb1fbaeb3e8 ]
+
+Remove the dst_entries_init/destroy calls for xfrm4 and xfrm6 dst_ops
+templates; their dst_entries counters will never be used. Move the
+xfrm dst_ops initialization from the common xfrm/xfrm_policy.c to
+xfrm4/xfrm4_policy.c and xfrm6/xfrm6_policy.c, and call dst_entries_init
+and dst_entries_destroy for each net namespace.
+
+The ipv4 and ipv6 xfrm code each create a dst_ops template, and perform
+dst_entries_init on the templates. The template values are copied to each
+net namespace's xfrm.xfrm*_dst_ops. The problem is that the dst_ops
+pcpuc_entries field is a percpu counter and cannot be used correctly by
+simply copying it to another object.
+
+The result of this is a very subtle bug: changes to the dst entries
+counter from one net namespace may sometimes get applied to a different
+net namespace's dst entries counter. This is because of how the percpu
+counter works; it has a main count field as well as a pointer to the
+percpu variables. Each net namespace maintains its own main count
+variable, but they all point to one set of percpu variables. When any
+net namespace happens to change one of the percpu variables to outside
+its small batch range, its count is moved to that net namespace's main
+count variable. So with multiple net namespaces operating concurrently,
+the dst_ops entries counter can drift away from the value it should
+have; if counts are consistently moved from one net namespace to
+another (which my testing showed is likely), then one net namespace
+winds up with a negative dst_ops count while another winds up with a
+continually increasing count, eventually reaching its gc_thresh limit,
+which causes all new traffic on the net namespace to fail with
+-ENOBUFS.
+
+Signed-off-by: Dan Streetman
+Signed-off-by: Dan Streetman
+Signed-off-by: Steffen Klassert
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/xfrm4_policy.c | 46 +++++++++++++++++++++++++++++++++--------
+ net/ipv6/xfrm6_policy.c | 53 ++++++++++++++++++++++++++++++++++--------------
+ net/xfrm/xfrm_policy.c | 38 ----------------------------------
+ 3 files changed, 75 insertions(+), 62 deletions(-)
+
+--- a/net/ipv4/xfrm4_policy.c
++++ b/net/ipv4/xfrm4_policy.c
+@@ -230,7 +230,7 @@ static void xfrm4_dst_ifdown(struct dst_
+ xfrm_dst_ifdown(dst, dev);
+ }
+
+-static struct dst_ops xfrm4_dst_ops = {
++static struct dst_ops xfrm4_dst_ops_template = {
+ .family = AF_INET,
+ .gc = xfrm4_garbage_collect,
+ .update_pmtu = xfrm4_update_pmtu,
+@@ -244,7 +244,7 @@ static struct dst_ops xfrm4_dst_ops = {
+
+ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
+ .family = AF_INET,
+- .dst_ops = &xfrm4_dst_ops,
++ .dst_ops = &xfrm4_dst_ops_template,
+ .dst_lookup = xfrm4_dst_lookup,
+ .get_saddr = xfrm4_get_saddr,
+ .decode_session = _decode_session4,
+@@ -266,7 +266,7 @@ static struct ctl_table xfrm4_policy_tab
+ { }
+ };
+
+-static int __net_init xfrm4_net_init(struct net *net)
++static int __net_init xfrm4_net_sysctl_init(struct net *net)
+ {
+ struct ctl_table *table;
+ struct ctl_table_header *hdr;
+@@ -294,7 +294,7 @@ err_alloc:
+ return -ENOMEM;
+ }
+
+-static void __net_exit xfrm4_net_exit(struct net *net)
++static void __net_exit xfrm4_net_sysctl_exit(struct net *net)
+ {
+ struct ctl_table *table;
+
+@@ -306,12 +306,44 @@ static void __net_exit xfrm4_net_exit(st
+ if (!net_eq(net, &init_net))
+ kfree(table);
+ }
++#else /* CONFIG_SYSCTL */
++static int inline xfrm4_net_sysctl_init(struct net *net)
++{
++ return 0;
++}
++
++static void inline xfrm4_net_sysctl_exit(struct net *net)
++{
++}
++#endif
++
++static int __net_init xfrm4_net_init(struct net *net)
++{
++ int ret;
++
++ memcpy(&net->xfrm.xfrm4_dst_ops, &xfrm4_dst_ops_template,
++ sizeof(xfrm4_dst_ops_template));
++ ret = dst_entries_init(&net->xfrm.xfrm4_dst_ops);
++ if (ret)
++ return ret;
++
++ ret = xfrm4_net_sysctl_init(net);
++ if (ret)
++ dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
++
++ return ret;
++}
++
++static void __net_exit xfrm4_net_exit(struct net *net)
++{
++ xfrm4_net_sysctl_exit(net);
++ dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
++}
+
+ static struct pernet_operations __net_initdata xfrm4_net_ops = {
+ .init = xfrm4_net_init,
+ .exit = xfrm4_net_exit,
+ };
+-#endif
+
+ static void __init xfrm4_policy_init(void)
+ {
+@@ -320,13 +352,9 @@ static void __init xfrm4_policy_init(voi
+
+ void __init xfrm4_init(void)
+ {
+- dst_entries_init(&xfrm4_dst_ops);
+-
+ xfrm4_state_init();
+ xfrm4_policy_init();
+ xfrm4_protocol_init();
+-#ifdef CONFIG_SYSCTL
+ register_pernet_subsys(&xfrm4_net_ops);
+-#endif
+ }
+
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -289,7 +289,7 @@ static void xfrm6_dst_ifdown(struct dst_
+ xfrm_dst_ifdown(dst, dev);
+ }
+
+-static struct dst_ops xfrm6_dst_ops = {
++static struct dst_ops xfrm6_dst_ops_template = {
+ .family = AF_INET6,
+ .gc = xfrm6_garbage_collect,
+ .update_pmtu = xfrm6_update_pmtu,
+@@ -303,7 +303,7 @@ static struct dst_ops xfrm6_dst_ops = {
+
+ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
+ .family = AF_INET6,
+- .dst_ops = &xfrm6_dst_ops,
++ .dst_ops = &xfrm6_dst_ops_template,
+ .dst_lookup = xfrm6_dst_lookup,
+ .get_saddr = xfrm6_get_saddr,
+ .decode_session = _decode_session6,
+@@ -336,7 +336,7 @@ static struct ctl_table xfrm6_policy_tab
+ { }
+ };
+
+-static int __net_init xfrm6_net_init(struct net *net)
++static int __net_init xfrm6_net_sysctl_init(struct net *net)
+ {
+ struct ctl_table *table;
+ struct ctl_table_header *hdr;
+@@ -364,7 +364,7 @@ err_alloc:
+ return -ENOMEM;
+ }
+
+-static void __net_exit xfrm6_net_exit(struct net *net)
++static void __net_exit xfrm6_net_sysctl_exit(struct net *net)
+ {
+ struct ctl_table *table;
+
+@@ -376,24 +376,52 @@ static void __net_exit xfrm6_net_exit(st
+ if (!net_eq(net, &init_net))
+ kfree(table);
+ }
++#else /* CONFIG_SYSCTL */
++static int inline xfrm6_net_sysctl_init(struct net *net)
++{
++ return 0;
++}
++
++static void inline xfrm6_net_sysctl_exit(struct net *net)
++{
++}
++#endif
++
++static int __net_init xfrm6_net_init(struct net *net)
++{
++ int ret;
++
++ memcpy(&net->xfrm.xfrm6_dst_ops, &xfrm6_dst_ops_template,
++ sizeof(xfrm6_dst_ops_template));
++ ret = dst_entries_init(&net->xfrm.xfrm6_dst_ops);
++ if (ret)
++ return ret;
++
++ ret = xfrm6_net_sysctl_init(net);
++ if (ret)
++ dst_entries_destroy(&net->xfrm.xfrm6_dst_ops);
++
++ return ret;
++}
++
++static void __net_exit xfrm6_net_exit(struct net *net)
++{
++ xfrm6_net_sysctl_exit(net);
++ dst_entries_destroy(&net->xfrm.xfrm6_dst_ops);
++}
+
+ static struct pernet_operations xfrm6_net_ops = {
+ .init = xfrm6_net_init,
+ .exit = xfrm6_net_exit,
+ };
+-#endif
+
+ int __init xfrm6_init(void)
+ {
+ int ret;
+
+- dst_entries_init(&xfrm6_dst_ops);
+-
+ ret = xfrm6_policy_init();
+- if (ret) {
+- dst_entries_destroy(&xfrm6_dst_ops);
++ if (ret)
+ goto out;
+- }
+ ret = xfrm6_state_init();
+ if (ret)
+ goto out_policy;
+@@ -402,9 +430,7 @@ int __init xfrm6_init(void)
+ if (ret)
+ goto out_state;
+
+-#ifdef CONFIG_SYSCTL
+ register_pernet_subsys(&xfrm6_net_ops);
+-#endif
+ out:
+ return ret;
+ out_state:
+@@ -416,11 +442,8 @@ out_policy:
+
+ void xfrm6_fini(void)
+ {
+-#ifdef CONFIG_SYSCTL
+ unregister_pernet_subsys(&xfrm6_net_ops);
+-#endif
+ xfrm6_protocol_fini();
+ xfrm6_policy_fini();
+ xfrm6_state_fini();
+- dst_entries_destroy(&xfrm6_dst_ops);
+ }
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -2806,7 +2806,6 @@ static struct neighbour *xfrm_neigh_look
+
+ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
+ {
+- struct net *net;
+ int err = 0;
+ if (unlikely(afinfo == NULL))
+ return -EINVAL;
+@@ -2837,26 +2836,6 @@ int xfrm_policy_register_afinfo(struct x
+ }
+ spin_unlock(&xfrm_policy_afinfo_lock);
+
+- rtnl_lock();
+- for_each_net(net) {
+- struct dst_ops *xfrm_dst_ops;
+-
+- switch (afinfo->family) {
+- case AF_INET:
+- xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
+- break;
+-#if IS_ENABLED(CONFIG_IPV6)
+- case AF_INET6:
+- xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
+- break;
+-#endif
+- default:
+- BUG();
+- }
+- *xfrm_dst_ops = *afinfo->dst_ops;
+- }
+- rtnl_unlock();
+-
+ return err;
+ }
+ EXPORT_SYMBOL(xfrm_policy_register_afinfo);
+@@ -2892,22 +2871,6 @@ int xfrm_policy_unregister_afinfo(struct
+ }
+ EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
+
+-static void __net_init xfrm_dst_ops_init(struct net *net)
+-{
+- struct xfrm_policy_afinfo *afinfo;
+-
+- rcu_read_lock();
+- afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
+- if (afinfo)
+- net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
+-#if IS_ENABLED(CONFIG_IPV6)
+- afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
+- if (afinfo)
+- net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
+-#endif
+- rcu_read_unlock();
+-}
+-
+ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+ {
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+@@ -3056,7 +3019,6 @@ static int __net_init xfrm_net_init(stru
+ rv = xfrm_policy_init(net);
+ if (rv < 0)
+ goto out_policy;
+- xfrm_dst_ops_init(net);
+ rv = xfrm_sysctl_init(net);
+ if (rv < 0)
+ goto out_sysctl;
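
The percpu-counter aliasing that the xfrm commit message describes can be
reproduced outside the kernel. Below is a minimal user-space C sketch, not
kernel code: NR_CPUS, BATCH, and the struct layout are invented stand-ins
for the kernel's struct percpu_counter, but the fold-into-main-count logic
mirrors it. Copying a counter template copies only the main count and the
pointer, so both copies share one set of per-cpu deltas, and a batch fold
triggered through one object absorbs deltas generated through the other:

	/*
	 * Illustrative user-space model of the bug fixed above -- NOT
	 * kernel code.  NR_CPUS, BATCH and the struct layout are invented
	 * stand-ins for the kernel's struct percpu_counter.
	 */
	#include <stdio.h>
	#include <stdlib.h>

	#define NR_CPUS 4
	#define BATCH   8

	struct pcpu_counter {
		long count;	/* main count, one per object */
		long *pcpu;	/* per-cpu deltas; aliased by a struct copy */
	};

	static void pcpu_counter_init(struct pcpu_counter *c)
	{
		c->count = 0;
		c->pcpu = calloc(NR_CPUS, sizeof(*c->pcpu));
		if (!c->pcpu)
			abort();
	}

	static void pcpu_counter_add(struct pcpu_counter *c, int cpu, long n)
	{
		c->pcpu[cpu] += n;
		/* fold the batched delta into *this* object's main count */
		if (c->pcpu[cpu] >= BATCH || c->pcpu[cpu] <= -BATCH) {
			c->count += c->pcpu[cpu];
			c->pcpu[cpu] = 0;
		}
	}

	int main(void)
	{
		struct pcpu_counter tmpl, net_a, net_b;

		pcpu_counter_init(&tmpl);
		net_a = tmpl;	/* what the old template copy effectively did: */
		net_b = tmpl;	/* both "namespaces" share tmpl's pcpu array   */

		pcpu_counter_add(&net_a, 0, 5);	/* delta parked in shared pcpu[0] */
		pcpu_counter_add(&net_b, 0, 5);	/* crosses BATCH; net_b takes all 10 */

		printf("net_a=%ld net_b=%ld\n", net_a.count, net_b.count);
		return 0;
	}

This prints "net_a=0 net_b=10" rather than 5 and 5: the count has "moved"
from one namespace to the other, exactly the drift described above. Running
dst_entries_init() per net namespace gives each xfrm dst_ops its own percpu
storage, so deltas can no longer migrate between namespaces.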