From: Greg Kroah-Hartman Date: Wed, 30 Sep 2015 03:31:28 +0000 (+0200) Subject: 4.1-stable patches X-Git-Tag: v3.14.54~7 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a30eec036a8dae7eaa3693357ad42ff8f24bab56;p=thirdparty%2Fkernel%2Fstable-queue.git 4.1-stable patches added patches: bridge-fix-igmpv3-mldv2-report-parsing.patch cls_u32-complete-the-check-for-non-forced-case-in-u32_destroy.patch fib_rules-fix-fib-rule-dumps-across-multiple-skbs.patch ip6_gre-release-cached-dst-on-tunnel-removal.patch ipv6-fix-exthdrs-offload-registration-in-out_rt-path.patch ipv6-fix-multipath-route-replace-error-recovery.patch macvtap-fix-tunsetsndbuf-values-64k.patch mvneta-use-inband-status-only-when-explicitly-enabled.patch net-dsa-bcm_sf2-do-not-override-speed-settings.patch net-dsa-bcm_sf2-fix-64-bits-register-writes.patch net-dsa-bcm_sf2-fix-ageing-conditions-and-operation.patch net-eth-altera-fix-napi-poll_list-corruption.patch net-fec-clear-receive-interrupts-before-processing-a-packet.patch net-ipv6-correct-pim6-mrt_lock-handling.patch net-mlx4_en-really-allow-to-change-rss-key.patch net-phy-fixed_phy-handle-link-down-case.patch net-revert-net_sched-move-tp-root-allocation-into-fw_init.patch netlink-fix-autobind-race-condition-that-leads-to-zero-port-id.patch netlink-mmap-transform-mmap-skb-into-full-skb-on-taps.patch netlink-replace-rhash_portid-with-bound.patch of_mdio-add-new-dt-property-managed-to-specify-the-phy-management-type.patch openvswitch-zero-flows-on-allocation.patch sctp-fix-race-on-protocol-netns-initialization.patch sock-diag-fix-panic-in-sock_diag_put_filterinfo.patch tcp-add-proper-ts-val-into-rst-packets.patch usbnet-get-event_no_runtime_pm-bit-before-it-is-cleared.patch vxlan-re-ignore-eaddrinuse-from-igmp_join.patch --- diff --git a/queue-4.1/bridge-fix-igmpv3-mldv2-report-parsing.patch b/queue-4.1/bridge-fix-igmpv3-mldv2-report-parsing.patch new file mode 100644 index 00000000000..018a4f75f24 --- /dev/null +++ 
b/queue-4.1/bridge-fix-igmpv3-mldv2-report-parsing.patch @@ -0,0 +1,53 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: =?UTF-8?q?Linus=20L=C3=BCssing?= +Date: Fri, 11 Sep 2015 18:39:48 +0200 +Subject: bridge: fix igmpv3 / mldv2 report parsing +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: =?UTF-8?q?Linus=20L=C3=BCssing?= + +[ Upstream commit c2d4fbd2163e607915cc05798ce7fb7f31117cc1 ] + +With the newly introduced helper functions the skb pulling is hidden in +the checksumming function - and undone before returning to the caller. + +The IGMPv3 and MLDv2 report parsing functions in the bridge still +assumed that the skb is pointing to the beginning of the IGMP/MLD +message while it is now kept at the beginning of the IPv4/6 header, +breaking the message parsing and creating packet loss. + +Fixing this by taking the offset between IP and IGMP/MLD header into +account, too. + +Fixes: 9afd85c9e455 ("net: Export IGMP/MLD message validation code") +Reported-by: Tobias Powalowski +Tested-by: Tobias Powalowski +Signed-off-by: Linus Lüssing +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_multicast.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -980,7 +980,7 @@ static int br_ip4_multicast_igmp3_report + + ih = igmpv3_report_hdr(skb); + num = ntohs(ih->ngrec); +- len = sizeof(*ih); ++ len = skb_transport_offset(skb) + sizeof(*ih); + + for (i = 0; i < num; i++) { + len += sizeof(*grec); +@@ -1035,7 +1035,7 @@ static int br_ip6_multicast_mld2_report( + + icmp6h = icmp6_hdr(skb); + num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); +- len = sizeof(*icmp6h); ++ len = skb_transport_offset(skb) + sizeof(*icmp6h); + + for (i = 0; i < num; i++) { + __be16 *nsrcs, _nsrcs; diff --git a/queue-4.1/cls_u32-complete-the-check-for-non-forced-case-in-u32_destroy.patch b/queue-4.1/cls_u32-complete-the-check-for-non-forced-case-in-u32_destroy.patch new file mode 100644 index 00000000000..e4fa66c6526 --- /dev/null +++ b/queue-4.1/cls_u32-complete-the-check-for-non-forced-case-in-u32_destroy.patch @@ -0,0 +1,55 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: WANG Cong +Date: Tue, 25 Aug 2015 16:38:12 -0700 +Subject: cls_u32: complete the check for non-forced case in u32_destroy() + +From: WANG Cong + +[ Upstream commit a6c1aea044e490da3e59124ec55991fe316818d5 ] + +In commit 1e052be69d04 ("net_sched: destroy proto tp when all filters are gone") +I added a check in u32_destroy() to see if all real filters are gone +for each tp, however, that is only done for root_ht, same is needed +for others. 
+ +This can be reproduced by the following tc commands: + +tc filter add dev eth0 parent 1:0 prio 5 handle 15: protocol ip u32 divisor 256 +tc filter add dev eth0 protocol ip parent 1: prio 5 handle 15:2:2 u32 +ht 15:2: match ip src 10.0.0.2 flowid 1:10 +tc filter add dev eth0 protocol ip parent 1: prio 5 handle 15:2:3 u32 +ht 15:2: match ip src 10.0.0.3 flowid 1:10 + +Fixes: 1e052be69d04 ("net_sched: destroy proto tp when all filters are gone") +Reported-by: Akshat Kakkar +Cc: Jamal Hadi Salim +Signed-off-by: Cong Wang +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_u32.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +--- a/net/sched/cls_u32.c ++++ b/net/sched/cls_u32.c +@@ -490,6 +490,19 @@ static bool u32_destroy(struct tcf_proto + return false; + } + } ++ ++ if (tp_c->refcnt > 1) ++ return false; ++ ++ if (tp_c->refcnt == 1) { ++ struct tc_u_hnode *ht; ++ ++ for (ht = rtnl_dereference(tp_c->hlist); ++ ht; ++ ht = rtnl_dereference(ht->next)) ++ if (!ht_empty(ht)) ++ return false; ++ } + } + + if (root_ht && --root_ht->refcnt == 0) diff --git a/queue-4.1/fib_rules-fix-fib-rule-dumps-across-multiple-skbs.patch b/queue-4.1/fib_rules-fix-fib-rule-dumps-across-multiple-skbs.patch new file mode 100644 index 00000000000..4c4f28f6ca5 --- /dev/null +++ b/queue-4.1/fib_rules-fix-fib-rule-dumps-across-multiple-skbs.patch @@ -0,0 +1,72 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Wilson Kok +Date: Tue, 22 Sep 2015 21:40:22 -0700 +Subject: fib_rules: fix fib rule dumps across multiple skbs + +From: Wilson Kok + +[ Upstream commit 41fc014332d91ee90c32840bf161f9685b7fbf2b ] + +dump_rules returns skb length and not error. +But when family == AF_UNSPEC, the caller of dump_rules +assumes that it returns an error. Hence, when family == AF_UNSPEC, +we continue trying to dump on -EMSGSIZE errors resulting in +incorrect dump idx carried between skbs belonging to the same dump. 
+This results in fib rule dump always only dumping rules that fit +into the first skb. + +This patch fixes dump_rules to return error so that we exit correctly +and idx is correctly maintained between skbs that are part of the +same dump. + +Signed-off-by: Wilson Kok +Signed-off-by: Roopa Prabhu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/fib_rules.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +--- a/net/core/fib_rules.c ++++ b/net/core/fib_rules.c +@@ -615,15 +615,17 @@ static int dump_rules(struct sk_buff *sk + { + int idx = 0; + struct fib_rule *rule; ++ int err = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(rule, &ops->rules_list, list) { + if (idx < cb->args[1]) + goto skip; + +- if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, +- cb->nlh->nlmsg_seq, RTM_NEWRULE, +- NLM_F_MULTI, ops) < 0) ++ err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, ++ cb->nlh->nlmsg_seq, RTM_NEWRULE, ++ NLM_F_MULTI, ops); ++ if (err) + break; + skip: + idx++; +@@ -632,7 +634,7 @@ skip: + cb->args[1] = idx; + rules_ops_put(ops); + +- return skb->len; ++ return err; + } + + static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) +@@ -648,7 +650,9 @@ static int fib_nl_dumprule(struct sk_buf + if (ops == NULL) + return -EAFNOSUPPORT; + +- return dump_rules(skb, cb, ops); ++ dump_rules(skb, cb, ops); ++ ++ return skb->len; + } + + rcu_read_lock(); diff --git a/queue-4.1/ip6_gre-release-cached-dst-on-tunnel-removal.patch b/queue-4.1/ip6_gre-release-cached-dst-on-tunnel-removal.patch new file mode 100644 index 00000000000..a606cf85b57 --- /dev/null +++ b/queue-4.1/ip6_gre-release-cached-dst-on-tunnel-removal.patch @@ -0,0 +1,35 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: huaibin Wang +Date: Tue, 25 Aug 2015 16:20:34 +0200 +Subject: ip6_gre: release cached dst on tunnel removal + +From: huaibin Wang + +[ Upstream commit d4257295ba1b389c693b79de857a96e4b7cd8ac0 ] + 
+When a tunnel is deleted, the cached dst entry should be released. + +This problem may prevent the removal of a netns (seen with a x-netns IPv6 +gre tunnel): + unregister_netdevice: waiting for lo to become free. Usage count = 3 + +CC: Dmitry Kozlov +Fixes: c12b395a4664 ("gre: Support GRE over IPv6") +Signed-off-by: huaibin Wang +Signed-off-by: Nicolas Dichtel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -361,6 +361,7 @@ static void ip6gre_tunnel_uninit(struct + struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); + + ip6gre_tunnel_unlink(ign, t); ++ ip6_tnl_dst_reset(t); + dev_put(dev); + } + diff --git a/queue-4.1/ipv6-fix-exthdrs-offload-registration-in-out_rt-path.patch b/queue-4.1/ipv6-fix-exthdrs-offload-registration-in-out_rt-path.patch new file mode 100644 index 00000000000..47a8e39456f --- /dev/null +++ b/queue-4.1/ipv6-fix-exthdrs-offload-registration-in-out_rt-path.patch @@ -0,0 +1,33 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Daniel Borkmann +Date: Thu, 3 Sep 2015 00:29:07 +0200 +Subject: ipv6: fix exthdrs offload registration in out_rt path + +From: Daniel Borkmann + +[ Upstream commit e41b0bedba0293b9e1e8d1e8ed553104b9693656 ] + +We previously register IPPROTO_ROUTING offload under inet6_add_offload(), +but in error path, we try to unregister it with inet_del_offload(). This +doesn't seem correct, it should actually be inet6_del_offload(), also +ipv6_exthdrs_offload_exit() from that commit seems rather incorrect (it +also uses rthdr_offload twice), but it got removed entirely later on. + +Fixes: 3336288a9fea ("ipv6: Switch to using new offload infrastructure.") +Signed-off-by: Daniel Borkmann +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/exthdrs_offload.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/exthdrs_offload.c ++++ b/net/ipv6/exthdrs_offload.c +@@ -36,6 +36,6 @@ out: + return ret; + + out_rt: +- inet_del_offload(&rthdr_offload, IPPROTO_ROUTING); ++ inet6_del_offload(&rthdr_offload, IPPROTO_ROUTING); + goto out; + } diff --git a/queue-4.1/ipv6-fix-multipath-route-replace-error-recovery.patch b/queue-4.1/ipv6-fix-multipath-route-replace-error-recovery.patch new file mode 100644 index 00000000000..f58ac7f5b6c --- /dev/null +++ b/queue-4.1/ipv6-fix-multipath-route-replace-error-recovery.patch @@ -0,0 +1,352 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Roopa Prabhu +Date: Tue, 8 Sep 2015 10:53:04 -0700 +Subject: ipv6: fix multipath route replace error recovery + +From: Roopa Prabhu + +[ Upstream commit 6b9ea5a64ed5eeb3f68f2e6fcce0ed1179801d1e ] + +Problem: +The ecmp route replace support for ipv6 in the kernel, deletes the +existing ecmp route too early, ie when it installs the first nexthop. +If there is an error in installing the subsequent nexthops, its too late +to recover the already deleted existing route leaving the fib +in an inconsistent state. + +This patch reduces the possibility of this by doing the following: +a) Changes the existing multipath route add code to a two stage process: + build rt6_infos + insert them + ip6_route_add rt6_info creation code is moved into + ip6_route_info_create. +b) This ensures that most errors are caught during building rt6_infos + and we fail early +c) Separates multipath add and del code. Because add needs the special + two stage mode in a) and delete essentially does not care. 
+d) In any event if the code fails during inserting a route again, a + warning is printed (This should be unlikely) + +Before the patch: +$ip -6 route show +3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024 +3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024 +3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024 + +/* Try replacing the route with a duplicate nexthop */ +$ip -6 route change 3000:1000:1000:1000::2/128 nexthop via +fe80::202:ff:fe00:b dev swp49s0 nexthop via fe80::202:ff:fe00:d dev +swp49s1 nexthop via fe80::202:ff:fe00:d dev swp49s1 +RTNETLINK answers: File exists + +$ip -6 route show +/* previously added ecmp route 3000:1000:1000:1000::2 disappears from + * kernel */ + +After the patch: +$ip -6 route show +3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024 +3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024 +3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024 + +/* Try replacing the route with a duplicate nexthop */ +$ip -6 route change 3000:1000:1000:1000::2/128 nexthop via +fe80::202:ff:fe00:b dev swp49s0 nexthop via fe80::202:ff:fe00:d dev +swp49s1 nexthop via fe80::202:ff:fe00:d dev swp49s1 +RTNETLINK answers: File exists + +$ip -6 route show +3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024 +3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024 +3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024 + +Fixes: 27596472473a ("ipv6: fix ECMP route replacement") +Signed-off-by: Roopa Prabhu +Reviewed-by: Nikolay Aleksandrov +Acked-by: Nicolas Dichtel +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/route.c | 201 +++++++++++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 175 insertions(+), 26 deletions(-) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1515,7 +1515,7 @@ static int ip6_convert_metrics(struct mx + return -EINVAL; + } + +-int ip6_route_add(struct fib6_config *cfg) ++int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret) + { + int err; + struct net *net = cfg->fc_nlinfo.nl_net; +@@ -1523,7 +1523,6 @@ int ip6_route_add(struct fib6_config *cf + struct net_device *dev = NULL; + struct inet6_dev *idev = NULL; + struct fib6_table *table; +- struct mx6_config mxc = { .mx = NULL, }; + int addr_type; + + if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) +@@ -1719,6 +1718,32 @@ install_route: + + cfg->fc_nlinfo.nl_net = dev_net(dev); + ++ *rt_ret = rt; ++ ++ return 0; ++out: ++ if (dev) ++ dev_put(dev); ++ if (idev) ++ in6_dev_put(idev); ++ if (rt) ++ dst_free(&rt->dst); ++ ++ *rt_ret = NULL; ++ ++ return err; ++} ++ ++int ip6_route_add(struct fib6_config *cfg) ++{ ++ struct mx6_config mxc = { .mx = NULL, }; ++ struct rt6_info *rt = NULL; ++ int err; ++ ++ err = ip6_route_info_create(cfg, &rt); ++ if (err) ++ goto out; ++ + err = ip6_convert_metrics(&mxc, cfg); + if (err) + goto out; +@@ -1726,14 +1751,12 @@ install_route: + err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc); + + kfree(mxc.mx); ++ + return err; + out: +- if (dev) +- dev_put(dev); +- if (idev) +- in6_dev_put(idev); + if (rt) + dst_free(&rt->dst); ++ + return err; + } + +@@ -2496,19 +2519,78 @@ errout: + return err; + } + +-static int ip6_route_multipath(struct fib6_config *cfg, int add) ++struct rt6_nh { ++ struct rt6_info *rt6_info; ++ struct fib6_config r_cfg; ++ struct mx6_config mxc; ++ struct list_head next; ++}; ++ ++static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) ++{ ++ struct rt6_nh *nh; ++ ++ list_for_each_entry(nh, rt6_nh_list, next) { ++ pr_warn("IPV6: 
multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n", ++ &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway, ++ nh->r_cfg.fc_ifindex); ++ } ++} ++ ++static int ip6_route_info_append(struct list_head *rt6_nh_list, ++ struct rt6_info *rt, struct fib6_config *r_cfg) ++{ ++ struct rt6_nh *nh; ++ struct rt6_info *rtnh; ++ int err = -EEXIST; ++ ++ list_for_each_entry(nh, rt6_nh_list, next) { ++ /* check if rt6_info already exists */ ++ rtnh = nh->rt6_info; ++ ++ if (rtnh->dst.dev == rt->dst.dev && ++ rtnh->rt6i_idev == rt->rt6i_idev && ++ ipv6_addr_equal(&rtnh->rt6i_gateway, ++ &rt->rt6i_gateway)) ++ return err; ++ } ++ ++ nh = kzalloc(sizeof(*nh), GFP_KERNEL); ++ if (!nh) ++ return -ENOMEM; ++ nh->rt6_info = rt; ++ err = ip6_convert_metrics(&nh->mxc, r_cfg); ++ if (err) { ++ kfree(nh); ++ return err; ++ } ++ memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); ++ list_add_tail(&nh->next, rt6_nh_list); ++ ++ return 0; ++} ++ ++static int ip6_route_multipath_add(struct fib6_config *cfg) + { + struct fib6_config r_cfg; + struct rtnexthop *rtnh; ++ struct rt6_info *rt; ++ struct rt6_nh *err_nh; ++ struct rt6_nh *nh, *nh_safe; + int remaining; + int attrlen; +- int err = 0, last_err = 0; ++ int err = 1; ++ int nhn = 0; ++ int replace = (cfg->fc_nlinfo.nlh && ++ (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); ++ LIST_HEAD(rt6_nh_list); + + remaining = cfg->fc_mp_len; +-beginning: + rtnh = (struct rtnexthop *)cfg->fc_mp; + +- /* Parse a Multipath Entry */ ++ /* Parse a Multipath Entry and build a list (rt6_nh_list) of ++ * rt6_info structs per nexthop ++ */ + while (rtnh_ok(rtnh, remaining)) { + memcpy(&r_cfg, cfg, sizeof(*cfg)); + if (rtnh->rtnh_ifindex) +@@ -2524,22 +2606,32 @@ beginning: + r_cfg.fc_flags |= RTF_GATEWAY; + } + } +- err = add ? 
ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg); ++ ++ err = ip6_route_info_create(&r_cfg, &rt); ++ if (err) ++ goto cleanup; ++ ++ err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); + if (err) { +- last_err = err; +- /* If we are trying to remove a route, do not stop the +- * loop when ip6_route_del() fails (because next hop is +- * already gone), we should try to remove all next hops. +- */ +- if (add) { +- /* If add fails, we should try to delete all +- * next hops that have been already added. +- */ +- add = 0; +- remaining = cfg->fc_mp_len - remaining; +- goto beginning; +- } ++ dst_free(&rt->dst); ++ goto cleanup; ++ } ++ ++ rtnh = rtnh_next(rtnh, &remaining); ++ } ++ ++ err_nh = NULL; ++ list_for_each_entry(nh, &rt6_nh_list, next) { ++ err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc); ++ /* nh->rt6_info is used or freed at this point, reset to NULL*/ ++ nh->rt6_info = NULL; ++ if (err) { ++ if (replace && nhn) ++ ip6_print_replace_route_err(&rt6_nh_list); ++ err_nh = nh; ++ goto add_errout; + } ++ + /* Because each route is added like a single route we remove + * these flags after the first nexthop: if there is a collision, + * we have already failed to add the first nexthop: +@@ -2549,6 +2641,63 @@ beginning: + */ + cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | + NLM_F_REPLACE); ++ nhn++; ++ } ++ ++ goto cleanup; ++ ++add_errout: ++ /* Delete routes that were already added */ ++ list_for_each_entry(nh, &rt6_nh_list, next) { ++ if (err_nh == nh) ++ break; ++ ip6_route_del(&nh->r_cfg); ++ } ++ ++cleanup: ++ list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { ++ if (nh->rt6_info) ++ dst_free(&nh->rt6_info->dst); ++ if (nh->mxc.mx) ++ kfree(nh->mxc.mx); ++ list_del(&nh->next); ++ kfree(nh); ++ } ++ ++ return err; ++} ++ ++static int ip6_route_multipath_del(struct fib6_config *cfg) ++{ ++ struct fib6_config r_cfg; ++ struct rtnexthop *rtnh; ++ int remaining; ++ int attrlen; ++ int err = 1, last_err = 0; ++ ++ remaining = cfg->fc_mp_len; 
++ rtnh = (struct rtnexthop *)cfg->fc_mp; ++ ++ /* Parse a Multipath Entry */ ++ while (rtnh_ok(rtnh, remaining)) { ++ memcpy(&r_cfg, cfg, sizeof(*cfg)); ++ if (rtnh->rtnh_ifindex) ++ r_cfg.fc_ifindex = rtnh->rtnh_ifindex; ++ ++ attrlen = rtnh_attrlen(rtnh); ++ if (attrlen > 0) { ++ struct nlattr *nla, *attrs = rtnh_attrs(rtnh); ++ ++ nla = nla_find(attrs, attrlen, RTA_GATEWAY); ++ if (nla) { ++ nla_memcpy(&r_cfg.fc_gateway, nla, 16); ++ r_cfg.fc_flags |= RTF_GATEWAY; ++ } ++ } ++ err = ip6_route_del(&r_cfg); ++ if (err) ++ last_err = err; ++ + rtnh = rtnh_next(rtnh, &remaining); + } + +@@ -2565,7 +2714,7 @@ static int inet6_rtm_delroute(struct sk_ + return err; + + if (cfg.fc_mp) +- return ip6_route_multipath(&cfg, 0); ++ return ip6_route_multipath_del(&cfg); + else + return ip6_route_del(&cfg); + } +@@ -2580,7 +2729,7 @@ static int inet6_rtm_newroute(struct sk_ + return err; + + if (cfg.fc_mp) +- return ip6_route_multipath(&cfg, 1); ++ return ip6_route_multipath_add(&cfg); + else + return ip6_route_add(&cfg); + } diff --git a/queue-4.1/macvtap-fix-tunsetsndbuf-values-64k.patch b/queue-4.1/macvtap-fix-tunsetsndbuf-values-64k.patch new file mode 100644 index 00000000000..3b271d286aa --- /dev/null +++ b/queue-4.1/macvtap-fix-tunsetsndbuf-values-64k.patch @@ -0,0 +1,47 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: "Michael S. Tsirkin" +Date: Fri, 18 Sep 2015 13:41:09 +0300 +Subject: macvtap: fix TUNSETSNDBUF values > 64k + +From: "Michael S. Tsirkin" + +[ Upstream commit 3ea79249e81e5ed051f2e6480cbde896d99046e8 ] + +Upon TUNSETSNDBUF, macvtap reads the requested sndbuf size into +a local variable u. +commit 39ec7de7092b ("macvtap: fix uninitialized access on +TUNSETIFF") changed its type to u16 (which is the right thing to +do for all other macvtap ioctls), breaking all values > 64k. + +The value of TUNSETSNDBUF is actually a signed 32 bit integer, so +the right thing to do is to read it into an int. + +Cc: David S. 
Miller +Fixes: 39ec7de7092b ("macvtap: fix uninitialized access on TUNSETIFF") +Reported-by: Mark A. Peloquin +Bisected-by: Matthew Rosato +Reported-by: Christian Borntraeger +Signed-off-by: Michael S. Tsirkin +Tested-by: Matthew Rosato +Acked-by: Christian Borntraeger +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvtap.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/macvtap.c ++++ b/drivers/net/macvtap.c +@@ -1054,10 +1054,10 @@ static long macvtap_ioctl(struct file *f + return 0; + + case TUNSETSNDBUF: +- if (get_user(u, up)) ++ if (get_user(s, sp)) + return -EFAULT; + +- q->sk.sk_sndbuf = u; ++ q->sk.sk_sndbuf = s; + return 0; + + case TUNGETVNETHDRSZ: diff --git a/queue-4.1/mvneta-use-inband-status-only-when-explicitly-enabled.patch b/queue-4.1/mvneta-use-inband-status-only-when-explicitly-enabled.patch new file mode 100644 index 00000000000..b6fb5780d0a --- /dev/null +++ b/queue-4.1/mvneta-use-inband-status-only-when-explicitly-enabled.patch @@ -0,0 +1,66 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Stas Sergeev +Date: Mon, 20 Jul 2015 17:49:58 -0700 +Subject: mvneta: use inband status only when explicitly enabled + +From: Stas Sergeev + +[ Upstream commit f8af8e6eb95093d5ce5ebcc52bd1929b0433e172 in net-next tree, + will be pushed to Linus very soon. ] + +The commit 898b2970e2c9 ("mvneta: implement SGMII-based in-band link state +signaling") implemented the link parameters auto-negotiation unconditionally. +Unfortunately it appears that some HW that implements SGMII protocol, +doesn't generate the inband status, so it is not possible to auto-negotiate +anything with such HW. + +This patch enables the auto-negotiation only if explicitly requested with +the 'managed' DT property. 
+ +This patch fixes the following regression: +https://lkml.org/lkml/2015/7/8/865 + +Signed-off-by: Stas Sergeev + +CC: Thomas Petazzoni +CC: netdev@vger.kernel.org +CC: linux-kernel@vger.kernel.org +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvneta.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -3029,8 +3029,8 @@ static int mvneta_probe(struct platform_ + const char *dt_mac_addr; + char hw_mac_addr[ETH_ALEN]; + const char *mac_from; ++ const char *managed; + int phy_mode; +- int fixed_phy = 0; + int err; + + /* Our multiqueue support is not complete, so for now, only +@@ -3064,7 +3064,6 @@ static int mvneta_probe(struct platform_ + dev_err(&pdev->dev, "cannot register fixed PHY\n"); + goto err_free_irq; + } +- fixed_phy = 1; + + /* In the case of a fixed PHY, the DT node associated + * to the PHY is the Ethernet MAC DT node. 
+@@ -3088,8 +3087,10 @@ static int mvneta_probe(struct platform_ + pp = netdev_priv(dev); + pp->phy_node = phy_node; + pp->phy_interface = phy_mode; +- pp->use_inband_status = (phy_mode == PHY_INTERFACE_MODE_SGMII) && +- fixed_phy; ++ ++ err = of_property_read_string(dn, "managed", &managed); ++ pp->use_inband_status = (err == 0 && ++ strcmp(managed, "in-band-status") == 0); + + pp->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(pp->clk)) { diff --git a/queue-4.1/net-dsa-bcm_sf2-do-not-override-speed-settings.patch b/queue-4.1/net-dsa-bcm_sf2-do-not-override-speed-settings.patch new file mode 100644 index 00000000000..b803d28c5ad --- /dev/null +++ b/queue-4.1/net-dsa-bcm_sf2-do-not-override-speed-settings.patch @@ -0,0 +1,68 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Florian Fainelli +Date: Mon, 20 Jul 2015 17:49:55 -0700 +Subject: net: dsa: bcm_sf2: Do not override speed settings + +From: Florian Fainelli + +[ Upstream d2eac98f7d1b950b762a7eca05a9ce0ea1d878d2 in net-next tree, + will be pushed to Linus very soon. ] + +The SF2 driver currently overrides speed settings for its port +configured using a fixed PHY, this is both unnecessary and incorrect, +because we keep feedback to the hardware parameters that we read from +the PHY device, which in the case of a fixed PHY cannot possibly change +speed. + +This is a required change to allow the fixed PHY code to allow +registering a PHY with a link configured as DOWN by default and avoid +some sort of circular dependency where we require the link_update +callback to run to program the hardware, and we then utilize the fixed +PHY parameters to program the hardware with the same settings. + +Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.c | 18 +----------------- + 1 file changed, 1 insertion(+), 17 deletions(-) + +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -893,15 +893,11 @@ static void bcm_sf2_sw_fixed_link_update + struct fixed_phy_status *status) + { + struct bcm_sf2_priv *priv = ds_to_priv(ds); +- u32 duplex, pause, speed; ++ u32 duplex, pause; + u32 reg; + + duplex = core_readl(priv, CORE_DUPSTS); + pause = core_readl(priv, CORE_PAUSESTS); +- speed = core_readl(priv, CORE_SPDSTS); +- +- speed >>= (port * SPDSTS_SHIFT); +- speed &= SPDSTS_MASK; + + status->link = 0; + +@@ -929,18 +925,6 @@ static void bcm_sf2_sw_fixed_link_update + reg &= ~LINK_STS; + core_writel(priv, reg, CORE_STS_OVERRIDE_GMIIP_PORT(port)); + +- switch (speed) { +- case SPDSTS_10: +- status->speed = SPEED_10; +- break; +- case SPDSTS_100: +- status->speed = SPEED_100; +- break; +- case SPDSTS_1000: +- status->speed = SPEED_1000; +- break; +- } +- + if ((pause & (1 << port)) && + (pause & (1 << (port + PAUSESTS_TX_PAUSE_SHIFT)))) { + status->asym_pause = 1; diff --git a/queue-4.1/net-dsa-bcm_sf2-fix-64-bits-register-writes.patch b/queue-4.1/net-dsa-bcm_sf2-fix-64-bits-register-writes.patch new file mode 100644 index 00000000000..4f0cea0a7d2 --- /dev/null +++ b/queue-4.1/net-dsa-bcm_sf2-fix-64-bits-register-writes.patch @@ -0,0 +1,36 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Florian Fainelli +Date: Tue, 8 Sep 2015 20:06:41 -0700 +Subject: net: dsa: bcm_sf2: Fix 64-bits register writes + +From: Florian Fainelli + +[ Upstream commit 03679a14739a0d4c14b52ba65a69ff553bfba73b ] + +The macro to write 64-bits quantities to the 32-bits register swapped +the value and offsets arguments, we want to preserve the ordering of the +arguments with respect to how writel() is implemented for instance: +value first, offset/base second. 
+ +Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver") +Signed-off-by: Florian Fainelli +Reviewed-by: Vivien Didelot +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/dsa/bcm_sf2.h ++++ b/drivers/net/dsa/bcm_sf2.h +@@ -112,8 +112,8 @@ static inline u64 name##_readq(struct bc + spin_unlock(&priv->indir_lock); \ + return (u64)indir << 32 | dir; \ + } \ +-static inline void name##_writeq(struct bcm_sf2_priv *priv, u32 off, \ +- u64 val) \ ++static inline void name##_writeq(struct bcm_sf2_priv *priv, u64 val, \ ++ u32 off) \ + { \ + spin_lock(&priv->indir_lock); \ + reg_writel(priv, upper_32_bits(val), REG_DIR_DATA_WRITE); \ diff --git a/queue-4.1/net-dsa-bcm_sf2-fix-ageing-conditions-and-operation.patch b/queue-4.1/net-dsa-bcm_sf2-fix-ageing-conditions-and-operation.patch new file mode 100644 index 00000000000..fd00ec2098d --- /dev/null +++ b/queue-4.1/net-dsa-bcm_sf2-fix-ageing-conditions-and-operation.patch @@ -0,0 +1,74 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Florian Fainelli +Date: Sat, 5 Sep 2015 13:07:27 -0700 +Subject: net: dsa: bcm_sf2: Fix ageing conditions and operation + +From: Florian Fainelli + +[ Upstream commit 39797a279d62972cd914ef580fdfacb13e508bf8 ] + +The comparison check between cur_hw_state and hw_state is currently +invalid because cur_hw_state is right shifted by G_MISTP_SHIFT, while +hw_state is not, so we end-up comparing bits 2:0 with bits 7:5, which is +going to cause an additional aging to occur. Fix this by not shifting +cur_hw_state while reading it, but instead, mask the value with the +appropriately shifted bitmask. + +The other problem with the fast-ageing process is that we did not set +the EN_AGE_DYNAMIC bit to request the ageing to occur for dynamically +learned MAC addresses. 
Finally, write back 0 to the FAST_AGE_CTRL +register to avoid leaving spurious bits sets from one operation to the +other. + +Fixes: 12f460f23423 ("net: dsa: bcm_sf2: add HW bridging support") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -417,7 +417,7 @@ static int bcm_sf2_sw_fast_age_port(stru + core_writel(priv, port, CORE_FAST_AGE_PORT); + + reg = core_readl(priv, CORE_FAST_AGE_CTRL); +- reg |= EN_AGE_PORT | FAST_AGE_STR_DONE; ++ reg |= EN_AGE_PORT | EN_AGE_DYNAMIC | FAST_AGE_STR_DONE; + core_writel(priv, reg, CORE_FAST_AGE_CTRL); + + do { +@@ -431,6 +431,8 @@ static int bcm_sf2_sw_fast_age_port(stru + if (!timeout) + return -ETIMEDOUT; + ++ core_writel(priv, 0, CORE_FAST_AGE_CTRL); ++ + return 0; + } + +@@ -506,7 +508,7 @@ static int bcm_sf2_sw_br_set_stp_state(s + u32 reg; + + reg = core_readl(priv, CORE_G_PCTL_PORT(port)); +- cur_hw_state = reg >> G_MISTP_STATE_SHIFT; ++ cur_hw_state = reg & (G_MISTP_STATE_MASK << G_MISTP_STATE_SHIFT); + + switch (state) { + case BR_STATE_DISABLED: +@@ -530,10 +532,12 @@ static int bcm_sf2_sw_br_set_stp_state(s + } + + /* Fast-age ARL entries if we are moving a port from Learning or +- * Forwarding state to Disabled, Blocking or Listening state ++ * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening ++ * state (hw_state) + */ + if (cur_hw_state != hw_state) { +- if (cur_hw_state & 4 && !(hw_state & 4)) { ++ if (cur_hw_state >= G_MISTP_LEARN_STATE && ++ hw_state <= G_MISTP_LISTEN_STATE) { + ret = bcm_sf2_sw_fast_age_port(ds, port); + if (ret) { + pr_err("%s: fast-ageing failed\n", __func__); diff --git a/queue-4.1/net-eth-altera-fix-napi-poll_list-corruption.patch b/queue-4.1/net-eth-altera-fix-napi-poll_list-corruption.patch new file mode 100644 index 00000000000..e52f0cd0494 --- 
/dev/null +++ b/queue-4.1/net-eth-altera-fix-napi-poll_list-corruption.patch @@ -0,0 +1,32 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Atsushi Nemoto +Date: Wed, 2 Sep 2015 17:49:29 +0900 +Subject: net: eth: altera: fix napi poll_list corruption + +From: Atsushi Nemoto + +[ Upstream commit 4548a697e4969d695047cebd6d9af5e2f6cc728e ] + +tse_poll() calls __napi_complete() with irq enabled. This leads napi +poll_list corruption and may stop all napi drivers working. +Use napi_complete() instead of __napi_complete(). + +Signed-off-by: Atsushi Nemoto +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/altera/altera_tse_main.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/net/ethernet/altera/altera_tse_main.c ++++ b/drivers/net/ethernet/altera/altera_tse_main.c +@@ -511,8 +511,7 @@ static int tse_poll(struct napi_struct * + + if (rxcomplete < budget) { + +- napi_gro_flush(napi, false); +- __napi_complete(napi); ++ napi_complete(napi); + + netdev_dbg(priv->dev, + "NAPI Complete, did %d packets with budget %d\n", diff --git a/queue-4.1/net-fec-clear-receive-interrupts-before-processing-a-packet.patch b/queue-4.1/net-fec-clear-receive-interrupts-before-processing-a-packet.patch new file mode 100644 index 00000000000..2bfad7a2368 --- /dev/null +++ b/queue-4.1/net-fec-clear-receive-interrupts-before-processing-a-packet.patch @@ -0,0 +1,39 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Russell King +Date: Wed, 2 Sep 2015 17:24:14 +0800 +Subject: net: fec: clear receive interrupts before processing a packet + +From: Russell King + +[ Upstream commit ed63f1dcd5788d36f942fbcce350742385e3e18c ] + +The patch just to re-submit the patch "db3421c114cfa6326" because the +patch "4d494cdc92b3b9a0" remove the change. + +Clear any pending receive interrupt before we process a pending packet. 
+This helps to avoid any spurious interrupts being raised after we have +fully cleaned the receive ring, while still allowing an interrupt to be +raised if we receive another packet. + +The position of this is critical: we must do this prior to reading the +next packet status to avoid potentially dropping an interrupt when a +packet is still pending. + +Acked-by: Fugang Duan +Signed-off-by: Russell King +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/freescale/fec_main.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/freescale/fec_main.c ++++ b/drivers/net/ethernet/freescale/fec_main.c +@@ -1396,6 +1396,7 @@ fec_enet_rx_queue(struct net_device *nde + if ((status & BD_ENET_RX_LAST) == 0) + netdev_err(ndev, "rcv is not +last\n"); + ++ writel(FEC_ENET_RXF, fep->hwp + FEC_IEVENT); + + /* Check for errors. */ + if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | diff --git a/queue-4.1/net-ipv6-correct-pim6-mrt_lock-handling.patch b/queue-4.1/net-ipv6-correct-pim6-mrt_lock-handling.patch new file mode 100644 index 00000000000..22d7d6adcd3 --- /dev/null +++ b/queue-4.1/net-ipv6-correct-pim6-mrt_lock-handling.patch @@ -0,0 +1,35 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Richard Laing +Date: Thu, 3 Sep 2015 13:52:31 +1200 +Subject: net/ipv6: Correct PIM6 mrt_lock handling + +From: Richard Laing + +[ Upstream commit 25b4a44c19c83d98e8c0807a7ede07c1f28eab8b ] + +In the IPv6 multicast routing code the mrt_lock was not being released +correctly in the MFC iterator, as a result adding or deleting a MIF would +cause a hang because the mrt_lock could not be acquired. + +This fix is a copy of the code for the IPv4 case and ensures that the lock +is released correctly. + +Signed-off-by: Richard Laing +Acked-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6mr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/ip6mr.c ++++ b/net/ipv6/ip6mr.c +@@ -550,7 +550,7 @@ static void ipmr_mfc_seq_stop(struct seq + + if (it->cache == &mrt->mfc6_unres_queue) + spin_unlock_bh(&mfc_unres_lock); +- else if (it->cache == mrt->mfc6_cache_array) ++ else if (it->cache == &mrt->mfc6_cache_array[it->ct]) + read_unlock(&mrt_lock); + } + diff --git a/queue-4.1/net-mlx4_en-really-allow-to-change-rss-key.patch b/queue-4.1/net-mlx4_en-really-allow-to-change-rss-key.patch new file mode 100644 index 00000000000..8537f2f5a7a --- /dev/null +++ b/queue-4.1/net-mlx4_en-really-allow-to-change-rss-key.patch @@ -0,0 +1,35 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Eric Dumazet +Date: Tue, 15 Sep 2015 18:29:47 -0700 +Subject: net/mlx4_en: really allow to change RSS key + +From: Eric Dumazet + +[ Upstream commit 4671fc6d47e0a0108fe24a4d830347d6a6ef4aa7 ] + +When changing rss key, we do not want to overwrite user provided key +by the one provided by netdev_rss_key_fill(), which is the host random +key generated at boot time. + +Fixes: 947cbb0ac242 ("net/mlx4_en: Support for configurable RSS hash function") +Signed-off-by: Eric Dumazet +Cc: Eyal Perry +CC: Amir Vadai +Acked-by: Or Gerlitz +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c +@@ -1256,8 +1256,6 @@ int mlx4_en_config_rss_steer(struct mlx4 + rss_context->hash_fn = MLX4_RSS_HASH_TOP; + memcpy(rss_context->rss_key, priv->rss_key, + MLX4_EN_RSS_KEY_SIZE); +- netdev_rss_key_fill(rss_context->rss_key, +- MLX4_EN_RSS_KEY_SIZE); + } else { + en_err(priv, "Unknown RSS hash function requested\n"); + err = -EINVAL; diff --git a/queue-4.1/net-phy-fixed_phy-handle-link-down-case.patch b/queue-4.1/net-phy-fixed_phy-handle-link-down-case.patch new file mode 100644 index 00000000000..a1e18129677 --- /dev/null +++ b/queue-4.1/net-phy-fixed_phy-handle-link-down-case.patch @@ -0,0 +1,65 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Stas Sergeev +Date: Mon, 20 Jul 2015 17:49:56 -0700 +Subject: net: phy: fixed_phy: handle link-down case + +From: Stas Sergeev + +[ Upstream 868a4215be9a6d80548ccb74763b883dc99d32a2 in net-next tree, + will be pushed to Linus very soon. ] + +fixed_phy_register() currently hardcodes the fixed PHY link to 1, and +expects to find a "speed" parameter to provide correct information +towards the fixed PHY consumer. + +In a subsequent change, where we allow "managed" (e.g: (RS)GMII in-band +status auto-negotiation) fixed PHYs, none of these parameters can be +provided since they will be auto-negotiated, hence, we just provide a +zero-initialized fixed_phy_status to fixed_phy_register() which makes it +fail when we call fixed_phy_update_regs() since status.speed = 0 which +makes us hit the "default" label and error out. + +Without this change, we would also see potentially inconsistent +speed/duplex parameters for fixed PHYs when the link is DOWN. 
+ +CC: netdev@vger.kernel.org +CC: linux-kernel@vger.kernel.org +Signed-off-by: Stas Sergeev +[florian: add more background to why this is correct and desirable] +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/fixed_phy.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/drivers/net/phy/fixed_phy.c ++++ b/drivers/net/phy/fixed_phy.c +@@ -52,6 +52,10 @@ static int fixed_phy_update_regs(struct + u16 lpagb = 0; + u16 lpa = 0; + ++ if (!fp->status.link) ++ goto done; ++ bmsr |= BMSR_LSTATUS | BMSR_ANEGCOMPLETE; ++ + if (fp->status.duplex) { + bmcr |= BMCR_FULLDPLX; + +@@ -96,15 +100,13 @@ static int fixed_phy_update_regs(struct + } + } + +- if (fp->status.link) +- bmsr |= BMSR_LSTATUS | BMSR_ANEGCOMPLETE; +- + if (fp->status.pause) + lpa |= LPA_PAUSE_CAP; + + if (fp->status.asym_pause) + lpa |= LPA_PAUSE_ASYM; + ++done: + fp->regs[MII_PHYSID1] = 0; + fp->regs[MII_PHYSID2] = 0; + diff --git a/queue-4.1/net-revert-net_sched-move-tp-root-allocation-into-fw_init.patch b/queue-4.1/net-revert-net_sched-move-tp-root-allocation-into-fw_init.patch new file mode 100644 index 00000000000..172e1fa4246 --- /dev/null +++ b/queue-4.1/net-revert-net_sched-move-tp-root-allocation-into-fw_init.patch @@ -0,0 +1,93 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: WANG Cong +Date: Tue, 22 Sep 2015 17:01:11 -0700 +Subject: net: revert "net_sched: move tp->root allocation into fw_init()" + +From: WANG Cong + +[ Upstream commit d8aecb10115497f6cdf841df8c88ebb3ba25fa28 ] + +fw filter uses tp->root==NULL to check if it is the old method, +so it doesn't need allocation at all in this case. This patch +reverts the offending commit and adds some comments for old +method to make it obvious. + +Fixes: 33f8b9ecdb15 ("net_sched: move tp->root allocation into fw_init()") +Reported-by: Akshat Kakkar +Cc: Jamal Hadi Salim +Signed-off-by: Cong Wang +Acked-by: Jamal Hadi Salim +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_fw.c | 30 +++++++++++++++--------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +--- a/net/sched/cls_fw.c ++++ b/net/sched/cls_fw.c +@@ -33,7 +33,6 @@ + + struct fw_head { + u32 mask; +- bool mask_set; + struct fw_filter __rcu *ht[HTSIZE]; + struct rcu_head rcu; + }; +@@ -84,7 +83,7 @@ static int fw_classify(struct sk_buff *s + } + } + } else { +- /* old method */ ++ /* Old method: classify the packet using its skb mark. */ + if (id && (TC_H_MAJ(id) == 0 || + !(TC_H_MAJ(id ^ tp->q->handle)))) { + res->classid = id; +@@ -114,14 +113,9 @@ static unsigned long fw_get(struct tcf_p + + static int fw_init(struct tcf_proto *tp) + { +- struct fw_head *head; +- +- head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); +- if (head == NULL) +- return -ENOBUFS; +- +- head->mask_set = false; +- rcu_assign_pointer(tp->root, head); ++ /* We don't allocate fw_head here, because in the old method ++ * we don't need it at all. ++ */ + return 0; + } + +@@ -252,7 +246,7 @@ static int fw_change(struct net *net, st + int err; + + if (!opt) +- return handle ? -EINVAL : 0; ++ return handle ? -EINVAL : 0; /* Succeed if it is old method. 
*/ + + err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy); + if (err < 0) +@@ -302,11 +296,17 @@ static int fw_change(struct net *net, st + if (!handle) + return -EINVAL; + +- if (!head->mask_set) { +- head->mask = 0xFFFFFFFF; ++ if (!head) { ++ u32 mask = 0xFFFFFFFF; + if (tb[TCA_FW_MASK]) +- head->mask = nla_get_u32(tb[TCA_FW_MASK]); +- head->mask_set = true; ++ mask = nla_get_u32(tb[TCA_FW_MASK]); ++ ++ head = kzalloc(sizeof(*head), GFP_KERNEL); ++ if (!head) ++ return -ENOBUFS; ++ head->mask = mask; ++ ++ rcu_assign_pointer(tp->root, head); + } + + f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL); diff --git a/queue-4.1/netlink-fix-autobind-race-condition-that-leads-to-zero-port-id.patch b/queue-4.1/netlink-fix-autobind-race-condition-that-leads-to-zero-port-id.patch new file mode 100644 index 00000000000..09ef4c98ebe --- /dev/null +++ b/queue-4.1/netlink-fix-autobind-race-condition-that-leads-to-zero-port-id.patch @@ -0,0 +1,94 @@ +From foo@baz Wed Sep 30 05:22:23 CEST 2015 +From: Herbert Xu +Date: Fri, 18 Sep 2015 19:16:50 +0800 +Subject: netlink: Fix autobind race condition that leads to zero port ID +Status: RO +Content-Length: 3101 +Lines: 90 + +From: Herbert Xu + +[ Upstream commit 1f770c0a09da855a2b51af6d19de97fb955eca85 ] + +The commit c0bb07df7d981e4091432754e30c9c720e2c0c78 ("netlink: +Reset portid after netlink_insert failure") introduced a race +condition where if two threads try to autobind the same socket +one of them may end up with a zero port ID. This led to kernel +deadlocks that were observed by multiple people. + +This patch reverts that commit and instead fixes it by introducing +a separte rhash_portid variable so that the real portid is only set +after the socket has been successfully hashed. + +Fixes: c0bb07df7d98 ("netlink: Reset portid after netlink_insert failure") +Reported-by: Tejun Heo +Reported-by: Linus Torvalds +Signed-off-by: Herbert Xu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 11 ++++++----- + net/netlink/af_netlink.h | 1 + + 2 files changed, 7 insertions(+), 5 deletions(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -1017,7 +1017,7 @@ static inline int netlink_compare(struct + const struct netlink_compare_arg *x = arg->key; + const struct netlink_sock *nlk = ptr; + +- return nlk->portid != x->portid || ++ return nlk->rhash_portid != x->portid || + !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet)); + } + +@@ -1043,7 +1043,7 @@ static int __netlink_insert(struct netli + { + struct netlink_compare_arg arg; + +- netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid); ++ netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->rhash_portid); + return rhashtable_lookup_insert_key(&table->hash, &arg, + &nlk_sk(sk)->node, + netlink_rhashtable_params); +@@ -1105,7 +1105,7 @@ static int netlink_insert(struct sock *s + unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX)) + goto err; + +- nlk_sk(sk)->portid = portid; ++ nlk_sk(sk)->rhash_portid = portid; + sock_hold(sk); + + err = __netlink_insert(table, sk); +@@ -1117,10 +1117,11 @@ static int netlink_insert(struct sock *s + err = -EOVERFLOW; + if (err == -EEXIST) + err = -EADDRINUSE; +- nlk_sk(sk)->portid = 0; + sock_put(sk); + } + ++ nlk_sk(sk)->portid = portid; ++ + err: + release_sock(sk); + return err; +@@ -3167,7 +3168,7 @@ static inline u32 netlink_hash(const voi + const struct netlink_sock *nlk = data; + struct netlink_compare_arg arg; + +- netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid); ++ netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->rhash_portid); + return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed); + } + +--- a/net/netlink/af_netlink.h ++++ b/net/netlink/af_netlink.h +@@ -25,6 +25,7 @@ struct netlink_ring { + struct netlink_sock { + /* struct sock has to be the first member of netlink_sock */ + struct sock sk; ++ u32 
rhash_portid; + u32 portid; + u32 dst_portid; + u32 dst_group; diff --git a/queue-4.1/netlink-mmap-transform-mmap-skb-into-full-skb-on-taps.patch b/queue-4.1/netlink-mmap-transform-mmap-skb-into-full-skb-on-taps.patch new file mode 100644 index 00000000000..2d57a213b6f --- /dev/null +++ b/queue-4.1/netlink-mmap-transform-mmap-skb-into-full-skb-on-taps.patch @@ -0,0 +1,116 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Daniel Borkmann +Date: Thu, 10 Sep 2015 20:05:46 +0200 +Subject: netlink, mmap: transform mmap skb into full skb on taps + +From: Daniel Borkmann + +[ Upstream commit 1853c949646005b5959c483becde86608f548f24 ] + +Ken-ichirou reported that running netlink in mmap mode for receive in +combination with nlmon will throw a NULL pointer dereference in +__kfree_skb() on nlmon_xmit(), in my case I can also trigger an "unable +to handle kernel paging request". The problem is the skb_clone() in +__netlink_deliver_tap_skb() for skbs that are mmaped. + +I.e. the cloned skb doesn't have a destructor, whereas the mmap netlink +skb has it pointed to netlink_skb_destructor(), set in the handler +netlink_ring_setup_skb(). There, skb->head is being set to NULL, so +that in such cases, __kfree_skb() doesn't perform a skb_release_data() +via skb_release_all(), where skb->head is possibly being freed through +kfree(head) into slab allocator, although netlink mmap skb->head points +to the mmap buffer. Similarly, the same has to be done also for large +netlink skbs where the data area is vmalloced. Therefore, as discussed, +make a copy for these rather rare cases for now. This fixes the issue +on my and Ken-ichirou's test-cases. + +Reference: http://thread.gmane.org/gmane.linux.network/371129 +Fixes: bcbde0d449ed ("net: netlink: virtual tap device management") +Reported-by: Ken-ichirou MATSUZAWA +Signed-off-by: Daniel Borkmann +Tested-by: Ken-ichirou MATSUZAWA +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 30 +++++++++++++++++++++++------- + net/netlink/af_netlink.h | 9 +++++++++ + 2 files changed, 32 insertions(+), 7 deletions(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -123,6 +123,24 @@ static inline u32 netlink_group_mask(u32 + return group ? 1 << (group - 1) : 0; + } + ++static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, ++ gfp_t gfp_mask) ++{ ++ unsigned int len = skb_end_offset(skb); ++ struct sk_buff *new; ++ ++ new = alloc_skb(len, gfp_mask); ++ if (new == NULL) ++ return NULL; ++ ++ NETLINK_CB(new).portid = NETLINK_CB(skb).portid; ++ NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group; ++ NETLINK_CB(new).creds = NETLINK_CB(skb).creds; ++ ++ memcpy(skb_put(new, len), skb->data, len); ++ return new; ++} ++ + int netlink_add_tap(struct netlink_tap *nt) + { + if (unlikely(nt->dev->type != ARPHRD_NETLINK)) +@@ -204,7 +222,11 @@ static int __netlink_deliver_tap_skb(str + int ret = -ENOMEM; + + dev_hold(dev); +- nskb = skb_clone(skb, GFP_ATOMIC); ++ ++ if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head)) ++ nskb = netlink_to_full_skb(skb, GFP_ATOMIC); ++ else ++ nskb = skb_clone(skb, GFP_ATOMIC); + if (nskb) { + nskb->dev = dev; + nskb->protocol = htons((u16) sk->sk_protocol); +@@ -276,11 +298,6 @@ static void netlink_rcv_wake(struct sock + } + + #ifdef CONFIG_NETLINK_MMAP +-static bool netlink_skb_is_mmaped(const struct sk_buff *skb) +-{ +- return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; +-} +- + static bool netlink_rx_is_mmaped(struct sock *sk) + { + return nlk_sk(sk)->rx_ring.pg_vec != NULL; +@@ -832,7 +849,6 @@ static void netlink_ring_set_copied(stru + } + + #else /* CONFIG_NETLINK_MMAP */ +-#define netlink_skb_is_mmaped(skb) false + #define netlink_rx_is_mmaped(sk) false + #define netlink_tx_is_mmaped(sk) false + #define netlink_mmap sock_no_mmap +--- a/net/netlink/af_netlink.h ++++ b/net/netlink/af_netlink.h +@@ -59,6 
+59,15 @@ static inline struct netlink_sock *nlk_s + return container_of(sk, struct netlink_sock, sk); + } + ++static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb) ++{ ++#ifdef CONFIG_NETLINK_MMAP ++ return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; ++#else ++ return false; ++#endif /* CONFIG_NETLINK_MMAP */ ++} ++ + struct netlink_table { + struct rhashtable hash; + struct hlist_head mc_list; diff --git a/queue-4.1/netlink-replace-rhash_portid-with-bound.patch b/queue-4.1/netlink-replace-rhash_portid-with-bound.patch new file mode 100644 index 00000000000..178bec86911 --- /dev/null +++ b/queue-4.1/netlink-replace-rhash_portid-with-bound.patch @@ -0,0 +1,248 @@ +From foo@baz Wed Sep 30 05:22:23 CEST 2015 +From: Herbert Xu +Date: Tue, 22 Sep 2015 11:38:56 +0800 +Subject: netlink: Replace rhash_portid with bound +Status: RO +Content-Length: 8459 +Lines: 244 + +From: Herbert Xu + +[ Upstream commit da314c9923fed553a007785a901fd395b7eb6c19 ] + +On Mon, Sep 21, 2015 at 02:20:22PM -0400, Tejun Heo wrote: +> +> store_release and load_acquire are different from the usual memory +> barriers and can't be paired this way. You have to pair store_release +> and load_acquire. Besides, it isn't a particularly good idea to + +OK I've decided to drop the acquire/release helpers as they don't +help us at all and simply pessimises the code by using full memory +barriers (on some architectures) where only a write or read barrier +is needed. + +> depend on memory barriers embedded in other data structures like the +> above. Here, especially, rhashtable_insert() would have write barrier +> *before* the entry is hashed not necessarily *after*, which means that +> in the above case, a socket which appears to have set bound to a +> reader might not visible when the reader tries to look up the socket +> on the hashtable. + +But you are right we do need an explicit write barrier here to +ensure that the hashing is visible. + +> There's no reason to be overly smart here. 
This isn't a crazy hot +> path, write barriers tend to be very cheap, store_release more so. +> Please just do smp_store_release() and note what it's paired with. + +It's not about being overly smart. It's about actually understanding +what's going on with the code. I've seen too many instances of +people simply sprinkling synchronisation primitives around without +any knowledge of what is happening underneath, which is just a recipe +for creating hard-to-debug races. + +> > @@ -1539,7 +1546,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, +> > } +> > } +> > +> > - if (!nlk->portid) { +> > + if (!nlk->bound) { +> +> I don't think you can skip load_acquire here just because this is the +> second deref of the variable. That doesn't change anything. Race +> condition could still happen between the first and second tests and +> skipping the second would lead to the same kind of bug. + +The reason this one is OK is because we do not use nlk->portid or +try to get nlk from the hash table before we return to user-space. + +However, there is a real bug here that none of these acquire/release +helpers discovered. The two bound tests here used to be a single +one. Now that they are separate it is entirely possible for another +thread to come in the middle and bind the socket. So we need to +repeat the portid check in order to maintain consistency. + +> > @@ -1587,7 +1594,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, +> > !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) +> > return -EPERM; +> > +> > - if (!nlk->portid) +> > + if (!nlk->bound) +> +> Don't we need load_acquire here too? Is this path holding a lock +> which makes that unnecessary? + +Ditto. + +---8<--- +The commit 1f770c0a09da855a2b51af6d19de97fb955eca85 ("netlink: +Fix autobind race condition that leads to zero port ID") created +some new races that can occur due to inconcsistencies between the +two port IDs. + +Tejun is right that a barrier is unavoidable. 
Therefore I am +reverting to the original patch that used a boolean to indicate +that a user netlink socket has been bound. + +Barriers have been added where necessary to ensure that a valid +portid and the hashed socket is visible. + +I have also changed netlink_insert to only return EBUSY if the +socket is bound to a portid different to the requested one. This +combined with only reading nlk->bound once in netlink_bind fixes +a race where two threads that bind the socket at the same time +with different port IDs may both succeed. + +Fixes: 1f770c0a09da ("netlink: Fix autobind race condition that leads to zero port ID") +Reported-by: Tejun Heo +Reported-by: Linus Torvalds +Signed-off-by: Herbert Xu +Nacked-by: Tejun Heo +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 39 ++++++++++++++++++++++++++++----------- + net/netlink/af_netlink.h | 2 +- + 2 files changed, 29 insertions(+), 12 deletions(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -1017,7 +1017,7 @@ static inline int netlink_compare(struct + const struct netlink_compare_arg *x = arg->key; + const struct netlink_sock *nlk = ptr; + +- return nlk->rhash_portid != x->portid || ++ return nlk->portid != x->portid || + !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet)); + } + +@@ -1043,7 +1043,7 @@ static int __netlink_insert(struct netli + { + struct netlink_compare_arg arg; + +- netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->rhash_portid); ++ netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid); + return rhashtable_lookup_insert_key(&table->hash, &arg, + &nlk_sk(sk)->node, + netlink_rhashtable_params); +@@ -1096,8 +1096,8 @@ static int netlink_insert(struct sock *s + + lock_sock(sk); + +- err = -EBUSY; +- if (nlk_sk(sk)->portid) ++ err = nlk_sk(sk)->portid == portid ? 
0 : -EBUSY; ++ if (nlk_sk(sk)->bound) + goto err; + + err = -ENOMEM; +@@ -1105,7 +1105,7 @@ static int netlink_insert(struct sock *s + unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX)) + goto err; + +- nlk_sk(sk)->rhash_portid = portid; ++ nlk_sk(sk)->portid = portid; + sock_hold(sk); + + err = __netlink_insert(table, sk); +@@ -1120,7 +1120,9 @@ static int netlink_insert(struct sock *s + sock_put(sk); + } + +- nlk_sk(sk)->portid = portid; ++ /* We need to ensure that the socket is hashed and visible. */ ++ smp_wmb(); ++ nlk_sk(sk)->bound = portid; + + err: + release_sock(sk); +@@ -1501,6 +1503,7 @@ static int netlink_bind(struct socket *s + struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; + int err; + long unsigned int groups = nladdr->nl_groups; ++ bool bound; + + if (addr_len < sizeof(struct sockaddr_nl)) + return -EINVAL; +@@ -1517,9 +1520,14 @@ static int netlink_bind(struct socket *s + return err; + } + +- if (nlk->portid) ++ bound = nlk->bound; ++ if (bound) { ++ /* Ensure nlk->portid is up-to-date. */ ++ smp_rmb(); ++ + if (nladdr->nl_pid != nlk->portid) + return -EINVAL; ++ } + + if (nlk->netlink_bind && groups) { + int group; +@@ -1535,7 +1543,10 @@ static int netlink_bind(struct socket *s + } + } + +- if (!nlk->portid) { ++ /* No need for barriers here as we return to user-space without ++ * using any of the bound attributes. ++ */ ++ if (!bound) { + err = nladdr->nl_pid ? + netlink_insert(sk, nladdr->nl_pid) : + netlink_autobind(sock); +@@ -1583,7 +1594,10 @@ static int netlink_connect(struct socket + !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) + return -EPERM; + +- if (!nlk->portid) ++ /* No need for barriers here as we return to user-space without ++ * using any of the bound attributes. 
++ */ ++ if (!nlk->bound) + err = netlink_autobind(sock); + + if (err == 0) { +@@ -2340,10 +2354,13 @@ static int netlink_sendmsg(struct socket + dst_group = nlk->dst_group; + } + +- if (!nlk->portid) { ++ if (!nlk->bound) { + err = netlink_autobind(sock); + if (err) + goto out; ++ } else { ++ /* Ensure nlk is hashed and visible. */ ++ smp_rmb(); + } + + /* It's a really convoluted way for userland to ask for mmaped +@@ -3168,7 +3185,7 @@ static inline u32 netlink_hash(const voi + const struct netlink_sock *nlk = data; + struct netlink_compare_arg arg; + +- netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->rhash_portid); ++ netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid); + return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed); + } + +--- a/net/netlink/af_netlink.h ++++ b/net/netlink/af_netlink.h +@@ -25,7 +25,6 @@ struct netlink_ring { + struct netlink_sock { + /* struct sock has to be the first member of netlink_sock */ + struct sock sk; +- u32 rhash_portid; + u32 portid; + u32 dst_portid; + u32 dst_group; +@@ -36,6 +35,7 @@ struct netlink_sock { + unsigned long state; + size_t max_recvmsg_len; + wait_queue_head_t wait; ++ bool bound; + bool cb_running; + struct netlink_callback cb; + struct mutex *cb_mutex; diff --git a/queue-4.1/of_mdio-add-new-dt-property-managed-to-specify-the-phy-management-type.patch b/queue-4.1/of_mdio-add-new-dt-property-managed-to-specify-the-phy-management-type.patch new file mode 100644 index 00000000000..6e1d0f6832d --- /dev/null +++ b/queue-4.1/of_mdio-add-new-dt-property-managed-to-specify-the-phy-management-type.patch @@ -0,0 +1,106 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Stas Sergeev +Date: Mon, 20 Jul 2015 17:49:57 -0700 +Subject: of_mdio: add new DT property 'managed' to specify the PHY management type + +From: Stas Sergeev + +[ Upstream commit 4cba5c2103657d43d0886e4cff8004d95a3d0def in net-next tree, + will be pushed to Linus very soon. 
] + +Currently the PHY management type is selected by the MAC driver arbitrary. +The decision is based on the presence of the "fixed-link" node and on a +will of the driver's authors. +This caused a regression recently, when mvneta driver suddenly started +to use the in-band status for auto-negotiation on fixed links. +It appears the auto-negotiation may not work when expected by the MAC driver. +Sebastien Rannou explains: +<< Yes, I confirm that my HW does not generate an in-band status. AFAIK, it's +a PHY that aggregates 4xSGMIIs to 1xQSGMII ; the MAC side of the PHY (with +inband status) is connected to the switch through QSGMII, and in this context +we are on the media side of the PHY. >> +https://lkml.org/lkml/2015/7/10/206 + +This patch introduces the new string property 'managed' that allows +the user to set the management type explicitly. +The supported values are: +"auto" - default. Uses either MDIO or nothing, depending on the presence +of the fixed-link node +"in-band-status" - use in-band status + +Signed-off-by: Stas Sergeev + +CC: Rob Herring +CC: Pawel Moll +CC: Mark Rutland +CC: Ian Campbell +CC: Kumar Gala +CC: Florian Fainelli +CC: Grant Likely +CC: devicetree@vger.kernel.org +CC: linux-kernel@vger.kernel.org +CC: netdev@vger.kernel.org +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/devicetree/bindings/net/ethernet.txt | 4 ++++ + drivers/of/of_mdio.c | 19 +++++++++++++++++-- + 2 files changed, 21 insertions(+), 2 deletions(-) + +--- a/Documentation/devicetree/bindings/net/ethernet.txt ++++ b/Documentation/devicetree/bindings/net/ethernet.txt +@@ -25,7 +25,11 @@ The following properties are common to t + flow control thresholds. + - tx-fifo-depth: the size of the controller's transmit fifo in bytes. This + is used for components that can have configurable fifo sizes. ++- managed: string, specifies the PHY management type. Supported values are: ++ "auto", "in-band-status". 
"auto" is the default, it usess MDIO for ++ management if fixed-link is not specified. + + Child nodes of the Ethernet controller are typically the individual PHY devices + connected via the MDIO bus (sometimes the MDIO bus controller is separate). + They are described in the phy.txt file in this same directory. ++For non-MDIO PHY management see fixed-link.txt. +--- a/drivers/of/of_mdio.c ++++ b/drivers/of/of_mdio.c +@@ -263,7 +263,8 @@ EXPORT_SYMBOL(of_phy_attach); + bool of_phy_is_fixed_link(struct device_node *np) + { + struct device_node *dn; +- int len; ++ int len, err; ++ const char *managed; + + /* New binding */ + dn = of_get_child_by_name(np, "fixed-link"); +@@ -272,6 +273,10 @@ bool of_phy_is_fixed_link(struct device_ + return true; + } + ++ err = of_property_read_string(np, "managed", &managed); ++ if (err == 0 && strcmp(managed, "auto") != 0) ++ return true; ++ + /* Old binding */ + if (of_get_property(np, "fixed-link", &len) && + len == (5 * sizeof(__be32))) +@@ -286,8 +291,18 @@ int of_phy_register_fixed_link(struct de + struct fixed_phy_status status = {}; + struct device_node *fixed_link_node; + const __be32 *fixed_link_prop; +- int len; ++ int len, err; + struct phy_device *phy; ++ const char *managed; ++ ++ err = of_property_read_string(np, "managed", &managed); ++ if (err == 0) { ++ if (strcmp(managed, "in-band-status") == 0) { ++ /* status is zeroed, namely its .link member */ ++ phy = fixed_phy_register(PHY_POLL, &status, np); ++ return IS_ERR(phy) ? 
PTR_ERR(phy) : 0; ++ } ++ } + + /* New binding */ + fixed_link_node = of_get_child_by_name(np, "fixed-link"); diff --git a/queue-4.1/openvswitch-zero-flows-on-allocation.patch b/queue-4.1/openvswitch-zero-flows-on-allocation.patch new file mode 100644 index 00000000000..dc002731955 --- /dev/null +++ b/queue-4.1/openvswitch-zero-flows-on-allocation.patch @@ -0,0 +1,116 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Jesse Gross +Date: Mon, 21 Sep 2015 20:21:20 -0700 +Subject: openvswitch: Zero flows on allocation. + +From: Jesse Gross + +[ Upstream commit ae5f2fb1d51fa128a460bcfbe3c56d7ab8bf6a43 ] + +When support for megaflows was introduced, OVS needed to start +installing flows with a mask applied to them. Since masking is an +expensive operation, OVS also had an optimization that would only +take the parts of the flow keys that were covered by a non-zero +mask. The values stored in the remaining pieces should not matter +because they are masked out. + +While this works fine for the purposes of matching (which must always +look at the mask), serialization to netlink can be problematic. Since +the flow and the mask are serialized separately, the uninitialized +portions of the flow can be encoded with whatever values happen to be +present. + +In terms of functionality, this has little effect since these fields +will be masked out by definition. However, it leaks kernel memory to +userspace, which is a potential security vulnerability. It is also +possible that other code paths could look at the masked key and get +uninitialized data, although this does not currently appear to be an +issue in practice. + +This removes the mask optimization for flows that are being installed. +This was always intended to be the case as the mask optimizations were +really targeting per-packet flow operations. + +Fixes: 03f0d916 ("openvswitch: Mega flow implementation") +Signed-off-by: Jesse Gross +Acked-by: Pravin B Shelar +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/datapath.c | 4 ++-- + net/openvswitch/flow_table.c | 23 ++++++++++++----------- + net/openvswitch/flow_table.h | 2 +- + 3 files changed, 15 insertions(+), 14 deletions(-) + +--- a/net/openvswitch/datapath.c ++++ b/net/openvswitch/datapath.c +@@ -906,7 +906,7 @@ static int ovs_flow_cmd_new(struct sk_bu + if (error) + goto err_kfree_flow; + +- ovs_flow_mask_key(&new_flow->key, &key, &mask); ++ ovs_flow_mask_key(&new_flow->key, &key, true, &mask); + + /* Extract flow identifier. */ + error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], +@@ -1033,7 +1033,7 @@ static struct sw_flow_actions *get_flow_ + struct sw_flow_key masked_key; + int error; + +- ovs_flow_mask_key(&masked_key, key, mask); ++ ovs_flow_mask_key(&masked_key, key, true, mask); + error = ovs_nla_copy_actions(a, &masked_key, &acts, log); + if (error) { + OVS_NLERR(log, +--- a/net/openvswitch/flow_table.c ++++ b/net/openvswitch/flow_table.c +@@ -56,20 +56,21 @@ static u16 range_n_bytes(const struct sw + } + + void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, +- const struct sw_flow_mask *mask) ++ bool full, const struct sw_flow_mask *mask) + { +- const long *m = (const long *)((const u8 *)&mask->key + +- mask->range.start); +- const long *s = (const long *)((const u8 *)src + +- mask->range.start); +- long *d = (long *)((u8 *)dst + mask->range.start); ++ int start = full ? 0 : mask->range.start; ++ int len = full ? sizeof *dst : range_n_bytes(&mask->range); ++ const long *m = (const long *)((const u8 *)&mask->key + start); ++ const long *s = (const long *)((const u8 *)src + start); ++ long *d = (long *)((u8 *)dst + start); + int i; + +- /* The memory outside of the 'mask->range' are not set since +- * further operations on 'dst' only uses contents within +- * 'mask->range'. ++ /* If 'full' is true then all of 'dst' is fully initialized. 
Otherwise, ++ * if 'full' is false the memory outside of the 'mask->range' is left ++ * uninitialized. This can be used as an optimization when further ++ * operations on 'dst' only use contents within 'mask->range'. + */ +- for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) ++ for (i = 0; i < len; i += sizeof(long)) + *d++ = *s++ & *m++; + } + +@@ -473,7 +474,7 @@ static struct sw_flow *masked_flow_looku + u32 hash; + struct sw_flow_key masked_key; + +- ovs_flow_mask_key(&masked_key, unmasked, mask); ++ ovs_flow_mask_key(&masked_key, unmasked, false, mask); + hash = flow_hash(&masked_key, &mask->range); + head = find_bucket(ti, hash); + hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { +--- a/net/openvswitch/flow_table.h ++++ b/net/openvswitch/flow_table.h +@@ -86,5 +86,5 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid + bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *); + + void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, +- const struct sw_flow_mask *mask); ++ bool full, const struct sw_flow_mask *mask); + #endif /* flow_table.h */ diff --git a/queue-4.1/sctp-fix-race-on-protocol-netns-initialization.patch b/queue-4.1/sctp-fix-race-on-protocol-netns-initialization.patch new file mode 100644 index 00000000000..18024307ef7 --- /dev/null +++ b/queue-4.1/sctp-fix-race-on-protocol-netns-initialization.patch @@ -0,0 +1,232 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Marcelo Ricardo Leitner +Date: Thu, 10 Sep 2015 17:31:15 -0300 +Subject: sctp: fix race on protocol/netns initialization + +From: Marcelo Ricardo Leitner + +[ Upstream commit 8e2d61e0aed2b7c4ecb35844fe07e0b2b762dee4 ] + +Consider sctp module is unloaded and is being requested because an user +is creating a sctp socket. 
+ +During initialization, sctp will add the new protocol type and then +initialize pernet subsys: + + status = sctp_v4_protosw_init(); + if (status) + goto err_protosw_init; + + status = sctp_v6_protosw_init(); + if (status) + goto err_v6_protosw_init; + + status = register_pernet_subsys(&sctp_net_ops); + +The problem is that after those calls to sctp_v{4,6}_protosw_init(), it +is possible for userspace to create SCTP sockets as if the module is +already fully loaded. If that happens, one of the possible effects is +that we will have readers for net->sctp.local_addr_list list earlier +than expected and sctp_net_init() does not take precautions while +dealing with that list, leading to a potential panic but not limited to +that, as sctp_sock_init() will copy a bunch of blank/partially +initialized values from net->sctp. + +The race happens like this: + + CPU 0 | CPU 1 + socket() | + __sock_create | socket() + inet_create | __sock_create + list_for_each_entry_rcu( | + answer, &inetsw[sock->type], | + list) { | inet_create + /* no hits */ | + if (unlikely(err)) { | + ... | + request_module() | + /* socket creation is blocked | + * until the module is fully loaded | + */ | + sctp_init | + sctp_v4_protosw_init | + inet_register_protosw | + list_add_rcu(&p->list, | + last_perm); | + | list_for_each_entry_rcu( + | answer, &inetsw[sock->type], + sctp_v6_protosw_init | list) { + | /* hit, so assumes protocol + | * is already loaded + | */ + | /* socket creation continues + | * before netns is initialized + | */ + register_pernet_subsys | + +Simply inverting the initialization order between +register_pernet_subsys() and sctp_v4_protosw_init() is not possible +because register_pernet_subsys() will create a control sctp socket, so +the protocol must be already visible by then. Deferring the socket +creation to a work-queue is not good especially because we lose the +ability to handle its errors.
+ +So, as suggested by Vlad, the fix is to split netns initialization in +two moments: defaults and control socket, so that the defaults are +already loaded by when we register the protocol, while control socket +initialization is kept at the same moment it is today. + +Fixes: 4db67e808640 ("sctp: Make the address lists per network namespace") +Signed-off-by: Vlad Yasevich +Signed-off-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/protocol.c | 64 +++++++++++++++++++++++++++++++++------------------- + 1 file changed, 41 insertions(+), 23 deletions(-) + +--- a/net/sctp/protocol.c ++++ b/net/sctp/protocol.c +@@ -1166,7 +1166,7 @@ static void sctp_v4_del_protocol(void) + unregister_inetaddr_notifier(&sctp_inetaddr_notifier); + } + +-static int __net_init sctp_net_init(struct net *net) ++static int __net_init sctp_defaults_init(struct net *net) + { + int status; + +@@ -1259,12 +1259,6 @@ static int __net_init sctp_net_init(stru + + sctp_dbg_objcnt_init(net); + +- /* Initialize the control inode/socket for handling OOTB packets. */ +- if ((status = sctp_ctl_sock_init(net))) { +- pr_err("Failed to initialize the SCTP control sock\n"); +- goto err_ctl_sock_init; +- } +- + /* Initialize the local address list. */ + INIT_LIST_HEAD(&net->sctp.local_addr_list); + spin_lock_init(&net->sctp.local_addr_lock); +@@ -1280,9 +1274,6 @@ static int __net_init sctp_net_init(stru + + return 0; + +-err_ctl_sock_init: +- sctp_dbg_objcnt_exit(net); +- sctp_proc_exit(net); + err_init_proc: + cleanup_sctp_mibs(net); + err_init_mibs: +@@ -1291,15 +1282,12 @@ err_sysctl_register: + return status; + } + +-static void __net_exit sctp_net_exit(struct net *net) ++static void __net_exit sctp_defaults_exit(struct net *net) + { + /* Free the local address list */ + sctp_free_addr_wq(net); + sctp_free_local_addr_list(net); + +- /* Free the control endpoint. 
*/ +- inet_ctl_sock_destroy(net->sctp.ctl_sock); +- + sctp_dbg_objcnt_exit(net); + + sctp_proc_exit(net); +@@ -1307,9 +1295,32 @@ static void __net_exit sctp_net_exit(str + sctp_sysctl_net_unregister(net); + } + +-static struct pernet_operations sctp_net_ops = { +- .init = sctp_net_init, +- .exit = sctp_net_exit, ++static struct pernet_operations sctp_defaults_ops = { ++ .init = sctp_defaults_init, ++ .exit = sctp_defaults_exit, ++}; ++ ++static int __net_init sctp_ctrlsock_init(struct net *net) ++{ ++ int status; ++ ++ /* Initialize the control inode/socket for handling OOTB packets. */ ++ status = sctp_ctl_sock_init(net); ++ if (status) ++ pr_err("Failed to initialize the SCTP control sock\n"); ++ ++ return status; ++} ++ ++static void __net_init sctp_ctrlsock_exit(struct net *net) ++{ ++ /* Free the control endpoint. */ ++ inet_ctl_sock_destroy(net->sctp.ctl_sock); ++} ++ ++static struct pernet_operations sctp_ctrlsock_ops = { ++ .init = sctp_ctrlsock_init, ++ .exit = sctp_ctrlsock_exit, + }; + + /* Initialize the universe into something sensible. 
*/ +@@ -1442,8 +1453,11 @@ static __init int sctp_init(void) + sctp_v4_pf_init(); + sctp_v6_pf_init(); + +- status = sctp_v4_protosw_init(); ++ status = register_pernet_subsys(&sctp_defaults_ops); ++ if (status) ++ goto err_register_defaults; + ++ status = sctp_v4_protosw_init(); + if (status) + goto err_protosw_init; + +@@ -1451,9 +1465,9 @@ static __init int sctp_init(void) + if (status) + goto err_v6_protosw_init; + +- status = register_pernet_subsys(&sctp_net_ops); ++ status = register_pernet_subsys(&sctp_ctrlsock_ops); + if (status) +- goto err_register_pernet_subsys; ++ goto err_register_ctrlsock; + + status = sctp_v4_add_protocol(); + if (status) +@@ -1469,12 +1483,14 @@ out: + err_v6_add_protocol: + sctp_v4_del_protocol(); + err_add_protocol: +- unregister_pernet_subsys(&sctp_net_ops); +-err_register_pernet_subsys: ++ unregister_pernet_subsys(&sctp_ctrlsock_ops); ++err_register_ctrlsock: + sctp_v6_protosw_exit(); + err_v6_protosw_init: + sctp_v4_protosw_exit(); + err_protosw_init: ++ unregister_pernet_subsys(&sctp_defaults_ops); ++err_register_defaults: + sctp_v4_pf_exit(); + sctp_v6_pf_exit(); + sctp_sysctl_unregister(); +@@ -1507,12 +1523,14 @@ static __exit void sctp_exit(void) + sctp_v6_del_protocol(); + sctp_v4_del_protocol(); + +- unregister_pernet_subsys(&sctp_net_ops); ++ unregister_pernet_subsys(&sctp_ctrlsock_ops); + + /* Free protosw registrations */ + sctp_v6_protosw_exit(); + sctp_v4_protosw_exit(); + ++ unregister_pernet_subsys(&sctp_defaults_ops); ++ + /* Unregister with socket layer. 
*/ + sctp_v6_pf_exit(); + sctp_v4_pf_exit(); diff --git a/queue-4.1/series b/queue-4.1/series new file mode 100644 index 00000000000..a6810f9bdd7 --- /dev/null +++ b/queue-4.1/series @@ -0,0 +1,27 @@ +ip6_gre-release-cached-dst-on-tunnel-removal.patch +vxlan-re-ignore-eaddrinuse-from-igmp_join.patch +cls_u32-complete-the-check-for-non-forced-case-in-u32_destroy.patch +usbnet-get-event_no_runtime_pm-bit-before-it-is-cleared.patch +sock-diag-fix-panic-in-sock_diag_put_filterinfo.patch +ipv6-fix-exthdrs-offload-registration-in-out_rt-path.patch +net-fec-clear-receive-interrupts-before-processing-a-packet.patch +net-eth-altera-fix-napi-poll_list-corruption.patch +net-ipv6-correct-pim6-mrt_lock-handling.patch +net-dsa-bcm_sf2-fix-ageing-conditions-and-operation.patch +ipv6-fix-multipath-route-replace-error-recovery.patch +net-dsa-bcm_sf2-fix-64-bits-register-writes.patch +netlink-mmap-transform-mmap-skb-into-full-skb-on-taps.patch +sctp-fix-race-on-protocol-netns-initialization.patch +bridge-fix-igmpv3-mldv2-report-parsing.patch +net-mlx4_en-really-allow-to-change-rss-key.patch +macvtap-fix-tunsetsndbuf-values-64k.patch +openvswitch-zero-flows-on-allocation.patch +tcp-add-proper-ts-val-into-rst-packets.patch +net-revert-net_sched-move-tp-root-allocation-into-fw_init.patch +fib_rules-fix-fib-rule-dumps-across-multiple-skbs.patch +net-dsa-bcm_sf2-do-not-override-speed-settings.patch +net-phy-fixed_phy-handle-link-down-case.patch +of_mdio-add-new-dt-property-managed-to-specify-the-phy-management-type.patch +mvneta-use-inband-status-only-when-explicitly-enabled.patch +netlink-fix-autobind-race-condition-that-leads-to-zero-port-id.patch +netlink-replace-rhash_portid-with-bound.patch diff --git a/queue-4.1/sock-diag-fix-panic-in-sock_diag_put_filterinfo.patch b/queue-4.1/sock-diag-fix-panic-in-sock_diag_put_filterinfo.patch new file mode 100644 index 00000000000..6b6f09c0547 --- /dev/null +++ b/queue-4.1/sock-diag-fix-panic-in-sock_diag_put_filterinfo.patch @@ -0,0 +1,45 @@ 
+From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Daniel Borkmann +Date: Wed, 2 Sep 2015 14:00:36 +0200 +Subject: sock, diag: fix panic in sock_diag_put_filterinfo + +From: Daniel Borkmann + +[ Upstream commit b382c08656000c12a146723a153b85b13a855b49 ] + +diag socket's sock_diag_put_filterinfo() dumps classic BPF programs +upon request to user space (ss -0 -b). However, native eBPF programs +attached to sockets (SO_ATTACH_BPF) cannot be dumped with this method: + +Their orig_prog is always NULL. However, sock_diag_put_filterinfo() +unconditionally tries to access its filter length resp. wants to copy +the filter insns from there. Internal cBPF to eBPF transformations +attached to sockets don't have this issue, as orig_prog state is kept. + +It's currently only used by packet sockets. If we would want to add +native eBPF support in the future, this needs to be done through +a different attribute than PACKET_DIAG_FILTER to not confuse possible +user space disassemblers that work on diag data. + +Fixes: 89aa075832b0 ("net: sock: allow eBPF programs to be attached to sockets") +Signed-off-by: Daniel Borkmann +Acked-by: Nicolas Dichtel +Acked-by: Alexei Starovoitov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock_diag.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/core/sock_diag.c ++++ b/net/core/sock_diag.c +@@ -86,6 +86,9 @@ int sock_diag_put_filterinfo(bool may_re + goto out; + + fprog = filter->prog->orig_prog; ++ if (!fprog) ++ goto out; ++ + flen = bpf_classic_proglen(fprog); + + attr = nla_reserve(skb, attrtype, flen); diff --git a/queue-4.1/tcp-add-proper-ts-val-into-rst-packets.patch b/queue-4.1/tcp-add-proper-ts-val-into-rst-packets.patch new file mode 100644 index 00000000000..bb60b7cfc69 --- /dev/null +++ b/queue-4.1/tcp-add-proper-ts-val-into-rst-packets.patch @@ -0,0 +1,65 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Eric Dumazet +Date: Wed, 23 Sep 2015 14:00:21 -0700 +Subject: tcp: add proper TS val into RST packets + +From: Eric Dumazet + +[ Upstream commit 675ee231d960af2af3606b4480324e26797eb010 ] + +RST packets sent on behalf of TCP connections with TS option (RFC 7323 +TCP timestamps) have incorrect TS val (set to 0), but correct TS ecr. 
+ +A > B: Flags [S], seq 0, win 65535, options [mss 1000,nop,nop,TS val 100 +ecr 0], length 0 +B > A: Flags [S.], seq 2444755794, ack 1, win 28960, options [mss +1460,nop,nop,TS val 7264344 ecr 100], length 0 +A > B: Flags [.], ack 1, win 65535, options [nop,nop,TS val 110 ecr +7264344], length 0 + +B > A: Flags [R.], seq 1, ack 1, win 28960, options [nop,nop,TS val 0 +ecr 110], length 0 + +We need to call skb_mstamp_get() to get proper TS val, +derived from skb->skb_mstamp + +Note that RFC 1323 was advocating to not send TS option in RST segment, +but RFC 7323 recommends the opposite : + + Once TSopt has been successfully negotiated, that is both <SYN> and + <SYN,ACK> contain TSopt, the TSopt MUST be sent in every non-<RST> + segment for the duration of the connection, and SHOULD be sent in an + <RST> segment (see Section 5.2 for details) + +Note this RFC recommends to send TS val = 0, but we believe it is +premature : We do not know if all TCP stacks are properly +handling the receive side : + + When an <RST> segment is + received, it MUST NOT be subjected to the PAWS check by verifying an + acceptable value in SEG.TSval, and information from the Timestamps + option MUST NOT be used to update connection state information. + SEG.TSecr MAY be used to provide stricter <RST> acceptance checks. + +In 5 years, if/when all TCP stacks are RFC 7323 ready, we might consider +to decide to send TS val = 0, if it buys something. + +Fixes: 7faee5c0d514 ("tcp: remove TCP_SKB_CB(skb)->when") +Signed-off-by: Eric Dumazet +Acked-by: Yuchung Cheng +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2893,6 +2893,7 @@ void tcp_send_active_reset(struct sock * + skb_reserve(skb, MAX_TCP_HEADER); + tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), + TCPHDR_ACK | TCPHDR_RST); ++ skb_mstamp_get(&skb->skb_mstamp); + /* Send it off.
*/ + if (tcp_transmit_skb(sk, skb, 0, priority)) + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); diff --git a/queue-4.1/usbnet-get-event_no_runtime_pm-bit-before-it-is-cleared.patch b/queue-4.1/usbnet-get-event_no_runtime_pm-bit-before-it-is-cleared.patch new file mode 100644 index 00000000000..9eb5b66a538 --- /dev/null +++ b/queue-4.1/usbnet-get-event_no_runtime_pm-bit-before-it-is-cleared.patch @@ -0,0 +1,54 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Eugene Shatokhin +Date: Mon, 24 Aug 2015 23:13:42 +0300 +Subject: usbnet: Get EVENT_NO_RUNTIME_PM bit before it is cleared + +From: Eugene Shatokhin + +[ Upstream commit f50791ac1aca1ac1b0370d62397b43e9f831421a ] + +It is needed to check EVENT_NO_RUNTIME_PM bit of dev->flags in +usbnet_stop(), but its value should be read before it is cleared +when dev->flags is set to 0. + +The problem was spotted and the fix was provided by +Oliver Neukum . + +Signed-off-by: Eugene Shatokhin +Acked-by: Oliver Neukum +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/usbnet.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/drivers/net/usb/usbnet.c ++++ b/drivers/net/usb/usbnet.c +@@ -778,7 +778,7 @@ int usbnet_stop (struct net_device *net) + { + struct usbnet *dev = netdev_priv(net); + struct driver_info *info = dev->driver_info; +- int retval, pm; ++ int retval, pm, mpn; + + clear_bit(EVENT_DEV_OPEN, &dev->flags); + netif_stop_queue (net); +@@ -809,6 +809,8 @@ int usbnet_stop (struct net_device *net) + + usbnet_purge_paused_rxq(dev); + ++ mpn = !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags); ++ + /* deferred work (task, timer, softirq) must also stop. + * can't flush_scheduled_work() until we drop rtnl (later), + * else workers could deadlock; so make workers a NOP. 
+@@ -819,8 +821,7 @@ int usbnet_stop (struct net_device *net) + if (!pm) + usb_autopm_put_interface(dev->intf); + +- if (info->manage_power && +- !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags)) ++ if (info->manage_power && mpn) + info->manage_power(dev, 0); + else + usb_autopm_put_interface(dev->intf); diff --git a/queue-4.1/vxlan-re-ignore-eaddrinuse-from-igmp_join.patch b/queue-4.1/vxlan-re-ignore-eaddrinuse-from-igmp_join.patch new file mode 100644 index 00000000000..df0f838f212 --- /dev/null +++ b/queue-4.1/vxlan-re-ignore-eaddrinuse-from-igmp_join.patch @@ -0,0 +1,38 @@ +From foo@baz Wed Sep 30 05:18:31 CEST 2015 +From: Marcelo Ricardo Leitner +Date: Tue, 25 Aug 2015 20:22:35 -0300 +Subject: vxlan: re-ignore EADDRINUSE from igmp_join + +From: Marcelo Ricardo Leitner + +[ Upstream commit bef0057b7ba881d5ae67eec876df7a26fe672a59 ] + +Before 56ef9c909b40[1] it used to ignore all errors from igmp_join(). +That commit enhanced that and made it error out whatever error happened +with igmp_join(), but that's not good because when using multicast +groups vxlan will try to join it multiple times if the socket is reused +and then the 2nd and further attempts will fail with EADDRINUSE. + +As we don't track to which groups the socket is already subscribed, it's +okay to just ignore that error. + +Fixes: 56ef9c909b40 ("vxlan: Move socket initialization to within rtnl scope") +Reported-by: John Nielsen +Signed-off-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -2212,6 +2212,8 @@ static int vxlan_open(struct net_device + + if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) { + ret = vxlan_igmp_join(vxlan); ++ if (ret == -EADDRINUSE) ++ ret = 0; + if (ret) { + vxlan_sock_release(vs); + return ret; diff --git a/queue-4.2/series b/queue-4.2/series new file mode 100644 index 00000000000..cfd45ee5c94 --- /dev/null +++ b/queue-4.2/series @@ -0,0 +1,30 @@ +phylib-fix-device-deletion-order-in-mdiobus_unregister.patch +sock-diag-fix-panic-in-sock_diag_put_filterinfo.patch +ipv6-fix-exthdrs-offload-registration-in-out_rt-path.patch +net-fec-clear-receive-interrupts-before-processing-a-packet.patch +net-eth-altera-fix-napi-poll_list-corruption.patch +net-ipv6-correct-pim6-mrt_lock-handling.patch +net-dsa-bcm_sf2-fix-ageing-conditions-and-operation.patch +ipv6-fix-multipath-route-replace-error-recovery.patch +net-dsa-bcm_sf2-fix-64-bits-register-writes.patch +netlink-mmap-transform-mmap-skb-into-full-skb-on-taps.patch +sctp-fix-race-on-protocol-netns-initialization.patch +bridge-fix-igmpv3-mldv2-report-parsing.patch +net-mvneta-fix-dma-buffer-unmapping-in-mvneta_rx.patch +rtnetlink-catch-eopnotsupp-errors-from-ndo_bridge_getlink.patch +net-mlx4_en-really-allow-to-change-rss-key.patch +macvtap-fix-tunsetsndbuf-values-64k.patch +netlink-fix-autobind-race-condition-that-leads-to-zero-port-id.patch +netlink-replace-rhash_portid-with-bound.patch +net-dsa-actually-force-the-speed-on-the-cpu-port.patch +openvswitch-zero-flows-on-allocation.patch +tcp-add-proper-ts-val-into-rst-packets.patch +fix-af_packet-abi-breakage-in-4.2.patch +net-revert-net_sched-move-tp-root-allocation-into-fw_init.patch +fib_rules-fix-fib-rule-dumps-across-multiple-skbs.patch +ppp-fix-lockdep-splat-in-ppp_dev_uninit.patch +net-dsa-bcm_sf2-do-not-override-speed-settings.patch 
+net-phy-fixed_phy-handle-link-down-case.patch +of_mdio-add-new-dt-property-managed-to-specify-the-phy-management-type.patch +mvneta-use-inband-status-only-when-explicitly-enabled.patch +net-mlx4_core-capping-number-of-requested-msixs-to-max_msix.patch