4.1-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 30 Sep 2015 03:31:28 +0000 (05:31 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 30 Sep 2015 03:31:28 +0000 (05:31 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 30 Sep 2015 03:31:28 +0000 (05:31 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 30 Sep 2015 03:31:28 +0000 (05:31 +0200)
diff --git a/queue-4.1/bridge-fix-igmpv3-mldv2-report-parsing.patch b/queue-4.1/bridge-fix-igmpv3-mldv2-report-parsing.patch

new file mode 100644 (file)

index 0000000..018a4f7
--- /dev/null
+++ b/queue-4.1/bridge-fix-igmpv3-mldv2-report-parsing.patch
@@ -0,0 +1,53 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
+Date: Fri, 11 Sep 2015 18:39:48 +0200
+Subject: bridge: fix igmpv3 / mldv2 report parsing
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
+
+[ Upstream commit c2d4fbd2163e607915cc05798ce7fb7f31117cc1 ]
+
+With the newly introduced helper functions the skb pulling is hidden in
+the checksumming function - and undone before returning to the caller.
+
+The IGMPv3 and MLDv2 report parsing functions in the bridge still
+assumed that the skb is pointing to the beginning of the IGMP/MLD
+message while it is now kept at the beginning of the IPv4/6 header,
+breaking the message parsing and creating packet loss.
+
+Fixing this by taking the offset between IP and IGMP/MLD header into
+account, too.
+
+Fixes: 9afd85c9e455 ("net: Export IGMP/MLD message validation code")
+Reported-by: Tobias Powalowski <tobias.powalowski@googlemail.com>
+Tested-by: Tobias Powalowski <tobias.powalowski@googlemail.com>
+Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_multicast.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/bridge/br_multicast.c
++++ b/net/bridge/br_multicast.c
+@@ -980,7 +980,7 @@ static int br_ip4_multicast_igmp3_report
+ 
+       ih = igmpv3_report_hdr(skb);
+       num = ntohs(ih->ngrec);
+-      len = sizeof(*ih);
++      len = skb_transport_offset(skb) + sizeof(*ih);
+ 
+       for (i = 0; i < num; i++) {
+               len += sizeof(*grec);
+@@ -1035,7 +1035,7 @@ static int br_ip6_multicast_mld2_report(
+ 
+       icmp6h = icmp6_hdr(skb);
+       num = ntohs(icmp6h->icmp6_dataun.un_data16[1]);
+-      len = sizeof(*icmp6h);
++      len = skb_transport_offset(skb) + sizeof(*icmp6h);
+ 
+       for (i = 0; i < num; i++) {
+               __be16 *nsrcs, _nsrcs;
diff --git a/queue-4.1/cls_u32-complete-the-check-for-non-forced-case-in-u32_destroy.patch b/queue-4.1/cls_u32-complete-the-check-for-non-forced-case-in-u32_destroy.patch

new file mode 100644 (file)

index 0000000..e4fa66c
--- /dev/null
+++ b/queue-4.1/cls_u32-complete-the-check-for-non-forced-case-in-u32_destroy.patch
@@ -0,0 +1,55 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Tue, 25 Aug 2015 16:38:12 -0700
+Subject: cls_u32: complete the check for non-forced case in u32_destroy()
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+[ Upstream commit a6c1aea044e490da3e59124ec55991fe316818d5 ]
+
+In commit 1e052be69d04 ("net_sched: destroy proto tp when all filters are gone")
+I added a check in u32_destroy() to see if all real filters are gone
+for each tp. However, that is only done for root_ht; the same is needed
+for the other hash tables.
+
+This can be reproduced by the following tc commands:
+
+tc filter add dev eth0 parent 1:0 prio 5 handle 15: protocol ip u32 divisor 256
+tc filter add dev eth0 protocol ip parent 1: prio 5 handle 15:2:2 u32
+ht 15:2: match ip src 10.0.0.2 flowid 1:10
+tc filter add dev eth0 protocol ip parent 1: prio 5 handle 15:2:3 u32
+ht 15:2: match ip src 10.0.0.3 flowid 1:10
+
+Fixes: 1e052be69d04 ("net_sched: destroy proto tp when all filters are gone")
+Reported-by: Akshat Kakkar <akshat.1984@gmail.com>
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: Cong Wang <cwang@twopensource.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_u32.c |   13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -490,6 +490,19 @@ static bool u32_destroy(struct tcf_proto
+                                       return false;
+                       }
+               }
++
++              if (tp_c->refcnt > 1)
++                      return false;
++
++              if (tp_c->refcnt == 1) {
++                      struct tc_u_hnode *ht;
++
++                      for (ht = rtnl_dereference(tp_c->hlist);
++                           ht;
++                           ht = rtnl_dereference(ht->next))
++                              if (!ht_empty(ht))
++                                      return false;
++              }
+       }
+ 
+       if (root_ht && --root_ht->refcnt == 0)
diff --git a/queue-4.1/fib_rules-fix-fib-rule-dumps-across-multiple-skbs.patch b/queue-4.1/fib_rules-fix-fib-rule-dumps-across-multiple-skbs.patch

new file mode 100644 (file)

index 0000000..4c4f28f
--- /dev/null
+++ b/queue-4.1/fib_rules-fix-fib-rule-dumps-across-multiple-skbs.patch
@@ -0,0 +1,72 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Wilson Kok <wkok@cumulusnetworks.com>
+Date: Tue, 22 Sep 2015 21:40:22 -0700
+Subject: fib_rules: fix fib rule dumps across multiple skbs
+
+From: Wilson Kok <wkok@cumulusnetworks.com>
+
+[ Upstream commit 41fc014332d91ee90c32840bf161f9685b7fbf2b ]
+
+dump_rules returns skb length and not error.
+But when family == AF_UNSPEC, the caller of dump_rules
+assumes that it returns an error. Hence, when family == AF_UNSPEC,
+we continue trying to dump on -EMSGSIZE errors resulting in
+incorrect dump idx carried between skbs belonging to the same dump.
+This results in fib rule dump always only dumping rules that fit
+into the first skb.
+
+This patch fixes dump_rules to return error so that we exit correctly
+and idx is correctly maintained between skbs that are part of the
+same dump.
+
+Signed-off-by: Wilson Kok <wkok@cumulusnetworks.com>
+Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/fib_rules.c |   14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/net/core/fib_rules.c
++++ b/net/core/fib_rules.c
+@@ -615,15 +615,17 @@ static int dump_rules(struct sk_buff *sk
+ {
+       int idx = 0;
+       struct fib_rule *rule;
++      int err = 0;
+ 
+       rcu_read_lock();
+       list_for_each_entry_rcu(rule, &ops->rules_list, list) {
+               if (idx < cb->args[1])
+                       goto skip;
+ 
+-              if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
+-                                   cb->nlh->nlmsg_seq, RTM_NEWRULE,
+-                                   NLM_F_MULTI, ops) < 0)
++              err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
++                                     cb->nlh->nlmsg_seq, RTM_NEWRULE,
++                                     NLM_F_MULTI, ops);
++              if (err)
+                       break;
+ skip:
+               idx++;
+@@ -632,7 +634,7 @@ skip:
+       cb->args[1] = idx;
+       rules_ops_put(ops);
+ 
+-      return skb->len;
++      return err;
+ }
+ 
+ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
+@@ -648,7 +650,9 @@ static int fib_nl_dumprule(struct sk_buf
+               if (ops == NULL)
+                       return -EAFNOSUPPORT;
+ 
+-              return dump_rules(skb, cb, ops);
++              dump_rules(skb, cb, ops);
++
++              return skb->len;
+       }
+ 
+       rcu_read_lock();
diff --git a/queue-4.1/ip6_gre-release-cached-dst-on-tunnel-removal.patch b/queue-4.1/ip6_gre-release-cached-dst-on-tunnel-removal.patch

new file mode 100644 (file)

index 0000000..a606cf8
--- /dev/null
+++ b/queue-4.1/ip6_gre-release-cached-dst-on-tunnel-removal.patch
@@ -0,0 +1,35 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: huaibin Wang <huaibin.wang@6wind.com>
+Date: Tue, 25 Aug 2015 16:20:34 +0200
+Subject: ip6_gre: release cached dst on tunnel removal
+
+From: huaibin Wang <huaibin.wang@6wind.com>
+
+[ Upstream commit d4257295ba1b389c693b79de857a96e4b7cd8ac0 ]
+
+When a tunnel is deleted, the cached dst entry should be released.
+
+This problem may prevent the removal of a netns (seen with a x-netns IPv6
+gre tunnel):
+  unregister_netdevice: waiting for lo to become free. Usage count = 3
+
+CC: Dmitry Kozlov <xeb@mail.ru>
+Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
+Signed-off-by: huaibin Wang <huaibin.wang@6wind.com>
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -361,6 +361,7 @@ static void ip6gre_tunnel_uninit(struct
+       struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
+ 
+       ip6gre_tunnel_unlink(ign, t);
++      ip6_tnl_dst_reset(t);
+       dev_put(dev);
+ }
+ 
diff --git a/queue-4.1/ipv6-fix-exthdrs-offload-registration-in-out_rt-path.patch b/queue-4.1/ipv6-fix-exthdrs-offload-registration-in-out_rt-path.patch

new file mode 100644 (file)

index 0000000..47a8e39
--- /dev/null
+++ b/queue-4.1/ipv6-fix-exthdrs-offload-registration-in-out_rt-path.patch
@@ -0,0 +1,33 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Thu, 3 Sep 2015 00:29:07 +0200
+Subject: ipv6: fix exthdrs offload registration in out_rt path
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit e41b0bedba0293b9e1e8d1e8ed553104b9693656 ]
+
+We register the IPPROTO_ROUTING offload with inet6_add_offload(), but in
+the error path we try to unregister it with inet_del_offload(). This
+doesn't seem correct; it should actually be inet6_del_offload(). Also,
+ipv6_exthdrs_offload_exit() from that commit seems rather incorrect (it
+also uses rthdr_offload twice), but it got removed entirely later on.
+
+Fixes: 3336288a9fea ("ipv6: Switch to using new offload infrastructure.")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/exthdrs_offload.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/exthdrs_offload.c
++++ b/net/ipv6/exthdrs_offload.c
+@@ -36,6 +36,6 @@ out:
+       return ret;
+ 
+ out_rt:
+-      inet_del_offload(&rthdr_offload, IPPROTO_ROUTING);
++      inet6_del_offload(&rthdr_offload, IPPROTO_ROUTING);
+       goto out;
+ }
diff --git a/queue-4.1/ipv6-fix-multipath-route-replace-error-recovery.patch b/queue-4.1/ipv6-fix-multipath-route-replace-error-recovery.patch

new file mode 100644 (file)

index 0000000..f58ac7f
--- /dev/null
+++ b/queue-4.1/ipv6-fix-multipath-route-replace-error-recovery.patch
@@ -0,0 +1,352 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Roopa Prabhu <roopa@cumulusnetworks.com>
+Date: Tue, 8 Sep 2015 10:53:04 -0700
+Subject: ipv6: fix multipath route replace error recovery
+
+From: Roopa Prabhu <roopa@cumulusnetworks.com>
+
+[ Upstream commit 6b9ea5a64ed5eeb3f68f2e6fcce0ed1179801d1e ]
+
+Problem:
+The ecmp route replace support for ipv6 in the kernel deletes the
+existing ecmp route too early, i.e. when it installs the first nexthop.
+If there is an error in installing the subsequent nexthops, it's too late
+to recover the already deleted existing route, leaving the fib
+in an inconsistent state.
+
+This patch reduces the possibility of this by doing the following:
+a) Changes the existing multipath route add code to a two stage process:
+  build rt6_infos + insert them
+       ip6_route_add rt6_info creation code is moved into
+       ip6_route_info_create.
+b) This ensures that most errors are caught during building rt6_infos
+  and we fail early
+c) Separates multipath add and del code. Because add needs the special
+  two stage mode in a) and delete essentially does not care.
+d) In any event if the code fails during inserting a route again, a
+  warning is printed (This should be unlikely)
+
+Before the patch:
+$ip -6 route show
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024
+
+/* Try replacing the route with a duplicate nexthop */
+$ip -6 route change 3000:1000:1000:1000::2/128 nexthop via
+fe80::202:ff:fe00:b dev swp49s0 nexthop via fe80::202:ff:fe00:d dev
+swp49s1 nexthop via fe80::202:ff:fe00:d dev swp49s1
+RTNETLINK answers: File exists
+
+$ip -6 route show
+/* previously added ecmp route 3000:1000:1000:1000::2 disappears from
+ * kernel */
+
+After the patch:
+$ip -6 route show
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024
+
+/* Try replacing the route with a duplicate nexthop */
+$ip -6 route change 3000:1000:1000:1000::2/128 nexthop via
+fe80::202:ff:fe00:b dev swp49s0 nexthop via fe80::202:ff:fe00:d dev
+swp49s1 nexthop via fe80::202:ff:fe00:d dev swp49s1
+RTNETLINK answers: File exists
+
+$ip -6 route show
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024
+
+Fixes: 27596472473a ("ipv6: fix ECMP route replacement")
+Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
+Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/route.c |  201 +++++++++++++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 175 insertions(+), 26 deletions(-)
+
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -1515,7 +1515,7 @@ static int ip6_convert_metrics(struct mx
+       return -EINVAL;
+ }
+ 
+-int ip6_route_add(struct fib6_config *cfg)
++int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
+ {
+       int err;
+       struct net *net = cfg->fc_nlinfo.nl_net;
+@@ -1523,7 +1523,6 @@ int ip6_route_add(struct fib6_config *cf
+       struct net_device *dev = NULL;
+       struct inet6_dev *idev = NULL;
+       struct fib6_table *table;
+-      struct mx6_config mxc = { .mx = NULL, };
+       int addr_type;
+ 
+       if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
+@@ -1719,6 +1718,32 @@ install_route:
+ 
+       cfg->fc_nlinfo.nl_net = dev_net(dev);
+ 
++      *rt_ret = rt;
++
++      return 0;
++out:
++      if (dev)
++              dev_put(dev);
++      if (idev)
++              in6_dev_put(idev);
++      if (rt)
++              dst_free(&rt->dst);
++
++      *rt_ret = NULL;
++
++      return err;
++}
++
++int ip6_route_add(struct fib6_config *cfg)
++{
++      struct mx6_config mxc = { .mx = NULL, };
++      struct rt6_info *rt = NULL;
++      int err;
++
++      err = ip6_route_info_create(cfg, &rt);
++      if (err)
++              goto out;
++
+       err = ip6_convert_metrics(&mxc, cfg);
+       if (err)
+               goto out;
+@@ -1726,14 +1751,12 @@ install_route:
+       err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
+ 
+       kfree(mxc.mx);
++
+       return err;
+ out:
+-      if (dev)
+-              dev_put(dev);
+-      if (idev)
+-              in6_dev_put(idev);
+       if (rt)
+               dst_free(&rt->dst);
++
+       return err;
+ }
+ 
+@@ -2496,19 +2519,78 @@ errout:
+       return err;
+ }
+ 
+-static int ip6_route_multipath(struct fib6_config *cfg, int add)
++struct rt6_nh {
++      struct rt6_info *rt6_info;
++      struct fib6_config r_cfg;
++      struct mx6_config mxc;
++      struct list_head next;
++};
++
++static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
++{
++      struct rt6_nh *nh;
++
++      list_for_each_entry(nh, rt6_nh_list, next) {
++              pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
++                      &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
++                      nh->r_cfg.fc_ifindex);
++      }
++}
++
++static int ip6_route_info_append(struct list_head *rt6_nh_list,
++                               struct rt6_info *rt, struct fib6_config *r_cfg)
++{
++      struct rt6_nh *nh;
++      struct rt6_info *rtnh;
++      int err = -EEXIST;
++
++      list_for_each_entry(nh, rt6_nh_list, next) {
++              /* check if rt6_info already exists */
++              rtnh = nh->rt6_info;
++
++              if (rtnh->dst.dev == rt->dst.dev &&
++                  rtnh->rt6i_idev == rt->rt6i_idev &&
++                  ipv6_addr_equal(&rtnh->rt6i_gateway,
++                                  &rt->rt6i_gateway))
++                      return err;
++      }
++
++      nh = kzalloc(sizeof(*nh), GFP_KERNEL);
++      if (!nh)
++              return -ENOMEM;
++      nh->rt6_info = rt;
++      err = ip6_convert_metrics(&nh->mxc, r_cfg);
++      if (err) {
++              kfree(nh);
++              return err;
++      }
++      memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
++      list_add_tail(&nh->next, rt6_nh_list);
++
++      return 0;
++}
++
++static int ip6_route_multipath_add(struct fib6_config *cfg)
+ {
+       struct fib6_config r_cfg;
+       struct rtnexthop *rtnh;
++      struct rt6_info *rt;
++      struct rt6_nh *err_nh;
++      struct rt6_nh *nh, *nh_safe;
+       int remaining;
+       int attrlen;
+-      int err = 0, last_err = 0;
++      int err = 1;
++      int nhn = 0;
++      int replace = (cfg->fc_nlinfo.nlh &&
++                     (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
++      LIST_HEAD(rt6_nh_list);
+ 
+       remaining = cfg->fc_mp_len;
+-beginning:
+       rtnh = (struct rtnexthop *)cfg->fc_mp;
+ 
+-      /* Parse a Multipath Entry */
++      /* Parse a Multipath Entry and build a list (rt6_nh_list) of
++       * rt6_info structs per nexthop
++       */
+       while (rtnh_ok(rtnh, remaining)) {
+               memcpy(&r_cfg, cfg, sizeof(*cfg));
+               if (rtnh->rtnh_ifindex)
+@@ -2524,22 +2606,32 @@ beginning:
+                               r_cfg.fc_flags |= RTF_GATEWAY;
+                       }
+               }
+-              err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
++
++              err = ip6_route_info_create(&r_cfg, &rt);
++              if (err)
++                      goto cleanup;
++
++              err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
+               if (err) {
+-                      last_err = err;
+-                      /* If we are trying to remove a route, do not stop the
+-                       * loop when ip6_route_del() fails (because next hop is
+-                       * already gone), we should try to remove all next hops.
+-                       */
+-                      if (add) {
+-                              /* If add fails, we should try to delete all
+-                               * next hops that have been already added.
+-                               */
+-                              add = 0;
+-                              remaining = cfg->fc_mp_len - remaining;
+-                              goto beginning;
+-                      }
++                      dst_free(&rt->dst);
++                      goto cleanup;
++              }
++
++              rtnh = rtnh_next(rtnh, &remaining);
++      }
++
++      err_nh = NULL;
++      list_for_each_entry(nh, &rt6_nh_list, next) {
++              err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
++              /* nh->rt6_info is used or freed at this point, reset to NULL*/
++              nh->rt6_info = NULL;
++              if (err) {
++                      if (replace && nhn)
++                              ip6_print_replace_route_err(&rt6_nh_list);
++                      err_nh = nh;
++                      goto add_errout;
+               }
++
+               /* Because each route is added like a single route we remove
+                * these flags after the first nexthop: if there is a collision,
+                * we have already failed to add the first nexthop:
+@@ -2549,6 +2641,63 @@ beginning:
+                */
+               cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
+                                                    NLM_F_REPLACE);
++              nhn++;
++      }
++
++      goto cleanup;
++
++add_errout:
++      /* Delete routes that were already added */
++      list_for_each_entry(nh, &rt6_nh_list, next) {
++              if (err_nh == nh)
++                      break;
++              ip6_route_del(&nh->r_cfg);
++      }
++
++cleanup:
++      list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
++              if (nh->rt6_info)
++                      dst_free(&nh->rt6_info->dst);
++              if (nh->mxc.mx)
++                      kfree(nh->mxc.mx);
++              list_del(&nh->next);
++              kfree(nh);
++      }
++
++      return err;
++}
++
++static int ip6_route_multipath_del(struct fib6_config *cfg)
++{
++      struct fib6_config r_cfg;
++      struct rtnexthop *rtnh;
++      int remaining;
++      int attrlen;
++      int err = 1, last_err = 0;
++
++      remaining = cfg->fc_mp_len;
++      rtnh = (struct rtnexthop *)cfg->fc_mp;
++
++      /* Parse a Multipath Entry */
++      while (rtnh_ok(rtnh, remaining)) {
++              memcpy(&r_cfg, cfg, sizeof(*cfg));
++              if (rtnh->rtnh_ifindex)
++                      r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
++
++              attrlen = rtnh_attrlen(rtnh);
++              if (attrlen > 0) {
++                      struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
++
++                      nla = nla_find(attrs, attrlen, RTA_GATEWAY);
++                      if (nla) {
++                              nla_memcpy(&r_cfg.fc_gateway, nla, 16);
++                              r_cfg.fc_flags |= RTF_GATEWAY;
++                      }
++              }
++              err = ip6_route_del(&r_cfg);
++              if (err)
++                      last_err = err;
++
+               rtnh = rtnh_next(rtnh, &remaining);
+       }
+ 
+@@ -2565,7 +2714,7 @@ static int inet6_rtm_delroute(struct sk_
+               return err;
+ 
+       if (cfg.fc_mp)
+-              return ip6_route_multipath(&cfg, 0);
++              return ip6_route_multipath_del(&cfg);
+       else
+               return ip6_route_del(&cfg);
+ }
+@@ -2580,7 +2729,7 @@ static int inet6_rtm_newroute(struct sk_
+               return err;
+ 
+       if (cfg.fc_mp)
+-              return ip6_route_multipath(&cfg, 1);
++              return ip6_route_multipath_add(&cfg);
+       else
+               return ip6_route_add(&cfg);
+ }
diff --git a/queue-4.1/macvtap-fix-tunsetsndbuf-values-64k.patch b/queue-4.1/macvtap-fix-tunsetsndbuf-values-64k.patch

new file mode 100644 (file)

index 0000000..3b271d2
--- /dev/null
+++ b/queue-4.1/macvtap-fix-tunsetsndbuf-values-64k.patch
@@ -0,0 +1,47 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: "Michael S. Tsirkin" <mst@redhat.com>
+Date: Fri, 18 Sep 2015 13:41:09 +0300
+Subject: macvtap: fix TUNSETSNDBUF values > 64k
+
+From: "Michael S. Tsirkin" <mst@redhat.com>
+
+[ Upstream commit 3ea79249e81e5ed051f2e6480cbde896d99046e8 ]
+
+Upon TUNSETSNDBUF, macvtap reads the requested sndbuf size into
+a local variable u.
+Commit 39ec7de7092b ("macvtap: fix uninitialized access on
+TUNSETIFF") changed its type to u16 (which is the right thing to
+do for all other macvtap ioctls), breaking all values > 64k.
+
+The value of TUNSETSNDBUF is actually a signed 32 bit integer, so
+the right thing to do is to read it into an int.
+
+Cc: David S. Miller <davem@davemloft.net>
+Fixes: 39ec7de7092b ("macvtap: fix uninitialized access on TUNSETIFF")
+Reported-by: Mark A. Peloquin
+Bisected-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
+Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Tested-by:  Matthew Rosato <mjrosato@linux.vnet.ibm.com>
+Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/macvtap.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/macvtap.c
++++ b/drivers/net/macvtap.c
+@@ -1054,10 +1054,10 @@ static long macvtap_ioctl(struct file *f
+               return 0;
+ 
+       case TUNSETSNDBUF:
+-              if (get_user(u, up))
++              if (get_user(s, sp))
+                       return -EFAULT;
+ 
+-              q->sk.sk_sndbuf = u;
++              q->sk.sk_sndbuf = s;
+               return 0;
+ 
+       case TUNGETVNETHDRSZ:
diff --git a/queue-4.1/mvneta-use-inband-status-only-when-explicitly-enabled.patch b/queue-4.1/mvneta-use-inband-status-only-when-explicitly-enabled.patch

new file mode 100644 (file)

index 0000000..b6fb578
--- /dev/null
+++ b/queue-4.1/mvneta-use-inband-status-only-when-explicitly-enabled.patch
@@ -0,0 +1,66 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Stas Sergeev <stsp@list.ru>
+Date: Mon, 20 Jul 2015 17:49:58 -0700
+Subject: mvneta: use inband status only when explicitly enabled
+
+From: Stas Sergeev <stsp@list.ru>
+
+[ Upstream commit f8af8e6eb95093d5ce5ebcc52bd1929b0433e172 in net-next tree,
+  will be pushed to Linus very soon. ]
+
+The commit 898b2970e2c9 ("mvneta: implement SGMII-based in-band link state
+signaling") implemented the link parameters auto-negotiation unconditionally.
+Unfortunately it appears that some HW that implements SGMII protocol,
+doesn't generate the inband status, so it is not possible to auto-negotiate
+anything with such HW.
+
+This patch enables the auto-negotiation only if explicitly requested with
+the 'managed' DT property.
+
+This patch fixes the following regression:
+https://lkml.org/lkml/2015/7/8/865
+
+Signed-off-by: Stas Sergeev <stsp@users.sourceforge.net>
+
+CC: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+CC: netdev@vger.kernel.org
+CC: linux-kernel@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvneta.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -3029,8 +3029,8 @@ static int mvneta_probe(struct platform_
+       const char *dt_mac_addr;
+       char hw_mac_addr[ETH_ALEN];
+       const char *mac_from;
++      const char *managed;
+       int phy_mode;
+-      int fixed_phy = 0;
+       int err;
+ 
+       /* Our multiqueue support is not complete, so for now, only
+@@ -3064,7 +3064,6 @@ static int mvneta_probe(struct platform_
+                       dev_err(&pdev->dev, "cannot register fixed PHY\n");
+                       goto err_free_irq;
+               }
+-              fixed_phy = 1;
+ 
+               /* In the case of a fixed PHY, the DT node associated
+                * to the PHY is the Ethernet MAC DT node.
+@@ -3088,8 +3087,10 @@ static int mvneta_probe(struct platform_
+       pp = netdev_priv(dev);
+       pp->phy_node = phy_node;
+       pp->phy_interface = phy_mode;
+-      pp->use_inband_status = (phy_mode == PHY_INTERFACE_MODE_SGMII) &&
+-                              fixed_phy;
++
++      err = of_property_read_string(dn, "managed", &managed);
++      pp->use_inband_status = (err == 0 &&
++                               strcmp(managed, "in-band-status") == 0);
+ 
+       pp->clk = devm_clk_get(&pdev->dev, NULL);
+       if (IS_ERR(pp->clk)) {
diff --git a/queue-4.1/net-dsa-bcm_sf2-do-not-override-speed-settings.patch b/queue-4.1/net-dsa-bcm_sf2-do-not-override-speed-settings.patch

new file mode 100644 (file)

index 0000000..b803d28
--- /dev/null
+++ b/queue-4.1/net-dsa-bcm_sf2-do-not-override-speed-settings.patch
@@ -0,0 +1,68 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Mon, 20 Jul 2015 17:49:55 -0700
+Subject: net: dsa: bcm_sf2: Do not override speed settings
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit d2eac98f7d1b950b762a7eca05a9ce0ea1d878d2 in net-next tree,
+  will be pushed to Linus very soon. ]
+
+The SF2 driver currently overrides speed settings for its port
+configured using a fixed PHY. This is both unnecessary and incorrect,
+because we keep feeding back to the hardware the parameters that we read
+from the PHY device, which in the case of a fixed PHY cannot possibly
+change speed.
+
+This change is required so that the fixed PHY code can register a PHY
+with a link configured as DOWN by default, and to avoid
+some sort of circular dependency where we require the link_update
+callback to run to program the hardware, and we then utilize the fixed
+PHY parameters to program the hardware with the same settings.
+
+Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.c |   18 +-----------------
+ 1 file changed, 1 insertion(+), 17 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -893,15 +893,11 @@ static void bcm_sf2_sw_fixed_link_update
+                                        struct fixed_phy_status *status)
+ {
+       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+-      u32 duplex, pause, speed;
++      u32 duplex, pause;
+       u32 reg;
+ 
+       duplex = core_readl(priv, CORE_DUPSTS);
+       pause = core_readl(priv, CORE_PAUSESTS);
+-      speed = core_readl(priv, CORE_SPDSTS);
+-
+-      speed >>= (port * SPDSTS_SHIFT);
+-      speed &= SPDSTS_MASK;
+ 
+       status->link = 0;
+ 
+@@ -929,18 +925,6 @@ static void bcm_sf2_sw_fixed_link_update
+               reg &= ~LINK_STS;
+       core_writel(priv, reg, CORE_STS_OVERRIDE_GMIIP_PORT(port));
+ 
+-      switch (speed) {
+-      case SPDSTS_10:
+-              status->speed = SPEED_10;
+-              break;
+-      case SPDSTS_100:
+-              status->speed = SPEED_100;
+-              break;
+-      case SPDSTS_1000:
+-              status->speed = SPEED_1000;
+-              break;
+-      }
+-
+       if ((pause & (1 << port)) &&
+           (pause & (1 << (port + PAUSESTS_TX_PAUSE_SHIFT)))) {
+               status->asym_pause = 1;
diff --git a/queue-4.1/net-dsa-bcm_sf2-fix-64-bits-register-writes.patch b/queue-4.1/net-dsa-bcm_sf2-fix-64-bits-register-writes.patch

new file mode 100644 (file)

index 0000000..4f0cea0
--- /dev/null
+++ b/queue-4.1/net-dsa-bcm_sf2-fix-64-bits-register-writes.patch
@@ -0,0 +1,36 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Tue, 8 Sep 2015 20:06:41 -0700
+Subject: net: dsa: bcm_sf2: Fix 64-bits register writes
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 03679a14739a0d4c14b52ba65a69ff553bfba73b ]
+
+The macro to write 64-bits quantities to the 32-bits register swapped
+the value and offsets arguments, we want to preserve the ordering of the
+arguments with respect to how writel() is implemented for instance:
+value first, offset/base second.
+
+Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2.h
++++ b/drivers/net/dsa/bcm_sf2.h
+@@ -112,8 +112,8 @@ static inline u64 name##_readq(struct bc
+       spin_unlock(&priv->indir_lock);                                 \
+       return (u64)indir << 32 | dir;                                  \
+ }                                                                     \
+-static inline void name##_writeq(struct bcm_sf2_priv *priv, u32 off,  \
+-                                                      u64 val)        \
++static inline void name##_writeq(struct bcm_sf2_priv *priv, u64 val,  \
++                                                      u32 off)        \
+ {                                                                     \
+       spin_lock(&priv->indir_lock);                                   \
+       reg_writel(priv, upper_32_bits(val), REG_DIR_DATA_WRITE);       \
diff --git a/queue-4.1/net-dsa-bcm_sf2-fix-ageing-conditions-and-operation.patch b/queue-4.1/net-dsa-bcm_sf2-fix-ageing-conditions-and-operation.patch

new file mode 100644 (file)

index 0000000..fd00ec2
--- /dev/null
+++ b/queue-4.1/net-dsa-bcm_sf2-fix-ageing-conditions-and-operation.patch
@@ -0,0 +1,74 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Sat, 5 Sep 2015 13:07:27 -0700
+Subject: net: dsa: bcm_sf2: Fix ageing conditions and operation
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 39797a279d62972cd914ef580fdfacb13e508bf8 ]
+
+The comparison check between cur_hw_state and hw_state is currently
+invalid because cur_hw_state is right shifted by G_MISTP_SHIFT, while
+hw_state is not, so we end up comparing bits 2:0 with bits 7:5, which is
+going to cause an additional aging to occur. Fix this by not shifting
+cur_hw_state while reading it, but instead, mask the value with the
+appropriately shifted bitmask.
+
+The other problem with the fast-ageing process is that we did not set
+the EN_AGE_DYNAMIC bit to request the ageing to occur for dynamically
+learned MAC addresses. Finally, write back 0 to the FAST_AGE_CTRL
+register to avoid leaving spurious bits set from one operation to the
+other.
+
+Fixes: 12f460f23423 ("net: dsa: bcm_sf2: add HW bridging support")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -417,7 +417,7 @@ static int bcm_sf2_sw_fast_age_port(stru
+       core_writel(priv, port, CORE_FAST_AGE_PORT);
+ 
+       reg = core_readl(priv, CORE_FAST_AGE_CTRL);
+-      reg |= EN_AGE_PORT | FAST_AGE_STR_DONE;
++      reg |= EN_AGE_PORT | EN_AGE_DYNAMIC | FAST_AGE_STR_DONE;
+       core_writel(priv, reg, CORE_FAST_AGE_CTRL);
+ 
+       do {
+@@ -431,6 +431,8 @@ static int bcm_sf2_sw_fast_age_port(stru
+       if (!timeout)
+               return -ETIMEDOUT;
+ 
++      core_writel(priv, 0, CORE_FAST_AGE_CTRL);
++
+       return 0;
+ }
+ 
+@@ -506,7 +508,7 @@ static int bcm_sf2_sw_br_set_stp_state(s
+       u32 reg;
+ 
+       reg = core_readl(priv, CORE_G_PCTL_PORT(port));
+-      cur_hw_state = reg >> G_MISTP_STATE_SHIFT;
++      cur_hw_state = reg & (G_MISTP_STATE_MASK << G_MISTP_STATE_SHIFT);
+ 
+       switch (state) {
+       case BR_STATE_DISABLED:
+@@ -530,10 +532,12 @@ static int bcm_sf2_sw_br_set_stp_state(s
+       }
+ 
+       /* Fast-age ARL entries if we are moving a port from Learning or
+-       * Forwarding state to Disabled, Blocking or Listening state
++       * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening
++       * state (hw_state)
+        */
+       if (cur_hw_state != hw_state) {
+-              if (cur_hw_state & 4 && !(hw_state & 4)) {
++              if (cur_hw_state >= G_MISTP_LEARN_STATE &&
++                  hw_state <= G_MISTP_LISTEN_STATE) {
+                       ret = bcm_sf2_sw_fast_age_port(ds, port);
+                       if (ret) {
+                               pr_err("%s: fast-ageing failed\n", __func__);
diff --git a/queue-4.1/net-eth-altera-fix-napi-poll_list-corruption.patch b/queue-4.1/net-eth-altera-fix-napi-poll_list-corruption.patch

new file mode 100644 (file)

index 0000000..e52f0cd
--- /dev/null
+++ b/queue-4.1/net-eth-altera-fix-napi-poll_list-corruption.patch
@@ -0,0 +1,32 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Atsushi Nemoto <nemoto@toshiba-tops.co.jp>
+Date: Wed, 2 Sep 2015 17:49:29 +0900
+Subject: net: eth: altera: fix napi poll_list corruption
+
+From: Atsushi Nemoto <nemoto@toshiba-tops.co.jp>
+
+[ Upstream commit 4548a697e4969d695047cebd6d9af5e2f6cc728e ]
+
+tse_poll() calls __napi_complete() with irq enabled.  This leads to napi
+poll_list corruption and may stop all napi drivers working.
+Use napi_complete() instead of __napi_complete().
+
+Signed-off-by: Atsushi Nemoto <nemoto@toshiba-tops.co.jp>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/altera/altera_tse_main.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/altera/altera_tse_main.c
++++ b/drivers/net/ethernet/altera/altera_tse_main.c
+@@ -511,8 +511,7 @@ static int tse_poll(struct napi_struct *
+ 
+       if (rxcomplete < budget) {
+ 
+-              napi_gro_flush(napi, false);
+-              __napi_complete(napi);
++              napi_complete(napi);
+ 
+               netdev_dbg(priv->dev,
+                          "NAPI Complete, did %d packets with budget %d\n",
diff --git a/queue-4.1/net-fec-clear-receive-interrupts-before-processing-a-packet.patch b/queue-4.1/net-fec-clear-receive-interrupts-before-processing-a-packet.patch

new file mode 100644 (file)

index 0000000..2bfad7a
--- /dev/null
+++ b/queue-4.1/net-fec-clear-receive-interrupts-before-processing-a-packet.patch
@@ -0,0 +1,39 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Russell King <rmk+kernel@arm.linux.org.uk>
+Date: Wed, 2 Sep 2015 17:24:14 +0800
+Subject: net: fec: clear receive interrupts before processing a packet
+
+From: Russell King <rmk+kernel@arm.linux.org.uk>
+
+[ Upstream commit ed63f1dcd5788d36f942fbcce350742385e3e18c ]
+
+This patch just re-submits the patch "db3421c114cfa6326" because the
+patch "4d494cdc92b3b9a0" removed the change.
+
+Clear any pending receive interrupt before we process a pending packet.
+This helps to avoid any spurious interrupts being raised after we have
+fully cleaned the receive ring, while still allowing an interrupt to be
+raised if we receive another packet.
+
+The position of this is critical: we must do this prior to reading the
+next packet status to avoid potentially dropping an interrupt when a
+packet is still pending.
+
+Acked-by: Fugang Duan <B38611@freescale.com>
+Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/freescale/fec_main.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -1396,6 +1396,7 @@ fec_enet_rx_queue(struct net_device *nde
+               if ((status & BD_ENET_RX_LAST) == 0)
+                       netdev_err(ndev, "rcv is not +last\n");
+ 
++              writel(FEC_ENET_RXF, fep->hwp + FEC_IEVENT);
+ 
+               /* Check for errors. */
+               if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO |
diff --git a/queue-4.1/net-ipv6-correct-pim6-mrt_lock-handling.patch b/queue-4.1/net-ipv6-correct-pim6-mrt_lock-handling.patch

new file mode 100644 (file)

index 0000000..22d7d6a
--- /dev/null
+++ b/queue-4.1/net-ipv6-correct-pim6-mrt_lock-handling.patch
@@ -0,0 +1,35 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Richard Laing <richard.laing@alliedtelesis.co.nz>
+Date: Thu, 3 Sep 2015 13:52:31 +1200
+Subject: net/ipv6: Correct PIM6 mrt_lock handling
+
+From: Richard Laing <richard.laing@alliedtelesis.co.nz>
+
+[ Upstream commit 25b4a44c19c83d98e8c0807a7ede07c1f28eab8b ]
+
+In the IPv6 multicast routing code the mrt_lock was not being released
+correctly in the MFC iterator, as a result adding or deleting a MIF would
+cause a hang because the mrt_lock could not be acquired.
+
+This fix is a copy of the code for the IPv4 case and ensures that the lock
+is released correctly.
+
+Signed-off-by: Richard Laing <richard.laing@alliedtelesis.co.nz>
+Acked-by: Cong Wang <cwang@twopensource.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6mr.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/ip6mr.c
++++ b/net/ipv6/ip6mr.c
+@@ -550,7 +550,7 @@ static void ipmr_mfc_seq_stop(struct seq
+ 
+       if (it->cache == &mrt->mfc6_unres_queue)
+               spin_unlock_bh(&mfc_unres_lock);
+-      else if (it->cache == mrt->mfc6_cache_array)
++      else if (it->cache == &mrt->mfc6_cache_array[it->ct])
+               read_unlock(&mrt_lock);
+ }
+ 
diff --git a/queue-4.1/net-mlx4_en-really-allow-to-change-rss-key.patch b/queue-4.1/net-mlx4_en-really-allow-to-change-rss-key.patch

new file mode 100644 (file)

index 0000000..8537f2f
--- /dev/null
+++ b/queue-4.1/net-mlx4_en-really-allow-to-change-rss-key.patch
@@ -0,0 +1,35 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 15 Sep 2015 18:29:47 -0700
+Subject: net/mlx4_en: really allow to change RSS key
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 4671fc6d47e0a0108fe24a4d830347d6a6ef4aa7 ]
+
+When changing rss key, we do not want to overwrite user provided key
+by the one provided by netdev_rss_key_fill(), which is the host random
+key generated at boot time.
+
+Fixes: 947cbb0ac242 ("net/mlx4_en: Support for configurable RSS hash function")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Eyal Perry <eyalpe@mellanox.com>
+CC: Amir Vadai <amirv@mellanox.com>
+Acked-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_rx.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+@@ -1256,8 +1256,6 @@ int mlx4_en_config_rss_steer(struct mlx4
+               rss_context->hash_fn = MLX4_RSS_HASH_TOP;
+               memcpy(rss_context->rss_key, priv->rss_key,
+                      MLX4_EN_RSS_KEY_SIZE);
+-              netdev_rss_key_fill(rss_context->rss_key,
+-                                  MLX4_EN_RSS_KEY_SIZE);
+       } else {
+               en_err(priv, "Unknown RSS hash function requested\n");
+               err = -EINVAL;
diff --git a/queue-4.1/net-phy-fixed_phy-handle-link-down-case.patch b/queue-4.1/net-phy-fixed_phy-handle-link-down-case.patch

new file mode 100644 (file)

index 0000000..a1e1812
--- /dev/null
+++ b/queue-4.1/net-phy-fixed_phy-handle-link-down-case.patch
@@ -0,0 +1,65 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Stas Sergeev <stsp@list.ru>
+Date: Mon, 20 Jul 2015 17:49:56 -0700
+Subject: net: phy: fixed_phy: handle link-down case
+
+From: Stas Sergeev <stsp@list.ru>
+
+[ Upstream 868a4215be9a6d80548ccb74763b883dc99d32a2 in net-next tree,
+  will be pushed to Linus very soon. ]
+
+fixed_phy_register() currently hardcodes the fixed PHY link to 1, and
+expects to find a "speed" parameter to provide correct information
+towards the fixed PHY consumer.
+
+In a subsequent change, where we allow "managed" (e.g: (RS)GMII in-band
+status auto-negotiation) fixed PHYs, none of these parameters can be
+provided since they will be auto-negotiated, hence, we just provide a
+zero-initialized fixed_phy_status to fixed_phy_register() which makes it
+fail when we call fixed_phy_update_regs() since status.speed = 0 which
+makes us hit the "default" label and error out.
+
+Without this change, we would also see potentially inconsistent
+speed/duplex parameters for fixed PHYs when the link is DOWN.
+
+CC: netdev@vger.kernel.org
+CC: linux-kernel@vger.kernel.org
+Signed-off-by: Stas Sergeev <stsp@users.sourceforge.net>
+[florian: add more background to why this is correct and desirable]
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/fixed_phy.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/phy/fixed_phy.c
++++ b/drivers/net/phy/fixed_phy.c
+@@ -52,6 +52,10 @@ static int fixed_phy_update_regs(struct
+       u16 lpagb = 0;
+       u16 lpa = 0;
+ 
++      if (!fp->status.link)
++              goto done;
++      bmsr |= BMSR_LSTATUS | BMSR_ANEGCOMPLETE;
++
+       if (fp->status.duplex) {
+               bmcr |= BMCR_FULLDPLX;
+ 
+@@ -96,15 +100,13 @@ static int fixed_phy_update_regs(struct
+               }
+       }
+ 
+-      if (fp->status.link)
+-              bmsr |= BMSR_LSTATUS | BMSR_ANEGCOMPLETE;
+-
+       if (fp->status.pause)
+               lpa |= LPA_PAUSE_CAP;
+ 
+       if (fp->status.asym_pause)
+               lpa |= LPA_PAUSE_ASYM;
+ 
++done:
+       fp->regs[MII_PHYSID1] = 0;
+       fp->regs[MII_PHYSID2] = 0;
+ 
diff --git a/queue-4.1/net-revert-net_sched-move-tp-root-allocation-into-fw_init.patch b/queue-4.1/net-revert-net_sched-move-tp-root-allocation-into-fw_init.patch

new file mode 100644 (file)

index 0000000..172e1fa
--- /dev/null
+++ b/queue-4.1/net-revert-net_sched-move-tp-root-allocation-into-fw_init.patch
@@ -0,0 +1,93 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Tue, 22 Sep 2015 17:01:11 -0700
+Subject: net: revert "net_sched: move tp->root allocation into fw_init()"
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+[ Upstream commit d8aecb10115497f6cdf841df8c88ebb3ba25fa28 ]
+
+fw filter uses tp->root==NULL to check if it is the old method,
+so it doesn't need allocation at all in this case. This patch
+reverts the offending commit and adds some comments for old
+method to make it obvious.
+
+Fixes: 33f8b9ecdb15 ("net_sched: move tp->root allocation into fw_init()")
+Reported-by: Akshat Kakkar <akshat.1984@gmail.com>
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_fw.c |   30 +++++++++++++++---------------
+ 1 file changed, 15 insertions(+), 15 deletions(-)
+
+--- a/net/sched/cls_fw.c
++++ b/net/sched/cls_fw.c
+@@ -33,7 +33,6 @@
+ 
+ struct fw_head {
+       u32                     mask;
+-      bool                    mask_set;
+       struct fw_filter __rcu  *ht[HTSIZE];
+       struct rcu_head         rcu;
+ };
+@@ -84,7 +83,7 @@ static int fw_classify(struct sk_buff *s
+                       }
+               }
+       } else {
+-              /* old method */
++              /* Old method: classify the packet using its skb mark. */
+               if (id && (TC_H_MAJ(id) == 0 ||
+                          !(TC_H_MAJ(id ^ tp->q->handle)))) {
+                       res->classid = id;
+@@ -114,14 +113,9 @@ static unsigned long fw_get(struct tcf_p
+ 
+ static int fw_init(struct tcf_proto *tp)
+ {
+-      struct fw_head *head;
+-
+-      head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
+-      if (head == NULL)
+-              return -ENOBUFS;
+-
+-      head->mask_set = false;
+-      rcu_assign_pointer(tp->root, head);
++      /* We don't allocate fw_head here, because in the old method
++       * we don't need it at all.
++       */
+       return 0;
+ }
+ 
+@@ -252,7 +246,7 @@ static int fw_change(struct net *net, st
+       int err;
+ 
+       if (!opt)
+-              return handle ? -EINVAL : 0;
++              return handle ? -EINVAL : 0; /* Succeed if it is old method. */
+ 
+       err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy);
+       if (err < 0)
+@@ -302,11 +296,17 @@ static int fw_change(struct net *net, st
+       if (!handle)
+               return -EINVAL;
+ 
+-      if (!head->mask_set) {
+-              head->mask = 0xFFFFFFFF;
++      if (!head) {
++              u32 mask = 0xFFFFFFFF;
+               if (tb[TCA_FW_MASK])
+-                      head->mask = nla_get_u32(tb[TCA_FW_MASK]);
+-              head->mask_set = true;
++                      mask = nla_get_u32(tb[TCA_FW_MASK]);
++
++              head = kzalloc(sizeof(*head), GFP_KERNEL);
++              if (!head)
++                      return -ENOBUFS;
++              head->mask = mask;
++
++              rcu_assign_pointer(tp->root, head);
+       }
+ 
+       f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL);
diff --git a/queue-4.1/netlink-fix-autobind-race-condition-that-leads-to-zero-port-id.patch b/queue-4.1/netlink-fix-autobind-race-condition-that-leads-to-zero-port-id.patch

new file mode 100644 (file)

index 0000000..09ef4c9
--- /dev/null
+++ b/queue-4.1/netlink-fix-autobind-race-condition-that-leads-to-zero-port-id.patch
@@ -0,0 +1,94 @@
+From foo@baz Wed Sep 30 05:22:23 CEST 2015
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Fri, 18 Sep 2015 19:16:50 +0800
+Subject: netlink: Fix autobind race condition that leads to zero port ID
+Status: RO
+Content-Length: 3101
+Lines: 90
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+[ Upstream commit 1f770c0a09da855a2b51af6d19de97fb955eca85 ]
+
+The commit c0bb07df7d981e4091432754e30c9c720e2c0c78 ("netlink:
+Reset portid after netlink_insert failure") introduced a race
+condition where if two threads try to autobind the same socket
+one of them may end up with a zero port ID.  This led to kernel
+deadlocks that were observed by multiple people.
+
+This patch reverts that commit and instead fixes it by introducing
+a separate rhash_portid variable so that the real portid is only set
+after the socket has been successfully hashed.
+
+Fixes: c0bb07df7d98 ("netlink: Reset portid after netlink_insert failure")
+Reported-by: Tejun Heo <tj@kernel.org>
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |   11 ++++++-----
+ net/netlink/af_netlink.h |    1 +
+ 2 files changed, 7 insertions(+), 5 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -1017,7 +1017,7 @@ static inline int netlink_compare(struct
+       const struct netlink_compare_arg *x = arg->key;
+       const struct netlink_sock *nlk = ptr;
+ 
+-      return nlk->portid != x->portid ||
++      return nlk->rhash_portid != x->portid ||
+              !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet));
+ }
+ 
+@@ -1043,7 +1043,7 @@ static int __netlink_insert(struct netli
+ {
+       struct netlink_compare_arg arg;
+ 
+-      netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid);
++      netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->rhash_portid);
+       return rhashtable_lookup_insert_key(&table->hash, &arg,
+                                           &nlk_sk(sk)->node,
+                                           netlink_rhashtable_params);
+@@ -1105,7 +1105,7 @@ static int netlink_insert(struct sock *s
+           unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX))
+               goto err;
+ 
+-      nlk_sk(sk)->portid = portid;
++      nlk_sk(sk)->rhash_portid = portid;
+       sock_hold(sk);
+ 
+       err = __netlink_insert(table, sk);
+@@ -1117,10 +1117,11 @@ static int netlink_insert(struct sock *s
+                       err = -EOVERFLOW;
+               if (err == -EEXIST)
+                       err = -EADDRINUSE;
+-              nlk_sk(sk)->portid = 0;
+               sock_put(sk);
+       }
+ 
++      nlk_sk(sk)->portid = portid;
++
+ err:
+       release_sock(sk);
+       return err;
+@@ -3167,7 +3168,7 @@ static inline u32 netlink_hash(const voi
+       const struct netlink_sock *nlk = data;
+       struct netlink_compare_arg arg;
+ 
+-      netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid);
++      netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->rhash_portid);
+       return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed);
+ }
+ 
+--- a/net/netlink/af_netlink.h
++++ b/net/netlink/af_netlink.h
+@@ -25,6 +25,7 @@ struct netlink_ring {
+ struct netlink_sock {
+       /* struct sock has to be the first member of netlink_sock */
+       struct sock             sk;
++      u32                     rhash_portid;
+       u32                     portid;
+       u32                     dst_portid;
+       u32                     dst_group;
diff --git a/queue-4.1/netlink-mmap-transform-mmap-skb-into-full-skb-on-taps.patch b/queue-4.1/netlink-mmap-transform-mmap-skb-into-full-skb-on-taps.patch

new file mode 100644 (file)

index 0000000..2d57a21
--- /dev/null
+++ b/queue-4.1/netlink-mmap-transform-mmap-skb-into-full-skb-on-taps.patch
@@ -0,0 +1,116 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Thu, 10 Sep 2015 20:05:46 +0200
+Subject: netlink, mmap: transform mmap skb into full skb on taps
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit 1853c949646005b5959c483becde86608f548f24 ]
+
+Ken-ichirou reported that running netlink in mmap mode for receive in
+combination with nlmon will throw a NULL pointer dereference in
+__kfree_skb() on nlmon_xmit(), in my case I can also trigger an "unable
+to handle kernel paging request". The problem is the skb_clone() in
+__netlink_deliver_tap_skb() for skbs that are mmaped.
+
+I.e. the cloned skb doesn't have a destructor, whereas the mmap netlink
+skb has it pointed to netlink_skb_destructor(), set in the handler
+netlink_ring_setup_skb(). There, skb->head is being set to NULL, so
+that in such cases, __kfree_skb() doesn't perform a skb_release_data()
+via skb_release_all(), where skb->head is possibly being freed through
+kfree(head) into slab allocator, although netlink mmap skb->head points
+to the mmap buffer. Similarly, the same has to be done also for large
+netlink skbs where the data area is vmalloced. Therefore, as discussed,
+make a copy for these rather rare cases for now. This fixes the issue
+on my and Ken-ichirou's test-cases.
+
+Reference: http://thread.gmane.org/gmane.linux.network/371129
+Fixes: bcbde0d449ed ("net: netlink: virtual tap device management")
+Reported-by: Ken-ichirou MATSUZAWA <chamaken@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Tested-by: Ken-ichirou MATSUZAWA <chamaken@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |   30 +++++++++++++++++++++++-------
+ net/netlink/af_netlink.h |    9 +++++++++
+ 2 files changed, 32 insertions(+), 7 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -123,6 +123,24 @@ static inline u32 netlink_group_mask(u32
+       return group ? 1 << (group - 1) : 0;
+ }
+ 
++static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
++                                         gfp_t gfp_mask)
++{
++      unsigned int len = skb_end_offset(skb);
++      struct sk_buff *new;
++
++      new = alloc_skb(len, gfp_mask);
++      if (new == NULL)
++              return NULL;
++
++      NETLINK_CB(new).portid = NETLINK_CB(skb).portid;
++      NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group;
++      NETLINK_CB(new).creds = NETLINK_CB(skb).creds;
++
++      memcpy(skb_put(new, len), skb->data, len);
++      return new;
++}
++
+ int netlink_add_tap(struct netlink_tap *nt)
+ {
+       if (unlikely(nt->dev->type != ARPHRD_NETLINK))
+@@ -204,7 +222,11 @@ static int __netlink_deliver_tap_skb(str
+       int ret = -ENOMEM;
+ 
+       dev_hold(dev);
+-      nskb = skb_clone(skb, GFP_ATOMIC);
++
++      if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head))
++              nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
++      else
++              nskb = skb_clone(skb, GFP_ATOMIC);
+       if (nskb) {
+               nskb->dev = dev;
+               nskb->protocol = htons((u16) sk->sk_protocol);
+@@ -276,11 +298,6 @@ static void netlink_rcv_wake(struct sock
+ }
+ 
+ #ifdef CONFIG_NETLINK_MMAP
+-static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
+-{
+-      return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
+-}
+-
+ static bool netlink_rx_is_mmaped(struct sock *sk)
+ {
+       return nlk_sk(sk)->rx_ring.pg_vec != NULL;
+@@ -832,7 +849,6 @@ static void netlink_ring_set_copied(stru
+ }
+ 
+ #else /* CONFIG_NETLINK_MMAP */
+-#define netlink_skb_is_mmaped(skb)    false
+ #define netlink_rx_is_mmaped(sk)      false
+ #define netlink_tx_is_mmaped(sk)      false
+ #define netlink_mmap                  sock_no_mmap
+--- a/net/netlink/af_netlink.h
++++ b/net/netlink/af_netlink.h
+@@ -59,6 +59,15 @@ static inline struct netlink_sock *nlk_s
+       return container_of(sk, struct netlink_sock, sk);
+ }
+ 
++static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb)
++{
++#ifdef CONFIG_NETLINK_MMAP
++      return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
++#else
++      return false;
++#endif /* CONFIG_NETLINK_MMAP */
++}
++
+ struct netlink_table {
+       struct rhashtable       hash;
+       struct hlist_head       mc_list;
diff --git a/queue-4.1/netlink-replace-rhash_portid-with-bound.patch b/queue-4.1/netlink-replace-rhash_portid-with-bound.patch

new file mode 100644 (file)

index 0000000..178bec8
--- /dev/null
+++ b/queue-4.1/netlink-replace-rhash_portid-with-bound.patch
@@ -0,0 +1,248 @@
+From foo@baz Wed Sep 30 05:22:23 CEST 2015
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Tue, 22 Sep 2015 11:38:56 +0800
+Subject: netlink: Replace rhash_portid with bound
+Status: RO
+Content-Length: 8459
+Lines: 244
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+[ Upstream commit da314c9923fed553a007785a901fd395b7eb6c19 ]
+
+On Mon, Sep 21, 2015 at 02:20:22PM -0400, Tejun Heo wrote:
+>
+> store_release and load_acquire are different from the usual memory
+> barriers and can't be paired this way.  You have to pair store_release
+> and load_acquire.  Besides, it isn't a particularly good idea to
+
+OK I've decided to drop the acquire/release helpers as they don't
+help us at all and simply pessimises the code by using full memory
+barriers (on some architectures) where only a write or read barrier
+is needed.
+
+> depend on memory barriers embedded in other data structures like the
+> above.  Here, especially, rhashtable_insert() would have write barrier
+> *before* the entry is hashed not necessarily *after*, which means that
+> in the above case, a socket which appears to have set bound to a
+> reader might not visible when the reader tries to look up the socket
+> on the hashtable.
+
+But you are right we do need an explicit write barrier here to
+ensure that the hashing is visible.
+
+> There's no reason to be overly smart here.  This isn't a crazy hot
+> path, write barriers tend to be very cheap, store_release more so.
+> Please just do smp_store_release() and note what it's paired with.
+
+It's not about being overly smart.  It's about actually understanding
+what's going on with the code.  I've seen too many instances of
+people simply sprinkling synchronisation primitives around without
+any knowledge of what is happening underneath, which is just a recipe
+for creating hard-to-debug races.
+
+> > @@ -1539,7 +1546,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
+> >            }
+> >    }
+> >
+> > -  if (!nlk->portid) {
+> > +  if (!nlk->bound) {
+>
+> I don't think you can skip load_acquire here just because this is the
+> second deref of the variable.  That doesn't change anything.  Race
+> condition could still happen between the first and second tests and
+> skipping the second would lead to the same kind of bug.
+
+The reason this one is OK is because we do not use nlk->portid or
+try to get nlk from the hash table before we return to user-space.
+
+However, there is a real bug here that none of these acquire/release
+helpers discovered.  The two bound tests here used to be a single
+one.  Now that they are separate it is entirely possible for another
+thread to come in the middle and bind the socket.  So we need to
+repeat the portid check in order to maintain consistency.
+
+> > @@ -1587,7 +1594,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
+> >        !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
+> >            return -EPERM;
+> >
+> > -  if (!nlk->portid)
+> > +  if (!nlk->bound)
+>
+> Don't we need load_acquire here too?  Is this path holding a lock
+> which makes that unnecessary?
+
+Ditto.
+
+---8<---
+The commit 1f770c0a09da855a2b51af6d19de97fb955eca85 ("netlink:
+Fix autobind race condition that leads to zero port ID") created
+some new races that can occur due to inconsistencies between the
+two port IDs.
+
+Tejun is right that a barrier is unavoidable.  Therefore I am
+reverting to the original patch that used a boolean to indicate
+that a user netlink socket has been bound.
+
+Barriers have been added where necessary to ensure that a valid
+portid and the hashed socket is visible.
+
+I have also changed netlink_insert to only return EBUSY if the
+socket is bound to a portid different to the requested one.  This
+combined with only reading nlk->bound once in netlink_bind fixes
+a race where two threads that bind the socket at the same time
+with different port IDs may both succeed.
+
+Fixes: 1f770c0a09da ("netlink: Fix autobind race condition that leads to zero port ID")
+Reported-by: Tejun Heo <tj@kernel.org>
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Nacked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |   39 ++++++++++++++++++++++++++++-----------
+ net/netlink/af_netlink.h |    2 +-
+ 2 files changed, 29 insertions(+), 12 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -1017,7 +1017,7 @@ static inline int netlink_compare(struct
+       const struct netlink_compare_arg *x = arg->key;
+       const struct netlink_sock *nlk = ptr;
+ 
+-      return nlk->rhash_portid != x->portid ||
++      return nlk->portid != x->portid ||
+              !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet));
+ }
+ 
+@@ -1043,7 +1043,7 @@ static int __netlink_insert(struct netli
+ {
+       struct netlink_compare_arg arg;
+ 
+-      netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->rhash_portid);
++      netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid);
+       return rhashtable_lookup_insert_key(&table->hash, &arg,
+                                           &nlk_sk(sk)->node,
+                                           netlink_rhashtable_params);
+@@ -1096,8 +1096,8 @@ static int netlink_insert(struct sock *s
+ 
+       lock_sock(sk);
+ 
+-      err = -EBUSY;
+-      if (nlk_sk(sk)->portid)
++      err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY;
++      if (nlk_sk(sk)->bound)
+               goto err;
+ 
+       err = -ENOMEM;
+@@ -1105,7 +1105,7 @@ static int netlink_insert(struct sock *s
+           unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX))
+               goto err;
+ 
+-      nlk_sk(sk)->rhash_portid = portid;
++      nlk_sk(sk)->portid = portid;
+       sock_hold(sk);
+ 
+       err = __netlink_insert(table, sk);
+@@ -1120,7 +1120,9 @@ static int netlink_insert(struct sock *s
+               sock_put(sk);
+       }
+ 
+-      nlk_sk(sk)->portid = portid;
++      /* We need to ensure that the socket is hashed and visible. */
++      smp_wmb();
++      nlk_sk(sk)->bound = portid;
+ 
+ err:
+       release_sock(sk);
+@@ -1501,6 +1503,7 @@ static int netlink_bind(struct socket *s
+       struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
+       int err;
+       long unsigned int groups = nladdr->nl_groups;
++      bool bound;
+ 
+       if (addr_len < sizeof(struct sockaddr_nl))
+               return -EINVAL;
+@@ -1517,9 +1520,14 @@ static int netlink_bind(struct socket *s
+                       return err;
+       }
+ 
+-      if (nlk->portid)
++      bound = nlk->bound;
++      if (bound) {
++              /* Ensure nlk->portid is up-to-date. */
++              smp_rmb();
++
+               if (nladdr->nl_pid != nlk->portid)
+                       return -EINVAL;
++      }
+ 
+       if (nlk->netlink_bind && groups) {
+               int group;
+@@ -1535,7 +1543,10 @@ static int netlink_bind(struct socket *s
+               }
+       }
+ 
+-      if (!nlk->portid) {
++      /* No need for barriers here as we return to user-space without
++       * using any of the bound attributes.
++       */
++      if (!bound) {
+               err = nladdr->nl_pid ?
+                       netlink_insert(sk, nladdr->nl_pid) :
+                       netlink_autobind(sock);
+@@ -1583,7 +1594,10 @@ static int netlink_connect(struct socket
+           !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
+               return -EPERM;
+ 
+-      if (!nlk->portid)
++      /* No need for barriers here as we return to user-space without
++       * using any of the bound attributes.
++       */
++      if (!nlk->bound)
+               err = netlink_autobind(sock);
+ 
+       if (err == 0) {
+@@ -2340,10 +2354,13 @@ static int netlink_sendmsg(struct socket
+               dst_group = nlk->dst_group;
+       }
+ 
+-      if (!nlk->portid) {
++      if (!nlk->bound) {
+               err = netlink_autobind(sock);
+               if (err)
+                       goto out;
++      } else {
++              /* Ensure nlk is hashed and visible. */
++              smp_rmb();
+       }
+ 
+       /* It's a really convoluted way for userland to ask for mmaped
+@@ -3168,7 +3185,7 @@ static inline u32 netlink_hash(const voi
+       const struct netlink_sock *nlk = data;
+       struct netlink_compare_arg arg;
+ 
+-      netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->rhash_portid);
++      netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid);
+       return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed);
+ }
+ 
+--- a/net/netlink/af_netlink.h
++++ b/net/netlink/af_netlink.h
+@@ -25,7 +25,6 @@ struct netlink_ring {
+ struct netlink_sock {
+       /* struct sock has to be the first member of netlink_sock */
+       struct sock             sk;
+-      u32                     rhash_portid;
+       u32                     portid;
+       u32                     dst_portid;
+       u32                     dst_group;
+@@ -36,6 +35,7 @@ struct netlink_sock {
+       unsigned long           state;
+       size_t                  max_recvmsg_len;
+       wait_queue_head_t       wait;
++      bool                    bound;
+       bool                    cb_running;
+       struct netlink_callback cb;
+       struct mutex            *cb_mutex;
diff --git a/queue-4.1/of_mdio-add-new-dt-property-managed-to-specify-the-phy-management-type.patch b/queue-4.1/of_mdio-add-new-dt-property-managed-to-specify-the-phy-management-type.patch

new file mode 100644 (file)

index 0000000..6e1d0f6
--- /dev/null
+++ b/queue-4.1/of_mdio-add-new-dt-property-managed-to-specify-the-phy-management-type.patch
@@ -0,0 +1,106 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Stas Sergeev <stsp@list.ru>
+Date: Mon, 20 Jul 2015 17:49:57 -0700
+Subject: of_mdio: add new DT property 'managed' to specify the PHY management type
+
+From: Stas Sergeev <stsp@list.ru>
+
+[ Upstream commit 4cba5c2103657d43d0886e4cff8004d95a3d0def in net-next tree,
+  will be pushed to Linus very soon. ]
+
+Currently the PHY management type is selected arbitrarily by the MAC driver.
+The decision is based on the presence of the "fixed-link" node and on a
+will of the driver's authors.
+This caused a regression recently, when mvneta driver suddenly started
+to use the in-band status for auto-negotiation on fixed links.
+It appears the auto-negotiation may not work when expected by the MAC driver.
+Sebastien Rannou explains:
+<< Yes, I confirm that my HW does not generate an in-band status. AFAIK, it's
+a PHY that aggregates 4xSGMIIs to 1xQSGMII ; the MAC side of the PHY (with
+inband status) is connected to the switch through QSGMII, and in this context
+we are on the media side of the PHY. >>
+https://lkml.org/lkml/2015/7/10/206
+
+This patch introduces the new string property 'managed' that allows
+the user to set the management type explicitly.
+The supported values are:
+"auto" - default. Uses either MDIO or nothing, depending on the presence
+of the fixed-link node
+"in-band-status" - use in-band status
+
+Signed-off-by: Stas Sergeev <stsp@users.sourceforge.net>
+
+CC: Rob Herring <robh+dt@kernel.org>
+CC: Pawel Moll <pawel.moll@arm.com>
+CC: Mark Rutland <mark.rutland@arm.com>
+CC: Ian Campbell <ijc+devicetree@hellion.org.uk>
+CC: Kumar Gala <galak@codeaurora.org>
+CC: Florian Fainelli <f.fainelli@gmail.com>
+CC: Grant Likely <grant.likely@linaro.org>
+CC: devicetree@vger.kernel.org
+CC: linux-kernel@vger.kernel.org
+CC: netdev@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/devicetree/bindings/net/ethernet.txt |    4 ++++
+ drivers/of/of_mdio.c                               |   19 +++++++++++++++++--
+ 2 files changed, 21 insertions(+), 2 deletions(-)
+
+--- a/Documentation/devicetree/bindings/net/ethernet.txt
++++ b/Documentation/devicetree/bindings/net/ethernet.txt
+@@ -25,7 +25,11 @@ The following properties are common to t
+   flow control thresholds.
+ - tx-fifo-depth: the size of the controller's transmit fifo in bytes. This
+   is used for components that can have configurable fifo sizes.
++- managed: string, specifies the PHY management type. Supported values are:
++  "auto", "in-band-status". "auto" is the default, it usess MDIO for
++  management if fixed-link is not specified.
+ 
+ Child nodes of the Ethernet controller are typically the individual PHY devices
+ connected via the MDIO bus (sometimes the MDIO bus controller is separate).
+ They are described in the phy.txt file in this same directory.
++For non-MDIO PHY management see fixed-link.txt.
+--- a/drivers/of/of_mdio.c
++++ b/drivers/of/of_mdio.c
+@@ -263,7 +263,8 @@ EXPORT_SYMBOL(of_phy_attach);
+ bool of_phy_is_fixed_link(struct device_node *np)
+ {
+       struct device_node *dn;
+-      int len;
++      int len, err;
++      const char *managed;
+ 
+       /* New binding */
+       dn = of_get_child_by_name(np, "fixed-link");
+@@ -272,6 +273,10 @@ bool of_phy_is_fixed_link(struct device_
+               return true;
+       }
+ 
++      err = of_property_read_string(np, "managed", &managed);
++      if (err == 0 && strcmp(managed, "auto") != 0)
++              return true;
++
+       /* Old binding */
+       if (of_get_property(np, "fixed-link", &len) &&
+           len == (5 * sizeof(__be32)))
+@@ -286,8 +291,18 @@ int of_phy_register_fixed_link(struct de
+       struct fixed_phy_status status = {};
+       struct device_node *fixed_link_node;
+       const __be32 *fixed_link_prop;
+-      int len;
++      int len, err;
+       struct phy_device *phy;
++      const char *managed;
++
++      err = of_property_read_string(np, "managed", &managed);
++      if (err == 0) {
++              if (strcmp(managed, "in-band-status") == 0) {
++                      /* status is zeroed, namely its .link member */
++                      phy = fixed_phy_register(PHY_POLL, &status, np);
++                      return IS_ERR(phy) ? PTR_ERR(phy) : 0;
++              }
++      }
+ 
+       /* New binding */
+       fixed_link_node = of_get_child_by_name(np, "fixed-link");
diff --git a/queue-4.1/openvswitch-zero-flows-on-allocation.patch b/queue-4.1/openvswitch-zero-flows-on-allocation.patch

new file mode 100644 (file)

index 0000000..dc00273
--- /dev/null
+++ b/queue-4.1/openvswitch-zero-flows-on-allocation.patch
@@ -0,0 +1,116 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Jesse Gross <jesse@nicira.com>
+Date: Mon, 21 Sep 2015 20:21:20 -0700
+Subject: openvswitch: Zero flows on allocation.
+
+From: Jesse Gross <jesse@nicira.com>
+
+[ Upstream commit ae5f2fb1d51fa128a460bcfbe3c56d7ab8bf6a43 ]
+
+When support for megaflows was introduced, OVS needed to start
+installing flows with a mask applied to them. Since masking is an
+expensive operation, OVS also had an optimization that would only
+take the parts of the flow keys that were covered by a non-zero
+mask. The values stored in the remaining pieces should not matter
+because they are masked out.
+
+While this works fine for the purposes of matching (which must always
+look at the mask), serialization to netlink can be problematic. Since
+the flow and the mask are serialized separately, the uninitialized
+portions of the flow can be encoded with whatever values happen to be
+present.
+
+In terms of functionality, this has little effect since these fields
+will be masked out by definition. However, it leaks kernel memory to
+userspace, which is a potential security vulnerability. It is also
+possible that other code paths could look at the masked key and get
+uninitialized data, although this does not currently appear to be an
+issue in practice.
+
+This removes the mask optimization for flows that are being installed.
+This was always intended to be the case as the mask optimizations were
+really targeting per-packet flow operations.
+
+Fixes: 03f0d916 ("openvswitch: Mega flow implementation")
+Signed-off-by: Jesse Gross <jesse@nicira.com>
+Acked-by: Pravin B Shelar <pshelar@nicira.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/datapath.c   |    4 ++--
+ net/openvswitch/flow_table.c |   23 ++++++++++++-----------
+ net/openvswitch/flow_table.h |    2 +-
+ 3 files changed, 15 insertions(+), 14 deletions(-)
+
+--- a/net/openvswitch/datapath.c
++++ b/net/openvswitch/datapath.c
+@@ -906,7 +906,7 @@ static int ovs_flow_cmd_new(struct sk_bu
+       if (error)
+               goto err_kfree_flow;
+ 
+-      ovs_flow_mask_key(&new_flow->key, &key, &mask);
++      ovs_flow_mask_key(&new_flow->key, &key, true, &mask);
+ 
+       /* Extract flow identifier. */
+       error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
+@@ -1033,7 +1033,7 @@ static struct sw_flow_actions *get_flow_
+       struct sw_flow_key masked_key;
+       int error;
+ 
+-      ovs_flow_mask_key(&masked_key, key, mask);
++      ovs_flow_mask_key(&masked_key, key, true, mask);
+       error = ovs_nla_copy_actions(a, &masked_key, &acts, log);
+       if (error) {
+               OVS_NLERR(log,
+--- a/net/openvswitch/flow_table.c
++++ b/net/openvswitch/flow_table.c
+@@ -56,20 +56,21 @@ static u16 range_n_bytes(const struct sw
+ }
+ 
+ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
+-                     const struct sw_flow_mask *mask)
++                     bool full, const struct sw_flow_mask *mask)
+ {
+-      const long *m = (const long *)((const u8 *)&mask->key +
+-                              mask->range.start);
+-      const long *s = (const long *)((const u8 *)src +
+-                              mask->range.start);
+-      long *d = (long *)((u8 *)dst + mask->range.start);
++      int start = full ? 0 : mask->range.start;
++      int len = full ? sizeof *dst : range_n_bytes(&mask->range);
++      const long *m = (const long *)((const u8 *)&mask->key + start);
++      const long *s = (const long *)((const u8 *)src + start);
++      long *d = (long *)((u8 *)dst + start);
+       int i;
+ 
+-      /* The memory outside of the 'mask->range' are not set since
+-       * further operations on 'dst' only uses contents within
+-       * 'mask->range'.
++      /* If 'full' is true then all of 'dst' is fully initialized. Otherwise,
++       * if 'full' is false the memory outside of the 'mask->range' is left
++       * uninitialized. This can be used as an optimization when further
++       * operations on 'dst' only use contents within 'mask->range'.
+        */
+-      for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
++      for (i = 0; i < len; i += sizeof(long))
+               *d++ = *s++ & *m++;
+ }
+ 
+@@ -473,7 +474,7 @@ static struct sw_flow *masked_flow_looku
+       u32 hash;
+       struct sw_flow_key masked_key;
+ 
+-      ovs_flow_mask_key(&masked_key, unmasked, mask);
++      ovs_flow_mask_key(&masked_key, unmasked, false, mask);
+       hash = flow_hash(&masked_key, &mask->range);
+       head = find_bucket(ti, hash);
+       hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
+--- a/net/openvswitch/flow_table.h
++++ b/net/openvswitch/flow_table.h
+@@ -86,5 +86,5 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid
+ bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *);
+ 
+ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
+-                     const struct sw_flow_mask *mask);
++                     bool full, const struct sw_flow_mask *mask);
+ #endif /* flow_table.h */
diff --git a/queue-4.1/sctp-fix-race-on-protocol-netns-initialization.patch b/queue-4.1/sctp-fix-race-on-protocol-netns-initialization.patch

new file mode 100644 (file)

index 0000000..1802430
--- /dev/null
+++ b/queue-4.1/sctp-fix-race-on-protocol-netns-initialization.patch
@@ -0,0 +1,232 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Date: Thu, 10 Sep 2015 17:31:15 -0300
+Subject: sctp: fix race on protocol/netns initialization
+
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+
+[ Upstream commit 8e2d61e0aed2b7c4ecb35844fe07e0b2b762dee4 ]
+
+Consider sctp module is unloaded and is being requested because a user
+is creating a sctp socket.
+
+During initialization, sctp will add the new protocol type and then
+initialize pernet subsys:
+
+        status = sctp_v4_protosw_init();
+        if (status)
+                goto err_protosw_init;
+
+        status = sctp_v6_protosw_init();
+        if (status)
+                goto err_v6_protosw_init;
+
+        status = register_pernet_subsys(&sctp_net_ops);
+
+The problem is that after those calls to sctp_v{4,6}_protosw_init(), it
+is possible for userspace to create SCTP sockets as if the module were
+already fully loaded. If that happens, one of the possible effects is
+that we will have readers for net->sctp.local_addr_list list earlier
+than expected and sctp_net_init() does not take precautions while
+dealing with that list, leading to a potential panic but not limited to
+that, as sctp_sock_init() will copy a bunch of blank/partially
+initialized values from net->sctp.
+
+The race happens like this:
+
+     CPU 0                           |  CPU 1
+  socket()                           |
+   __sock_create                     | socket()
+    inet_create                      |  __sock_create
+     list_for_each_entry_rcu(        |
+        answer, &inetsw[sock->type], |
+        list) {                      |   inet_create
+      /* no hits */                  |
+     if (unlikely(err)) {            |
+      ...                            |
+      request_module()               |
+      /* socket creation is blocked  |
+       * the module is fully loaded  |
+       */                            |
+       sctp_init                     |
+        sctp_v4_protosw_init         |
+         inet_register_protosw       |
+          list_add_rcu(&p->list,     |
+                       last_perm);   |
+                                     |  list_for_each_entry_rcu(
+                                     |     answer, &inetsw[sock->type],
+        sctp_v6_protosw_init         |     list) {
+                                     |     /* hit, so assumes protocol
+                                     |      * is already loaded
+                                     |      */
+                                     |  /* socket creation continues
+                                     |   * before netns is initialized
+                                     |   */
+        register_pernet_subsys       |
+
+Simply inverting the initialization order between
+register_pernet_subsys() and sctp_v4_protosw_init() is not possible
+because register_pernet_subsys() will create a control sctp socket, so
+the protocol must be already visible by then. Deferring the socket
+creation to a work-queue is not good, especially because we lose the
+ability to handle its errors.
+
+So, as suggested by Vlad, the fix is to split netns initialization in
+two moments: defaults and control socket, so that the defaults are
+already loaded by when we register the protocol, while control socket
+initialization is kept at the same moment it is today.
+
+Fixes: 4db67e808640 ("sctp: Make the address lists per network namespace")
+Signed-off-by: Vlad Yasevich <vyasevich@gmail.com>
+Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/protocol.c |   64 +++++++++++++++++++++++++++++++++-------------------
+ 1 file changed, 41 insertions(+), 23 deletions(-)
+
+--- a/net/sctp/protocol.c
++++ b/net/sctp/protocol.c
+@@ -1166,7 +1166,7 @@ static void sctp_v4_del_protocol(void)
+       unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
+ }
+ 
+-static int __net_init sctp_net_init(struct net *net)
++static int __net_init sctp_defaults_init(struct net *net)
+ {
+       int status;
+ 
+@@ -1259,12 +1259,6 @@ static int __net_init sctp_net_init(stru
+ 
+       sctp_dbg_objcnt_init(net);
+ 
+-      /* Initialize the control inode/socket for handling OOTB packets.  */
+-      if ((status = sctp_ctl_sock_init(net))) {
+-              pr_err("Failed to initialize the SCTP control sock\n");
+-              goto err_ctl_sock_init;
+-      }
+-
+       /* Initialize the local address list. */
+       INIT_LIST_HEAD(&net->sctp.local_addr_list);
+       spin_lock_init(&net->sctp.local_addr_lock);
+@@ -1280,9 +1274,6 @@ static int __net_init sctp_net_init(stru
+ 
+       return 0;
+ 
+-err_ctl_sock_init:
+-      sctp_dbg_objcnt_exit(net);
+-      sctp_proc_exit(net);
+ err_init_proc:
+       cleanup_sctp_mibs(net);
+ err_init_mibs:
+@@ -1291,15 +1282,12 @@ err_sysctl_register:
+       return status;
+ }
+ 
+-static void __net_exit sctp_net_exit(struct net *net)
++static void __net_exit sctp_defaults_exit(struct net *net)
+ {
+       /* Free the local address list */
+       sctp_free_addr_wq(net);
+       sctp_free_local_addr_list(net);
+ 
+-      /* Free the control endpoint.  */
+-      inet_ctl_sock_destroy(net->sctp.ctl_sock);
+-
+       sctp_dbg_objcnt_exit(net);
+ 
+       sctp_proc_exit(net);
+@@ -1307,9 +1295,32 @@ static void __net_exit sctp_net_exit(str
+       sctp_sysctl_net_unregister(net);
+ }
+ 
+-static struct pernet_operations sctp_net_ops = {
+-      .init = sctp_net_init,
+-      .exit = sctp_net_exit,
++static struct pernet_operations sctp_defaults_ops = {
++      .init = sctp_defaults_init,
++      .exit = sctp_defaults_exit,
++};
++
++static int __net_init sctp_ctrlsock_init(struct net *net)
++{
++      int status;
++
++      /* Initialize the control inode/socket for handling OOTB packets.  */
++      status = sctp_ctl_sock_init(net);
++      if (status)
++              pr_err("Failed to initialize the SCTP control sock\n");
++
++      return status;
++}
++
++static void __net_init sctp_ctrlsock_exit(struct net *net)
++{
++      /* Free the control endpoint.  */
++      inet_ctl_sock_destroy(net->sctp.ctl_sock);
++}
++
++static struct pernet_operations sctp_ctrlsock_ops = {
++      .init = sctp_ctrlsock_init,
++      .exit = sctp_ctrlsock_exit,
+ };
+ 
+ /* Initialize the universe into something sensible.  */
+@@ -1442,8 +1453,11 @@ static __init int sctp_init(void)
+       sctp_v4_pf_init();
+       sctp_v6_pf_init();
+ 
+-      status = sctp_v4_protosw_init();
++      status = register_pernet_subsys(&sctp_defaults_ops);
++      if (status)
++              goto err_register_defaults;
+ 
++      status = sctp_v4_protosw_init();
+       if (status)
+               goto err_protosw_init;
+ 
+@@ -1451,9 +1465,9 @@ static __init int sctp_init(void)
+       if (status)
+               goto err_v6_protosw_init;
+ 
+-      status = register_pernet_subsys(&sctp_net_ops);
++      status = register_pernet_subsys(&sctp_ctrlsock_ops);
+       if (status)
+-              goto err_register_pernet_subsys;
++              goto err_register_ctrlsock;
+ 
+       status = sctp_v4_add_protocol();
+       if (status)
+@@ -1469,12 +1483,14 @@ out:
+ err_v6_add_protocol:
+       sctp_v4_del_protocol();
+ err_add_protocol:
+-      unregister_pernet_subsys(&sctp_net_ops);
+-err_register_pernet_subsys:
++      unregister_pernet_subsys(&sctp_ctrlsock_ops);
++err_register_ctrlsock:
+       sctp_v6_protosw_exit();
+ err_v6_protosw_init:
+       sctp_v4_protosw_exit();
+ err_protosw_init:
++      unregister_pernet_subsys(&sctp_defaults_ops);
++err_register_defaults:
+       sctp_v4_pf_exit();
+       sctp_v6_pf_exit();
+       sctp_sysctl_unregister();
+@@ -1507,12 +1523,14 @@ static __exit void sctp_exit(void)
+       sctp_v6_del_protocol();
+       sctp_v4_del_protocol();
+ 
+-      unregister_pernet_subsys(&sctp_net_ops);
++      unregister_pernet_subsys(&sctp_ctrlsock_ops);
+ 
+       /* Free protosw registrations */
+       sctp_v6_protosw_exit();
+       sctp_v4_protosw_exit();
+ 
++      unregister_pernet_subsys(&sctp_defaults_ops);
++
+       /* Unregister with socket layer. */
+       sctp_v6_pf_exit();
+       sctp_v4_pf_exit();
diff --git a/queue-4.1/series b/queue-4.1/series

new file mode 100644 (file)

index 0000000..a6810f9
--- /dev/null
+++ b/queue-4.1/series
@@ -0,0 +1,27 @@
+ip6_gre-release-cached-dst-on-tunnel-removal.patch
+vxlan-re-ignore-eaddrinuse-from-igmp_join.patch
+cls_u32-complete-the-check-for-non-forced-case-in-u32_destroy.patch
+usbnet-get-event_no_runtime_pm-bit-before-it-is-cleared.patch
+sock-diag-fix-panic-in-sock_diag_put_filterinfo.patch
+ipv6-fix-exthdrs-offload-registration-in-out_rt-path.patch
+net-fec-clear-receive-interrupts-before-processing-a-packet.patch
+net-eth-altera-fix-napi-poll_list-corruption.patch
+net-ipv6-correct-pim6-mrt_lock-handling.patch
+net-dsa-bcm_sf2-fix-ageing-conditions-and-operation.patch
+ipv6-fix-multipath-route-replace-error-recovery.patch
+net-dsa-bcm_sf2-fix-64-bits-register-writes.patch
+netlink-mmap-transform-mmap-skb-into-full-skb-on-taps.patch
+sctp-fix-race-on-protocol-netns-initialization.patch
+bridge-fix-igmpv3-mldv2-report-parsing.patch
+net-mlx4_en-really-allow-to-change-rss-key.patch
+macvtap-fix-tunsetsndbuf-values-64k.patch
+openvswitch-zero-flows-on-allocation.patch
+tcp-add-proper-ts-val-into-rst-packets.patch
+net-revert-net_sched-move-tp-root-allocation-into-fw_init.patch
+fib_rules-fix-fib-rule-dumps-across-multiple-skbs.patch
+net-dsa-bcm_sf2-do-not-override-speed-settings.patch
+net-phy-fixed_phy-handle-link-down-case.patch
+of_mdio-add-new-dt-property-managed-to-specify-the-phy-management-type.patch
+mvneta-use-inband-status-only-when-explicitly-enabled.patch
+netlink-fix-autobind-race-condition-that-leads-to-zero-port-id.patch
+netlink-replace-rhash_portid-with-bound.patch
diff --git a/queue-4.1/sock-diag-fix-panic-in-sock_diag_put_filterinfo.patch b/queue-4.1/sock-diag-fix-panic-in-sock_diag_put_filterinfo.patch

new file mode 100644 (file)

index 0000000..6b6f09c
--- /dev/null
+++ b/queue-4.1/sock-diag-fix-panic-in-sock_diag_put_filterinfo.patch
@@ -0,0 +1,45 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 2 Sep 2015 14:00:36 +0200
+Subject: sock, diag: fix panic in sock_diag_put_filterinfo
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit b382c08656000c12a146723a153b85b13a855b49 ]
+
+diag socket's sock_diag_put_filterinfo() dumps classic BPF programs
+upon request to user space (ss -0 -b). However, native eBPF programs
+attached to sockets (SO_ATTACH_BPF) cannot be dumped with this method:
+
+Their orig_prog is always NULL. However, sock_diag_put_filterinfo()
+unconditionally tries to access its filter length resp. wants to copy
+the filter insns from there. Internal cBPF to eBPF transformations
+attached to sockets don't have this issue, as orig_prog state is kept.
+
+It's currently only used by packet sockets. If we would want to add
+native eBPF support in the future, this needs to be done through
+a different attribute than PACKET_DIAG_FILTER to not confuse possible
+user space disassemblers that work on diag data.
+
+Fixes: 89aa075832b0 ("net: sock: allow eBPF programs to be attached to sockets")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Acked-by: Alexei Starovoitov <ast@plumgrid.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock_diag.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/core/sock_diag.c
++++ b/net/core/sock_diag.c
+@@ -86,6 +86,9 @@ int sock_diag_put_filterinfo(bool may_re
+               goto out;
+ 
+       fprog = filter->prog->orig_prog;
++      if (!fprog)
++              goto out;
++
+       flen = bpf_classic_proglen(fprog);
+ 
+       attr = nla_reserve(skb, attrtype, flen);
diff --git a/queue-4.1/tcp-add-proper-ts-val-into-rst-packets.patch b/queue-4.1/tcp-add-proper-ts-val-into-rst-packets.patch

new file mode 100644 (file)

index 0000000..bb60b7c
--- /dev/null
+++ b/queue-4.1/tcp-add-proper-ts-val-into-rst-packets.patch
@@ -0,0 +1,65 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 23 Sep 2015 14:00:21 -0700
+Subject: tcp: add proper TS val into RST packets
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 675ee231d960af2af3606b4480324e26797eb010 ]
+
+RST packets sent on behalf of TCP connections with TS option (RFC 7323
+TCP timestamps) have incorrect TS val (set to 0), but correct TS ecr.
+
+A > B: Flags [S], seq 0, win 65535, options [mss 1000,nop,nop,TS val 100
+ecr 0], length 0
+B > A: Flags [S.], seq 2444755794, ack 1, win 28960, options [mss
+1460,nop,nop,TS val 7264344 ecr 100], length 0
+A > B: Flags [.], ack 1, win 65535, options [nop,nop,TS val 110 ecr
+7264344], length 0
+
+B > A: Flags [R.], seq 1, ack 1, win 28960, options [nop,nop,TS val 0
+ecr 110], length 0
+
+We need to call skb_mstamp_get() to get proper TS val,
+derived from skb->skb_mstamp
+
+Note that RFC 1323 was advocating to not send TS option in RST segment,
+but RFC 7323 recommends the opposite :
+
+  Once TSopt has been successfully negotiated, that is both <SYN> and
+  <SYN,ACK> contain TSopt, the TSopt MUST be sent in every non-<RST>
+  segment for the duration of the connection, and SHOULD be sent in an
+  <RST> segment (see Section 5.2 for details)
+
+Note this RFC recommends sending TS val = 0, but we believe it is
+premature: we do not know if all TCP stacks are properly
+handling the receive side:
+
+   When an <RST> segment is
+   received, it MUST NOT be subjected to the PAWS check by verifying an
+   acceptable value in SEG.TSval, and information from the Timestamps
+   option MUST NOT be used to update connection state information.
+   SEG.TSecr MAY be used to provide stricter <RST> acceptance checks.
+
+In 5 years, if/when all TCP stacks are RFC 7323 ready, we might consider
+sending TS val = 0, if it buys something.
+
+Fixes: 7faee5c0d514 ("tcp: remove TCP_SKB_CB(skb)->when")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2893,6 +2893,7 @@ void tcp_send_active_reset(struct sock *
+       skb_reserve(skb, MAX_TCP_HEADER);
+       tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
+                            TCPHDR_ACK | TCPHDR_RST);
++      skb_mstamp_get(&skb->skb_mstamp);
+       /* Send it off. */
+       if (tcp_transmit_skb(sk, skb, 0, priority))
+               NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
diff --git a/queue-4.1/usbnet-get-event_no_runtime_pm-bit-before-it-is-cleared.patch b/queue-4.1/usbnet-get-event_no_runtime_pm-bit-before-it-is-cleared.patch

new file mode 100644 (file)

index 0000000..9eb5b66
--- /dev/null
+++ b/queue-4.1/usbnet-get-event_no_runtime_pm-bit-before-it-is-cleared.patch
@@ -0,0 +1,54 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Eugene Shatokhin <eugene.shatokhin@rosalab.ru>
+Date: Mon, 24 Aug 2015 23:13:42 +0300
+Subject: usbnet: Get EVENT_NO_RUNTIME_PM bit before it is cleared
+
+From: Eugene Shatokhin <eugene.shatokhin@rosalab.ru>
+
+[ Upstream commit f50791ac1aca1ac1b0370d62397b43e9f831421a ]
+
+The EVENT_NO_RUNTIME_PM bit of dev->flags needs to be checked in
+usbnet_stop(), but its value must be read before it is cleared
+when dev->flags is set to 0.
+
+The problem was spotted and the fix was provided by
+Oliver Neukum <oneukum@suse.de>.
+
+Signed-off-by: Eugene Shatokhin <eugene.shatokhin@rosalab.ru>
+Acked-by: Oliver Neukum <oneukum@suse.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/usbnet.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/usb/usbnet.c
++++ b/drivers/net/usb/usbnet.c
+@@ -778,7 +778,7 @@ int usbnet_stop (struct net_device *net)
+ {
+       struct usbnet           *dev = netdev_priv(net);
+       struct driver_info      *info = dev->driver_info;
+-      int                     retval, pm;
++      int                     retval, pm, mpn;
+ 
+       clear_bit(EVENT_DEV_OPEN, &dev->flags);
+       netif_stop_queue (net);
+@@ -809,6 +809,8 @@ int usbnet_stop (struct net_device *net)
+ 
+       usbnet_purge_paused_rxq(dev);
+ 
++      mpn = !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags);
++
+       /* deferred work (task, timer, softirq) must also stop.
+        * can't flush_scheduled_work() until we drop rtnl (later),
+        * else workers could deadlock; so make workers a NOP.
+@@ -819,8 +821,7 @@ int usbnet_stop (struct net_device *net)
+       if (!pm)
+               usb_autopm_put_interface(dev->intf);
+ 
+-      if (info->manage_power &&
+-          !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags))
++      if (info->manage_power && mpn)
+               info->manage_power(dev, 0);
+       else
+               usb_autopm_put_interface(dev->intf);
diff --git a/queue-4.1/vxlan-re-ignore-eaddrinuse-from-igmp_join.patch b/queue-4.1/vxlan-re-ignore-eaddrinuse-from-igmp_join.patch

new file mode 100644 (file)

index 0000000..df0f838
--- /dev/null
+++ b/queue-4.1/vxlan-re-ignore-eaddrinuse-from-igmp_join.patch
@@ -0,0 +1,38 @@
+From foo@baz Wed Sep 30 05:18:31 CEST 2015
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Date: Tue, 25 Aug 2015 20:22:35 -0300
+Subject: vxlan: re-ignore EADDRINUSE from igmp_join
+
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+
+[ Upstream commit bef0057b7ba881d5ae67eec876df7a26fe672a59 ]
+
+Before 56ef9c909b40[1] vxlan used to ignore all errors from igmp_join().
+That commit changed this and made it error out on any error from
+igmp_join(), but that's not good because when using multicast groups
+vxlan will try to join a group multiple times if the socket is reused,
+and then the 2nd and further attempts will fail with EADDRINUSE.
+
+As we don't track to which groups the socket is already subscribed, it's
+okay to just ignore that error.
+
+Fixes: 56ef9c909b40 ("vxlan: Move socket initialization to within rtnl scope")
+Reported-by: John Nielsen <lists@jnielsen.net>
+Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vxlan.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -2212,6 +2212,8 @@ static int vxlan_open(struct net_device
+ 
+       if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
+               ret = vxlan_igmp_join(vxlan);
++              if (ret == -EADDRINUSE)
++                      ret = 0;
+               if (ret) {
+                       vxlan_sock_release(vs);
+                       return ret;
diff --git a/queue-4.2/series b/queue-4.2/series

new file mode 100644 (file)

index 0000000..cfd45ee
--- /dev/null
+++ b/queue-4.2/series
@@ -0,0 +1,30 @@
+phylib-fix-device-deletion-order-in-mdiobus_unregister.patch
+sock-diag-fix-panic-in-sock_diag_put_filterinfo.patch
+ipv6-fix-exthdrs-offload-registration-in-out_rt-path.patch
+net-fec-clear-receive-interrupts-before-processing-a-packet.patch
+net-eth-altera-fix-napi-poll_list-corruption.patch
+net-ipv6-correct-pim6-mrt_lock-handling.patch
+net-dsa-bcm_sf2-fix-ageing-conditions-and-operation.patch
+ipv6-fix-multipath-route-replace-error-recovery.patch
+net-dsa-bcm_sf2-fix-64-bits-register-writes.patch
+netlink-mmap-transform-mmap-skb-into-full-skb-on-taps.patch
+sctp-fix-race-on-protocol-netns-initialization.patch
+bridge-fix-igmpv3-mldv2-report-parsing.patch
+net-mvneta-fix-dma-buffer-unmapping-in-mvneta_rx.patch
+rtnetlink-catch-eopnotsupp-errors-from-ndo_bridge_getlink.patch
+net-mlx4_en-really-allow-to-change-rss-key.patch
+macvtap-fix-tunsetsndbuf-values-64k.patch
+netlink-fix-autobind-race-condition-that-leads-to-zero-port-id.patch
+netlink-replace-rhash_portid-with-bound.patch
+net-dsa-actually-force-the-speed-on-the-cpu-port.patch
+openvswitch-zero-flows-on-allocation.patch
+tcp-add-proper-ts-val-into-rst-packets.patch
+fix-af_packet-abi-breakage-in-4.2.patch
+net-revert-net_sched-move-tp-root-allocation-into-fw_init.patch
+fib_rules-fix-fib-rule-dumps-across-multiple-skbs.patch
+ppp-fix-lockdep-splat-in-ppp_dev_uninit.patch
+net-dsa-bcm_sf2-do-not-override-speed-settings.patch
+net-phy-fixed_phy-handle-link-down-case.patch
+of_mdio-add-new-dt-property-managed-to-specify-the-phy-management-type.patch
+mvneta-use-inband-status-only-when-explicitly-enabled.patch
+net-mlx4_core-capping-number-of-requested-msixs-to-max_msix.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 30 Sep 2015 03:31:28 +0000 (05:31 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 30 Sep 2015 03:31:28 +0000 (05:31 +0200)
queue-4.1/bridge-fix-igmpv3-mldv2-report-parsing.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/cls_u32-complete-the-check-for-non-forced-case-in-u32_destroy.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/fib_rules-fix-fib-rule-dumps-across-multiple-skbs.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/ip6_gre-release-cached-dst-on-tunnel-removal.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/ipv6-fix-exthdrs-offload-registration-in-out_rt-path.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/ipv6-fix-multipath-route-replace-error-recovery.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/macvtap-fix-tunsetsndbuf-values-64k.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/mvneta-use-inband-status-only-when-explicitly-enabled.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/net-dsa-bcm_sf2-do-not-override-speed-settings.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/net-dsa-bcm_sf2-fix-64-bits-register-writes.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/net-dsa-bcm_sf2-fix-ageing-conditions-and-operation.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/net-eth-altera-fix-napi-poll_list-corruption.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/net-fec-clear-receive-interrupts-before-processing-a-packet.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/net-ipv6-correct-pim6-mrt_lock-handling.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/net-mlx4_en-really-allow-to-change-rss-key.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/net-phy-fixed_phy-handle-link-down-case.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/net-revert-net_sched-move-tp-root-allocation-into-fw_init.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/netlink-fix-autobind-race-condition-that-leads-to-zero-port-id.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/netlink-mmap-transform-mmap-skb-into-full-skb-on-taps.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/netlink-replace-rhash_portid-with-bound.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/of_mdio-add-new-dt-property-managed-to-specify-the-phy-management-type.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/openvswitch-zero-flows-on-allocation.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/sctp-fix-race-on-protocol-netns-initialization.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/series	[new file with mode: 0644]	patch \| blob
queue-4.1/sock-diag-fix-panic-in-sock_diag_put_filterinfo.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/tcp-add-proper-ts-val-into-rst-packets.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/usbnet-get-event_no_runtime_pm-bit-before-it-is-cleared.patch	[new file with mode: 0644]	patch \| blob
queue-4.1/vxlan-re-ignore-eaddrinuse-from-igmp_join.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/series	[new file with mode: 0644]	patch \| blob