4.2-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 30 Sep 2015 03:31:55 +0000 (05:31 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 30 Sep 2015 03:31:55 +0000 (05:31 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 30 Sep 2015 03:31:55 +0000 (05:31 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 30 Sep 2015 03:31:55 +0000 (05:31 +0200)
diff --git a/queue-4.2/bridge-fix-igmpv3-mldv2-report-parsing.patch b/queue-4.2/bridge-fix-igmpv3-mldv2-report-parsing.patch

new file mode 100644 (file)

index 0000000..c651768
--- /dev/null
+++ b/queue-4.2/bridge-fix-igmpv3-mldv2-report-parsing.patch
@@ -0,0 +1,53 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
+Date: Fri, 11 Sep 2015 18:39:48 +0200
+Subject: bridge: fix igmpv3 / mldv2 report parsing
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
+
+[ Upstream commit c2d4fbd2163e607915cc05798ce7fb7f31117cc1 ]
+
+With the newly introduced helper functions the skb pulling is hidden in
+the checksumming function - and undone before returning to the caller.
+
+The IGMPv3 and MLDv2 report parsing functions in the bridge still
+assumed that the skb is pointing to the beginning of the IGMP/MLD
+message while it is now kept at the beginning of the IPv4/6 header,
+breaking the message parsing and creating packet loss.
+
+Fixing this by taking the offset between IP and IGMP/MLD header into
+account, too.
+
+Fixes: 9afd85c9e455 ("net: Export IGMP/MLD message validation code")
+Reported-by: Tobias Powalowski <tobias.powalowski@googlemail.com>
+Tested-by: Tobias Powalowski <tobias.powalowski@googlemail.com>
+Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_multicast.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/bridge/br_multicast.c
++++ b/net/bridge/br_multicast.c
+@@ -991,7 +991,7 @@ static int br_ip4_multicast_igmp3_report
+ 
+       ih = igmpv3_report_hdr(skb);
+       num = ntohs(ih->ngrec);
+-      len = sizeof(*ih);
++      len = skb_transport_offset(skb) + sizeof(*ih);
+ 
+       for (i = 0; i < num; i++) {
+               len += sizeof(*grec);
+@@ -1052,7 +1052,7 @@ static int br_ip6_multicast_mld2_report(
+ 
+       icmp6h = icmp6_hdr(skb);
+       num = ntohs(icmp6h->icmp6_dataun.un_data16[1]);
+-      len = sizeof(*icmp6h);
++      len = skb_transport_offset(skb) + sizeof(*icmp6h);
+ 
+       for (i = 0; i < num; i++) {
+               __be16 *nsrcs, _nsrcs;
diff --git a/queue-4.2/fib_rules-fix-fib-rule-dumps-across-multiple-skbs.patch b/queue-4.2/fib_rules-fix-fib-rule-dumps-across-multiple-skbs.patch

new file mode 100644 (file)

index 0000000..3eba8da
--- /dev/null
+++ b/queue-4.2/fib_rules-fix-fib-rule-dumps-across-multiple-skbs.patch
@@ -0,0 +1,72 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Wilson Kok <wkok@cumulusnetworks.com>
+Date: Tue, 22 Sep 2015 21:40:22 -0700
+Subject: fib_rules: fix fib rule dumps across multiple skbs
+
+From: Wilson Kok <wkok@cumulusnetworks.com>
+
+[ Upstream commit 41fc014332d91ee90c32840bf161f9685b7fbf2b ]
+
+dump_rules returns skb length and not error.
+But when family == AF_UNSPEC, the caller of dump_rules
+assumes that it returns an error. Hence, when family == AF_UNSPEC,
+we continue trying to dump on -EMSGSIZE errors resulting in
+incorrect dump idx carried between skbs belonging to the same dump.
+This results in fib rule dump always only dumping rules that fit
+into the first skb.
+
+This patch fixes dump_rules to return error so that we exit correctly
+and idx is correctly maintained between skbs that are part of the
+same dump.
+
+Signed-off-by: Wilson Kok <wkok@cumulusnetworks.com>
+Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/fib_rules.c |   14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/net/core/fib_rules.c
++++ b/net/core/fib_rules.c
+@@ -615,15 +615,17 @@ static int dump_rules(struct sk_buff *sk
+ {
+       int idx = 0;
+       struct fib_rule *rule;
++      int err = 0;
+ 
+       rcu_read_lock();
+       list_for_each_entry_rcu(rule, &ops->rules_list, list) {
+               if (idx < cb->args[1])
+                       goto skip;
+ 
+-              if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
+-                                   cb->nlh->nlmsg_seq, RTM_NEWRULE,
+-                                   NLM_F_MULTI, ops) < 0)
++              err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
++                                     cb->nlh->nlmsg_seq, RTM_NEWRULE,
++                                     NLM_F_MULTI, ops);
++              if (err)
+                       break;
+ skip:
+               idx++;
+@@ -632,7 +634,7 @@ skip:
+       cb->args[1] = idx;
+       rules_ops_put(ops);
+ 
+-      return skb->len;
++      return err;
+ }
+ 
+ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
+@@ -648,7 +650,9 @@ static int fib_nl_dumprule(struct sk_buf
+               if (ops == NULL)
+                       return -EAFNOSUPPORT;
+ 
+-              return dump_rules(skb, cb, ops);
++              dump_rules(skb, cb, ops);
++
++              return skb->len;
+       }
+ 
+       rcu_read_lock();
diff --git a/queue-4.2/fix-af_packet-abi-breakage-in-4.2.patch b/queue-4.2/fix-af_packet-abi-breakage-in-4.2.patch

new file mode 100644 (file)

index 0000000..c267675
--- /dev/null
+++ b/queue-4.2/fix-af_packet-abi-breakage-in-4.2.patch
@@ -0,0 +1,114 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: David Woodhouse <dwmw2@infradead.org>
+Date: Wed, 23 Sep 2015 19:45:08 +0100
+Subject: Fix AF_PACKET ABI breakage in 4.2
+
+From: David Woodhouse <dwmw2@infradead.org>
+
+[ Upstream commit d3869efe7a8a2298516d9af4f91487cf486ca945 ]
+
+Commit 7d82410950aa ("virtio: add explicit big-endian support to memory
+accessors") accidentally changed the virtio_net header used by
+AF_PACKET with PACKET_VNET_HDR from host-endian to big-endian.
+
+Since virtio_legacy_is_little_endian() is a very long identifier,
+define a vio_le macro and use that throughout the code instead of the
+hard-coded 'false' for little-endian.
+
+This restores the ABI to match 4.1 and earlier kernels, and makes my
+test program work again.
+
+Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |   32 +++++++++++++++++---------------
+ 1 file changed, 17 insertions(+), 15 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -229,6 +229,8 @@ struct packet_skb_cb {
+       } sa;
+ };
+ 
++#define vio_le() virtio_legacy_is_little_endian()
++
+ #define PACKET_SKB_CB(__skb)  ((struct packet_skb_cb *)((__skb)->cb))
+ 
+ #define GET_PBDQC_FROM_RB(x)  ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
+@@ -2561,15 +2563,15 @@ static int packet_snd(struct socket *soc
+                       goto out_unlock;
+ 
+               if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
+-                  (__virtio16_to_cpu(false, vnet_hdr.csum_start) +
+-                   __virtio16_to_cpu(false, vnet_hdr.csum_offset) + 2 >
+-                    __virtio16_to_cpu(false, vnet_hdr.hdr_len)))
+-                      vnet_hdr.hdr_len = __cpu_to_virtio16(false,
+-                               __virtio16_to_cpu(false, vnet_hdr.csum_start) +
+-                              __virtio16_to_cpu(false, vnet_hdr.csum_offset) + 2);
++                  (__virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) +
++                   __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2 >
++                    __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len)))
++                      vnet_hdr.hdr_len = __cpu_to_virtio16(vio_le(),
++                               __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) +
++                              __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2);
+ 
+               err = -EINVAL;
+-              if (__virtio16_to_cpu(false, vnet_hdr.hdr_len) > len)
++              if (__virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len) > len)
+                       goto out_unlock;
+ 
+               if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+@@ -2612,7 +2614,7 @@ static int packet_snd(struct socket *soc
+       hlen = LL_RESERVED_SPACE(dev);
+       tlen = dev->needed_tailroom;
+       skb = packet_alloc_skb(sk, hlen + tlen, hlen, len,
+-                             __virtio16_to_cpu(false, vnet_hdr.hdr_len),
++                             __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len),
+                              msg->msg_flags & MSG_DONTWAIT, &err);
+       if (skb == NULL)
+               goto out_unlock;
+@@ -2659,8 +2661,8 @@ static int packet_snd(struct socket *soc
+ 
+       if (po->has_vnet_hdr) {
+               if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+-                      u16 s = __virtio16_to_cpu(false, vnet_hdr.csum_start);
+-                      u16 o = __virtio16_to_cpu(false, vnet_hdr.csum_offset);
++                      u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start);
++                      u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset);
+                       if (!skb_partial_csum_set(skb, s, o)) {
+                               err = -EINVAL;
+                               goto out_free;
+@@ -2668,7 +2670,7 @@ static int packet_snd(struct socket *soc
+               }
+ 
+               skb_shinfo(skb)->gso_size =
+-                      __virtio16_to_cpu(false, vnet_hdr.gso_size);
++                      __virtio16_to_cpu(vio_le(), vnet_hdr.gso_size);
+               skb_shinfo(skb)->gso_type = gso_type;
+ 
+               /* Header must be checked, and gso_segs computed. */
+@@ -3042,9 +3044,9 @@ static int packet_recvmsg(struct socket
+ 
+                       /* This is a hint as to how much should be linear. */
+                       vnet_hdr.hdr_len =
+-                              __cpu_to_virtio16(false, skb_headlen(skb));
++                              __cpu_to_virtio16(vio_le(), skb_headlen(skb));
+                       vnet_hdr.gso_size =
+-                              __cpu_to_virtio16(false, sinfo->gso_size);
++                              __cpu_to_virtio16(vio_le(), sinfo->gso_size);
+                       if (sinfo->gso_type & SKB_GSO_TCPV4)
+                               vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+                       else if (sinfo->gso_type & SKB_GSO_TCPV6)
+@@ -3062,9 +3064,9 @@ static int packet_recvmsg(struct socket
+ 
+               if (skb->ip_summed == CHECKSUM_PARTIAL) {
+                       vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+-                      vnet_hdr.csum_start = __cpu_to_virtio16(false,
++                      vnet_hdr.csum_start = __cpu_to_virtio16(vio_le(),
+                                         skb_checksum_start_offset(skb));
+-                      vnet_hdr.csum_offset = __cpu_to_virtio16(false,
++                      vnet_hdr.csum_offset = __cpu_to_virtio16(vio_le(),
+                                                        skb->csum_offset);
+               } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+                       vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
diff --git a/queue-4.2/ipv6-fix-exthdrs-offload-registration-in-out_rt-path.patch b/queue-4.2/ipv6-fix-exthdrs-offload-registration-in-out_rt-path.patch

new file mode 100644 (file)

index 0000000..cf690f5
--- /dev/null
+++ b/queue-4.2/ipv6-fix-exthdrs-offload-registration-in-out_rt-path.patch
@@ -0,0 +1,33 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Thu, 3 Sep 2015 00:29:07 +0200
+Subject: ipv6: fix exthdrs offload registration in out_rt path
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit e41b0bedba0293b9e1e8d1e8ed553104b9693656 ]
+
+We previously register IPPROTO_ROUTING offload under inet6_add_offload(),
+but in error path, we try to unregister it with inet_del_offload(). This
+doesn't seem correct, it should actually be inet6_del_offload(), also
+ipv6_exthdrs_offload_exit() from that commit seems rather incorrect (it
+also uses rthdr_offload twice), but it got removed entirely later on.
+
+Fixes: 3336288a9fea ("ipv6: Switch to using new offload infrastructure.")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/exthdrs_offload.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/exthdrs_offload.c
++++ b/net/ipv6/exthdrs_offload.c
+@@ -36,6 +36,6 @@ out:
+       return ret;
+ 
+ out_rt:
+-      inet_del_offload(&rthdr_offload, IPPROTO_ROUTING);
++      inet6_del_offload(&rthdr_offload, IPPROTO_ROUTING);
+       goto out;
+ }
diff --git a/queue-4.2/ipv6-fix-multipath-route-replace-error-recovery.patch b/queue-4.2/ipv6-fix-multipath-route-replace-error-recovery.patch

new file mode 100644 (file)

index 0000000..3066a1d
--- /dev/null
+++ b/queue-4.2/ipv6-fix-multipath-route-replace-error-recovery.patch
@@ -0,0 +1,352 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Roopa Prabhu <roopa@cumulusnetworks.com>
+Date: Tue, 8 Sep 2015 10:53:04 -0700
+Subject: ipv6: fix multipath route replace error recovery
+
+From: Roopa Prabhu <roopa@cumulusnetworks.com>
+
+[ Upstream commit 6b9ea5a64ed5eeb3f68f2e6fcce0ed1179801d1e ]
+
+Problem:
+The ecmp route replace support for ipv6 in the kernel, deletes the
+existing ecmp route too early, i.e. when it installs the first nexthop.
+If there is an error in installing the subsequent nexthops, it's too late
+to recover the already deleted existing route leaving the fib
+in an inconsistent state.
+
+This patch reduces the possibility of this by doing the following:
+a) Changes the existing multipath route add code to a two stage process:
+  build rt6_infos + insert them
+       ip6_route_add rt6_info creation code is moved into
+       ip6_route_info_create.
+b) This ensures that most errors are caught during building rt6_infos
+  and we fail early
+c) Separates multipath add and del code. Because add needs the special
+  two stage mode in a) and delete essentially does not care.
+d) In any event if the code fails during inserting a route again, a
+  warning is printed (This should be unlikely)
+
+Before the patch:
+$ip -6 route show
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024
+
+/* Try replacing the route with a duplicate nexthop */
+$ip -6 route change 3000:1000:1000:1000::2/128 nexthop via
+fe80::202:ff:fe00:b dev swp49s0 nexthop via fe80::202:ff:fe00:d dev
+swp49s1 nexthop via fe80::202:ff:fe00:d dev swp49s1
+RTNETLINK answers: File exists
+
+$ip -6 route show
+/* previously added ecmp route 3000:1000:1000:1000::2 disappears from
+ * kernel */
+
+After the patch:
+$ip -6 route show
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024
+
+/* Try replacing the route with a duplicate nexthop */
+$ip -6 route change 3000:1000:1000:1000::2/128 nexthop via
+fe80::202:ff:fe00:b dev swp49s0 nexthop via fe80::202:ff:fe00:d dev
+swp49s1 nexthop via fe80::202:ff:fe00:d dev swp49s1
+RTNETLINK answers: File exists
+
+$ip -6 route show
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024
+3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024
+
+Fixes: 27596472473a ("ipv6: fix ECMP route replacement")
+Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
+Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/route.c |  201 +++++++++++++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 175 insertions(+), 26 deletions(-)
+
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -1727,7 +1727,7 @@ static int ip6_convert_metrics(struct mx
+       return -EINVAL;
+ }
+ 
+-int ip6_route_add(struct fib6_config *cfg)
++int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
+ {
+       int err;
+       struct net *net = cfg->fc_nlinfo.nl_net;
+@@ -1735,7 +1735,6 @@ int ip6_route_add(struct fib6_config *cf
+       struct net_device *dev = NULL;
+       struct inet6_dev *idev = NULL;
+       struct fib6_table *table;
+-      struct mx6_config mxc = { .mx = NULL, };
+       int addr_type;
+ 
+       if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
+@@ -1941,6 +1940,32 @@ install_route:
+ 
+       cfg->fc_nlinfo.nl_net = dev_net(dev);
+ 
++      *rt_ret = rt;
++
++      return 0;
++out:
++      if (dev)
++              dev_put(dev);
++      if (idev)
++              in6_dev_put(idev);
++      if (rt)
++              dst_free(&rt->dst);
++
++      *rt_ret = NULL;
++
++      return err;
++}
++
++int ip6_route_add(struct fib6_config *cfg)
++{
++      struct mx6_config mxc = { .mx = NULL, };
++      struct rt6_info *rt = NULL;
++      int err;
++
++      err = ip6_route_info_create(cfg, &rt);
++      if (err)
++              goto out;
++
+       err = ip6_convert_metrics(&mxc, cfg);
+       if (err)
+               goto out;
+@@ -1948,14 +1973,12 @@ install_route:
+       err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
+ 
+       kfree(mxc.mx);
++
+       return err;
+ out:
+-      if (dev)
+-              dev_put(dev);
+-      if (idev)
+-              in6_dev_put(idev);
+       if (rt)
+               dst_free(&rt->dst);
++
+       return err;
+ }
+ 
+@@ -2727,19 +2750,78 @@ errout:
+       return err;
+ }
+ 
+-static int ip6_route_multipath(struct fib6_config *cfg, int add)
++struct rt6_nh {
++      struct rt6_info *rt6_info;
++      struct fib6_config r_cfg;
++      struct mx6_config mxc;
++      struct list_head next;
++};
++
++static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
++{
++      struct rt6_nh *nh;
++
++      list_for_each_entry(nh, rt6_nh_list, next) {
++              pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
++                      &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
++                      nh->r_cfg.fc_ifindex);
++      }
++}
++
++static int ip6_route_info_append(struct list_head *rt6_nh_list,
++                               struct rt6_info *rt, struct fib6_config *r_cfg)
++{
++      struct rt6_nh *nh;
++      struct rt6_info *rtnh;
++      int err = -EEXIST;
++
++      list_for_each_entry(nh, rt6_nh_list, next) {
++              /* check if rt6_info already exists */
++              rtnh = nh->rt6_info;
++
++              if (rtnh->dst.dev == rt->dst.dev &&
++                  rtnh->rt6i_idev == rt->rt6i_idev &&
++                  ipv6_addr_equal(&rtnh->rt6i_gateway,
++                                  &rt->rt6i_gateway))
++                      return err;
++      }
++
++      nh = kzalloc(sizeof(*nh), GFP_KERNEL);
++      if (!nh)
++              return -ENOMEM;
++      nh->rt6_info = rt;
++      err = ip6_convert_metrics(&nh->mxc, r_cfg);
++      if (err) {
++              kfree(nh);
++              return err;
++      }
++      memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
++      list_add_tail(&nh->next, rt6_nh_list);
++
++      return 0;
++}
++
++static int ip6_route_multipath_add(struct fib6_config *cfg)
+ {
+       struct fib6_config r_cfg;
+       struct rtnexthop *rtnh;
++      struct rt6_info *rt;
++      struct rt6_nh *err_nh;
++      struct rt6_nh *nh, *nh_safe;
+       int remaining;
+       int attrlen;
+-      int err = 0, last_err = 0;
++      int err = 1;
++      int nhn = 0;
++      int replace = (cfg->fc_nlinfo.nlh &&
++                     (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
++      LIST_HEAD(rt6_nh_list);
+ 
+       remaining = cfg->fc_mp_len;
+-beginning:
+       rtnh = (struct rtnexthop *)cfg->fc_mp;
+ 
+-      /* Parse a Multipath Entry */
++      /* Parse a Multipath Entry and build a list (rt6_nh_list) of
++       * rt6_info structs per nexthop
++       */
+       while (rtnh_ok(rtnh, remaining)) {
+               memcpy(&r_cfg, cfg, sizeof(*cfg));
+               if (rtnh->rtnh_ifindex)
+@@ -2755,22 +2837,32 @@ beginning:
+                               r_cfg.fc_flags |= RTF_GATEWAY;
+                       }
+               }
+-              err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
++
++              err = ip6_route_info_create(&r_cfg, &rt);
++              if (err)
++                      goto cleanup;
++
++              err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
+               if (err) {
+-                      last_err = err;
+-                      /* If we are trying to remove a route, do not stop the
+-                       * loop when ip6_route_del() fails (because next hop is
+-                       * already gone), we should try to remove all next hops.
+-                       */
+-                      if (add) {
+-                              /* If add fails, we should try to delete all
+-                               * next hops that have been already added.
+-                               */
+-                              add = 0;
+-                              remaining = cfg->fc_mp_len - remaining;
+-                              goto beginning;
+-                      }
++                      dst_free(&rt->dst);
++                      goto cleanup;
++              }
++
++              rtnh = rtnh_next(rtnh, &remaining);
++      }
++
++      err_nh = NULL;
++      list_for_each_entry(nh, &rt6_nh_list, next) {
++              err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
++              /* nh->rt6_info is used or freed at this point, reset to NULL*/
++              nh->rt6_info = NULL;
++              if (err) {
++                      if (replace && nhn)
++                              ip6_print_replace_route_err(&rt6_nh_list);
++                      err_nh = nh;
++                      goto add_errout;
+               }
++
+               /* Because each route is added like a single route we remove
+                * these flags after the first nexthop: if there is a collision,
+                * we have already failed to add the first nexthop:
+@@ -2780,6 +2872,63 @@ beginning:
+                */
+               cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
+                                                    NLM_F_REPLACE);
++              nhn++;
++      }
++
++      goto cleanup;
++
++add_errout:
++      /* Delete routes that were already added */
++      list_for_each_entry(nh, &rt6_nh_list, next) {
++              if (err_nh == nh)
++                      break;
++              ip6_route_del(&nh->r_cfg);
++      }
++
++cleanup:
++      list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
++              if (nh->rt6_info)
++                      dst_free(&nh->rt6_info->dst);
++              if (nh->mxc.mx)
++                      kfree(nh->mxc.mx);
++              list_del(&nh->next);
++              kfree(nh);
++      }
++
++      return err;
++}
++
++static int ip6_route_multipath_del(struct fib6_config *cfg)
++{
++      struct fib6_config r_cfg;
++      struct rtnexthop *rtnh;
++      int remaining;
++      int attrlen;
++      int err = 1, last_err = 0;
++
++      remaining = cfg->fc_mp_len;
++      rtnh = (struct rtnexthop *)cfg->fc_mp;
++
++      /* Parse a Multipath Entry */
++      while (rtnh_ok(rtnh, remaining)) {
++              memcpy(&r_cfg, cfg, sizeof(*cfg));
++              if (rtnh->rtnh_ifindex)
++                      r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
++
++              attrlen = rtnh_attrlen(rtnh);
++              if (attrlen > 0) {
++                      struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
++
++                      nla = nla_find(attrs, attrlen, RTA_GATEWAY);
++                      if (nla) {
++                              nla_memcpy(&r_cfg.fc_gateway, nla, 16);
++                              r_cfg.fc_flags |= RTF_GATEWAY;
++                      }
++              }
++              err = ip6_route_del(&r_cfg);
++              if (err)
++                      last_err = err;
++
+               rtnh = rtnh_next(rtnh, &remaining);
+       }
+ 
+@@ -2796,7 +2945,7 @@ static int inet6_rtm_delroute(struct sk_
+               return err;
+ 
+       if (cfg.fc_mp)
+-              return ip6_route_multipath(&cfg, 0);
++              return ip6_route_multipath_del(&cfg);
+       else
+               return ip6_route_del(&cfg);
+ }
+@@ -2811,7 +2960,7 @@ static int inet6_rtm_newroute(struct sk_
+               return err;
+ 
+       if (cfg.fc_mp)
+-              return ip6_route_multipath(&cfg, 1);
++              return ip6_route_multipath_add(&cfg);
+       else
+               return ip6_route_add(&cfg);
+ }
diff --git a/queue-4.2/macvtap-fix-tunsetsndbuf-values-64k.patch b/queue-4.2/macvtap-fix-tunsetsndbuf-values-64k.patch

new file mode 100644 (file)

index 0000000..d2cee3b
--- /dev/null
+++ b/queue-4.2/macvtap-fix-tunsetsndbuf-values-64k.patch
@@ -0,0 +1,47 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: "Michael S. Tsirkin" <mst@redhat.com>
+Date: Fri, 18 Sep 2015 13:41:09 +0300
+Subject: macvtap: fix TUNSETSNDBUF values > 64k
+
+From: "Michael S. Tsirkin" <mst@redhat.com>
+
+[ Upstream commit 3ea79249e81e5ed051f2e6480cbde896d99046e8 ]
+
+Upon TUNSETSNDBUF, macvtap reads the requested sndbuf size into
+a local variable u.
+commit 39ec7de7092b ("macvtap: fix uninitialized access on
+TUNSETIFF") changed its type to u16 (which is the right thing to
+do for all other macvtap ioctls), breaking all values > 64k.
+
+The value of TUNSETSNDBUF is actually a signed 32 bit integer, so
+the right thing to do is to read it into an int.
+
+Cc: David S. Miller <davem@davemloft.net>
+Fixes: 39ec7de7092b ("macvtap: fix uninitialized access on TUNSETIFF")
+Reported-by: Mark A. Peloquin
+Bisected-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
+Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Tested-by:  Matthew Rosato <mjrosato@linux.vnet.ibm.com>
+Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/macvtap.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/macvtap.c
++++ b/drivers/net/macvtap.c
+@@ -1111,10 +1111,10 @@ static long macvtap_ioctl(struct file *f
+               return 0;
+ 
+       case TUNSETSNDBUF:
+-              if (get_user(u, up))
++              if (get_user(s, sp))
+                       return -EFAULT;
+ 
+-              q->sk.sk_sndbuf = u;
++              q->sk.sk_sndbuf = s;
+               return 0;
+ 
+       case TUNGETVNETHDRSZ:
diff --git a/queue-4.2/mvneta-use-inband-status-only-when-explicitly-enabled.patch b/queue-4.2/mvneta-use-inband-status-only-when-explicitly-enabled.patch

new file mode 100644 (file)

index 0000000..48e61d1
--- /dev/null
+++ b/queue-4.2/mvneta-use-inband-status-only-when-explicitly-enabled.patch
@@ -0,0 +1,66 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Stas Sergeev <stsp@list.ru>
+Date: Mon, 20 Jul 2015 17:49:58 -0700
+Subject: mvneta: use inband status only when explicitly enabled
+
+From: Stas Sergeev <stsp@list.ru>
+
+[ Upstream commit f8af8e6eb95093d5ce5ebcc52bd1929b0433e172 in net-next tree,
+  will be pushed to Linus very soon. ]
+
+The commit 898b2970e2c9 ("mvneta: implement SGMII-based in-band link state
+signaling") implemented the link parameters auto-negotiation unconditionally.
+Unfortunately it appears that some HW that implements SGMII protocol,
+doesn't generate the inband status, so it is not possible to auto-negotiate
+anything with such HW.
+
+This patch enables the auto-negotiation only if explicitly requested with
+the 'managed' DT property.
+
+This patch fixes the following regression:
+https://lkml.org/lkml/2015/7/8/865
+
+Signed-off-by: Stas Sergeev <stsp@users.sourceforge.net>
+
+CC: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+CC: netdev@vger.kernel.org
+CC: linux-kernel@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvneta.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -3029,8 +3029,8 @@ static int mvneta_probe(struct platform_
+       const char *dt_mac_addr;
+       char hw_mac_addr[ETH_ALEN];
+       const char *mac_from;
++      const char *managed;
+       int phy_mode;
+-      int fixed_phy = 0;
+       int err;
+ 
+       /* Our multiqueue support is not complete, so for now, only
+@@ -3064,7 +3064,6 @@ static int mvneta_probe(struct platform_
+                       dev_err(&pdev->dev, "cannot register fixed PHY\n");
+                       goto err_free_irq;
+               }
+-              fixed_phy = 1;
+ 
+               /* In the case of a fixed PHY, the DT node associated
+                * to the PHY is the Ethernet MAC DT node.
+@@ -3088,8 +3087,10 @@ static int mvneta_probe(struct platform_
+       pp = netdev_priv(dev);
+       pp->phy_node = phy_node;
+       pp->phy_interface = phy_mode;
+-      pp->use_inband_status = (phy_mode == PHY_INTERFACE_MODE_SGMII) &&
+-                              fixed_phy;
++
++      err = of_property_read_string(dn, "managed", &managed);
++      pp->use_inband_status = (err == 0 &&
++                               strcmp(managed, "in-band-status") == 0);
+ 
+       pp->clk = devm_clk_get(&pdev->dev, NULL);
+       if (IS_ERR(pp->clk)) {
diff --git a/queue-4.2/net-dsa-actually-force-the-speed-on-the-cpu-port.patch b/queue-4.2/net-dsa-actually-force-the-speed-on-the-cpu-port.patch

new file mode 100644 (file)

index 0000000..1113327
--- /dev/null
+++ b/queue-4.2/net-dsa-actually-force-the-speed-on-the-cpu-port.patch
@@ -0,0 +1,64 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Russell King <rmk+kernel@arm.linux.org.uk>
+Date: Mon, 21 Sep 2015 21:42:59 +0100
+Subject: net: dsa: actually force the speed on the CPU port
+
+From: Russell King <rmk+kernel@arm.linux.org.uk>
+
+[ Upstream commit 53adc9e83028d9e35b6408231ebaf62a94a16e4d ]
+
+Commit 54d792f257c6 ("net: dsa: Centralise global and port setup
+code into mv88e6xxx.") merged in the 4.2 merge window broke the link
+speed forcing for the CPU port of Marvell DSA switches.  The original
+code was:
+
+        /* MAC Forcing register: don't force link, speed, duplex
+         * or flow control state to any particular values on physical
+         * ports, but force the CPU port and all DSA ports to 1000 Mb/s
+         * full duplex.
+         */
+        if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p))
+                REG_WRITE(addr, 0x01, 0x003e);
+        else
+                REG_WRITE(addr, 0x01, 0x0003);
+
+but the new code does a read-modify-write:
+
+                reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_PCS_CTRL);
+                if (dsa_is_cpu_port(ds, port) ||
+                    ds->dsa_port_mask & (1 << port)) {
+                        reg |= PORT_PCS_CTRL_FORCE_LINK |
+                                PORT_PCS_CTRL_LINK_UP |
+                                PORT_PCS_CTRL_DUPLEX_FULL |
+                                PORT_PCS_CTRL_FORCE_DUPLEX;
+                        if (mv88e6xxx_6065_family(ds))
+                                reg |= PORT_PCS_CTRL_100;
+                        else
+                                reg |= PORT_PCS_CTRL_1000;
+
+The link speed in the PCS control register is a two bit field.  Forcing
+the link speed in this way doesn't ensure that the bit field is set to
+the correct value - on the hardware I have here, the speed bitfield
+remains set to 0x03, resulting in the speed not being forced to gigabit.
+
+We must clear both bits before forcing the link speed.
+
+Fixes: 54d792f257c6 ("net: dsa: Centralise global and port setup code into mv88e6xxx.")
+Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
+Acked-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/mv88e6xxx.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/dsa/mv88e6xxx.c
++++ b/drivers/net/dsa/mv88e6xxx.c
+@@ -1387,6 +1387,7 @@ static int mv88e6xxx_setup_port(struct d
+               reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_PCS_CTRL);
+               if (dsa_is_cpu_port(ds, port) ||
+                   ds->dsa_port_mask & (1 << port)) {
++                      reg &= ~PORT_PCS_CTRL_UNFORCED;
+                       reg |= PORT_PCS_CTRL_FORCE_LINK |
+                               PORT_PCS_CTRL_LINK_UP |
+                               PORT_PCS_CTRL_DUPLEX_FULL |
diff --git a/queue-4.2/net-dsa-bcm_sf2-do-not-override-speed-settings.patch b/queue-4.2/net-dsa-bcm_sf2-do-not-override-speed-settings.patch

new file mode 100644 (file)

index 0000000..ec475b7
--- /dev/null
+++ b/queue-4.2/net-dsa-bcm_sf2-do-not-override-speed-settings.patch
@@ -0,0 +1,68 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Mon, 20 Jul 2015 17:49:55 -0700
+Subject: net: dsa: bcm_sf2: Do not override speed settings
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream d2eac98f7d1b950b762a7eca05a9ce0ea1d878d2 in net-next tree,
+  will be pushed to Linus very soon. ]
+
+The SF2 driver currently overrides speed settings for its port
+configured using a fixed PHY, this is both unnecessary and incorrect,
+because we keep feedback to the hardware parameters that we read from
+the PHY device, which in the case of a fixed PHY cannot possibly change
+speed.
+
+This is a required change to allow the fixed PHY code to allow
+registering a PHY with a link configured as DOWN by default and avoid
+some sort of circular dependency where we require the link_update
+callback to run to program the hardware, and we then utilize the fixed
+PHY parameters to program the hardware with the same settings.
+
+Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.c |   18 +-----------------
+ 1 file changed, 1 insertion(+), 17 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -905,15 +905,11 @@ static void bcm_sf2_sw_fixed_link_update
+                                        struct fixed_phy_status *status)
+ {
+       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+-      u32 duplex, pause, speed;
++      u32 duplex, pause;
+       u32 reg;
+ 
+       duplex = core_readl(priv, CORE_DUPSTS);
+       pause = core_readl(priv, CORE_PAUSESTS);
+-      speed = core_readl(priv, CORE_SPDSTS);
+-
+-      speed >>= (port * SPDSTS_SHIFT);
+-      speed &= SPDSTS_MASK;
+ 
+       status->link = 0;
+ 
+@@ -948,18 +944,6 @@ static void bcm_sf2_sw_fixed_link_update
+               reg &= ~LINK_STS;
+       core_writel(priv, reg, CORE_STS_OVERRIDE_GMIIP_PORT(port));
+ 
+-      switch (speed) {
+-      case SPDSTS_10:
+-              status->speed = SPEED_10;
+-              break;
+-      case SPDSTS_100:
+-              status->speed = SPEED_100;
+-              break;
+-      case SPDSTS_1000:
+-              status->speed = SPEED_1000;
+-              break;
+-      }
+-
+       if ((pause & (1 << port)) &&
+           (pause & (1 << (port + PAUSESTS_TX_PAUSE_SHIFT)))) {
+               status->asym_pause = 1;
diff --git a/queue-4.2/net-dsa-bcm_sf2-fix-64-bits-register-writes.patch b/queue-4.2/net-dsa-bcm_sf2-fix-64-bits-register-writes.patch

new file mode 100644 (file)

index 0000000..e58f176
--- /dev/null
+++ b/queue-4.2/net-dsa-bcm_sf2-fix-64-bits-register-writes.patch
@@ -0,0 +1,36 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Tue, 8 Sep 2015 20:06:41 -0700
+Subject: net: dsa: bcm_sf2: Fix 64-bits register writes
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 03679a14739a0d4c14b52ba65a69ff553bfba73b ]
+
+The macro to write 64-bits quantities to the 32-bits register swapped
+the value and offsets arguments, we want to preserve the ordering of the
+arguments with respect to how writel() is implemented for instance:
+value first, offset/base second.
+
+Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2.h
++++ b/drivers/net/dsa/bcm_sf2.h
+@@ -112,8 +112,8 @@ static inline u64 name##_readq(struct bc
+       spin_unlock(&priv->indir_lock);                                 \
+       return (u64)indir << 32 | dir;                                  \
+ }                                                                     \
+-static inline void name##_writeq(struct bcm_sf2_priv *priv, u32 off,  \
+-                                                      u64 val)        \
++static inline void name##_writeq(struct bcm_sf2_priv *priv, u64 val,  \
++                                                      u32 off)        \
+ {                                                                     \
+       spin_lock(&priv->indir_lock);                                   \
+       reg_writel(priv, upper_32_bits(val), REG_DIR_DATA_WRITE);       \
diff --git a/queue-4.2/net-dsa-bcm_sf2-fix-ageing-conditions-and-operation.patch b/queue-4.2/net-dsa-bcm_sf2-fix-ageing-conditions-and-operation.patch

new file mode 100644 (file)

index 0000000..7d08b24
--- /dev/null
+++ b/queue-4.2/net-dsa-bcm_sf2-fix-ageing-conditions-and-operation.patch
@@ -0,0 +1,74 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Sat, 5 Sep 2015 13:07:27 -0700
+Subject: net: dsa: bcm_sf2: Fix ageing conditions and operation
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit 39797a279d62972cd914ef580fdfacb13e508bf8 ]
+
+The comparison check between cur_hw_state and hw_state is currently
+invalid because cur_hw_state is right shifted by G_MISTP_SHIFT, while
+hw_state is not, so we end up comparing bits 2:0 with bits 7:5, which is
+going to cause an additional aging to occur. Fix this by not shifting
+cur_hw_state while reading it, but instead masking the value with the
+appropriately shifted bitmask.
+
+The other problem with the fast-ageing process is that we did not set
+the EN_AGE_DYNAMIC bit to request the ageing to occur for dynamically
+learned MAC addresses. Finally, write back 0 to the FAST_AGE_CTRL
+register to avoid leaving spurious bits set from one operation to the
+other.
+
+Fixes: 12f460f23423 ("net: dsa: bcm_sf2: add HW bridging support")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -418,7 +418,7 @@ static int bcm_sf2_sw_fast_age_port(stru
+       core_writel(priv, port, CORE_FAST_AGE_PORT);
+ 
+       reg = core_readl(priv, CORE_FAST_AGE_CTRL);
+-      reg |= EN_AGE_PORT | FAST_AGE_STR_DONE;
++      reg |= EN_AGE_PORT | EN_AGE_DYNAMIC | FAST_AGE_STR_DONE;
+       core_writel(priv, reg, CORE_FAST_AGE_CTRL);
+ 
+       do {
+@@ -432,6 +432,8 @@ static int bcm_sf2_sw_fast_age_port(stru
+       if (!timeout)
+               return -ETIMEDOUT;
+ 
++      core_writel(priv, 0, CORE_FAST_AGE_CTRL);
++
+       return 0;
+ }
+ 
+@@ -507,7 +509,7 @@ static int bcm_sf2_sw_br_set_stp_state(s
+       u32 reg;
+ 
+       reg = core_readl(priv, CORE_G_PCTL_PORT(port));
+-      cur_hw_state = reg >> G_MISTP_STATE_SHIFT;
++      cur_hw_state = reg & (G_MISTP_STATE_MASK << G_MISTP_STATE_SHIFT);
+ 
+       switch (state) {
+       case BR_STATE_DISABLED:
+@@ -531,10 +533,12 @@ static int bcm_sf2_sw_br_set_stp_state(s
+       }
+ 
+       /* Fast-age ARL entries if we are moving a port from Learning or
+-       * Forwarding state to Disabled, Blocking or Listening state
++       * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening
++       * state (hw_state)
+        */
+       if (cur_hw_state != hw_state) {
+-              if (cur_hw_state & 4 && !(hw_state & 4)) {
++              if (cur_hw_state >= G_MISTP_LEARN_STATE &&
++                  hw_state <= G_MISTP_LISTEN_STATE) {
+                       ret = bcm_sf2_sw_fast_age_port(ds, port);
+                       if (ret) {
+                               pr_err("%s: fast-ageing failed\n", __func__);
diff --git a/queue-4.2/net-eth-altera-fix-napi-poll_list-corruption.patch b/queue-4.2/net-eth-altera-fix-napi-poll_list-corruption.patch

new file mode 100644 (file)

index 0000000..055155b
--- /dev/null
+++ b/queue-4.2/net-eth-altera-fix-napi-poll_list-corruption.patch
@@ -0,0 +1,32 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Atsushi Nemoto <nemoto@toshiba-tops.co.jp>
+Date: Wed, 2 Sep 2015 17:49:29 +0900
+Subject: net: eth: altera: fix napi poll_list corruption
+
+From: Atsushi Nemoto <nemoto@toshiba-tops.co.jp>
+
+[ Upstream commit 4548a697e4969d695047cebd6d9af5e2f6cc728e ]
+
+tse_poll() calls __napi_complete() with irq enabled.  This leads to napi
+poll_list corruption and may stop all napi drivers from working.
+Use napi_complete() instead of __napi_complete().
+
+Signed-off-by: Atsushi Nemoto <nemoto@toshiba-tops.co.jp>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/altera/altera_tse_main.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/altera/altera_tse_main.c
++++ b/drivers/net/ethernet/altera/altera_tse_main.c
+@@ -511,8 +511,7 @@ static int tse_poll(struct napi_struct *
+ 
+       if (rxcomplete < budget) {
+ 
+-              napi_gro_flush(napi, false);
+-              __napi_complete(napi);
++              napi_complete(napi);
+ 
+               netdev_dbg(priv->dev,
+                          "NAPI Complete, did %d packets with budget %d\n",
diff --git a/queue-4.2/net-fec-clear-receive-interrupts-before-processing-a-packet.patch b/queue-4.2/net-fec-clear-receive-interrupts-before-processing-a-packet.patch

new file mode 100644 (file)

index 0000000..d020e99
--- /dev/null
+++ b/queue-4.2/net-fec-clear-receive-interrupts-before-processing-a-packet.patch
@@ -0,0 +1,39 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Russell King <rmk+kernel@arm.linux.org.uk>
+Date: Wed, 2 Sep 2015 17:24:14 +0800
+Subject: net: fec: clear receive interrupts before processing a packet
+
+From: Russell King <rmk+kernel@arm.linux.org.uk>
+
+[ Upstream commit ed63f1dcd5788d36f942fbcce350742385e3e18c ]
+
+This patch just re-submits the patch "db3421c114cfa6326" because the
+patch "4d494cdc92b3b9a0" removed the change.
+
+Clear any pending receive interrupt before we process a pending packet.
+This helps to avoid any spurious interrupts being raised after we have
+fully cleaned the receive ring, while still allowing an interrupt to be
+raised if we receive another packet.
+
+The position of this is critical: we must do this prior to reading the
+next packet status to avoid potentially dropping an interrupt when a
+packet is still pending.
+
+Acked-by: Fugang Duan <B38611@freescale.com>
+Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/freescale/fec_main.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -1402,6 +1402,7 @@ fec_enet_rx_queue(struct net_device *nde
+               if ((status & BD_ENET_RX_LAST) == 0)
+                       netdev_err(ndev, "rcv is not +last\n");
+ 
++              writel(FEC_ENET_RXF, fep->hwp + FEC_IEVENT);
+ 
+               /* Check for errors. */
+               if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO |
diff --git a/queue-4.2/net-ipv6-correct-pim6-mrt_lock-handling.patch b/queue-4.2/net-ipv6-correct-pim6-mrt_lock-handling.patch

new file mode 100644 (file)

index 0000000..f6623a8
--- /dev/null
+++ b/queue-4.2/net-ipv6-correct-pim6-mrt_lock-handling.patch
@@ -0,0 +1,35 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Richard Laing <richard.laing@alliedtelesis.co.nz>
+Date: Thu, 3 Sep 2015 13:52:31 +1200
+Subject: net/ipv6: Correct PIM6 mrt_lock handling
+
+From: Richard Laing <richard.laing@alliedtelesis.co.nz>
+
+[ Upstream commit 25b4a44c19c83d98e8c0807a7ede07c1f28eab8b ]
+
+In the IPv6 multicast routing code the mrt_lock was not being released
+correctly in the MFC iterator, as a result adding or deleting a MIF would
+cause a hang because the mrt_lock could not be acquired.
+
+This fix is a copy of the code for the IPv4 case and ensures that the lock
+is released correctly.
+
+Signed-off-by: Richard Laing <richard.laing@alliedtelesis.co.nz>
+Acked-by: Cong Wang <cwang@twopensource.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6mr.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/ip6mr.c
++++ b/net/ipv6/ip6mr.c
+@@ -550,7 +550,7 @@ static void ipmr_mfc_seq_stop(struct seq
+ 
+       if (it->cache == &mrt->mfc6_unres_queue)
+               spin_unlock_bh(&mfc_unres_lock);
+-      else if (it->cache == mrt->mfc6_cache_array)
++      else if (it->cache == &mrt->mfc6_cache_array[it->ct])
+               read_unlock(&mrt_lock);
+ }
+ 
diff --git a/queue-4.2/net-mlx4_core-capping-number-of-requested-msixs-to-max_msix.patch b/queue-4.2/net-mlx4_core-capping-number-of-requested-msixs-to-max_msix.patch

new file mode 100644 (file)

index 0000000..2dd1d4c
--- /dev/null
+++ b/queue-4.2/net-mlx4_core-capping-number-of-requested-msixs-to-max_msix.patch
@@ -0,0 +1,59 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Carol L Soto <clsoto@linux.vnet.ibm.com>
+Date: Thu, 27 Aug 2015 14:43:25 -0500
+Subject: net/mlx4_core: Capping number of requested MSIXs to MAX_MSIX
+
+From: Carol L Soto <clsoto@linux.vnet.ibm.com>
+
+[ Upstream commit 9293267a3e2a7a2555d8ddc8f9301525e5b03b1b ]
+
+We currently manage IRQs in pool_bm which is a bit field
+of MAX_MSIX bits. Thus, allocating more than MAX_MSIX
+interrupts can't be managed in pool_bm.
+Fixing this by capping number of requested MSIXs to
+MAX_MSIX.
+
+Signed-off-by: Matan Barak <matanb@mellanox.com>
+Signed-off-by: Carol L Soto <clsoto@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/main.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/main.c
++++ b/drivers/net/ethernet/mellanox/mlx4/main.c
+@@ -2654,9 +2654,14 @@ static void mlx4_enable_msi_x(struct mlx
+ 
+       if (msi_x) {
+               int nreq = dev->caps.num_ports * num_online_cpus() + 1;
++              bool shared_ports = false;
+ 
+               nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
+                            nreq);
++              if (nreq > MAX_MSIX) {
++                      nreq = MAX_MSIX;
++                      shared_ports = true;
++              }
+ 
+               entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
+               if (!entries)
+@@ -2679,6 +2684,9 @@ static void mlx4_enable_msi_x(struct mlx
+               bitmap_zero(priv->eq_table.eq[MLX4_EQ_ASYNC].actv_ports.ports,
+                           dev->caps.num_ports);
+ 
++              if (MLX4_IS_LEGACY_EQ_MODE(dev->caps))
++                      shared_ports = true;
++
+               for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) {
+                       if (i == MLX4_EQ_ASYNC)
+                               continue;
+@@ -2686,7 +2694,7 @@ static void mlx4_enable_msi_x(struct mlx
+                       priv->eq_table.eq[i].irq =
+                               entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector;
+ 
+-                      if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) {
++                      if (shared_ports) {
+                               bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
+                                           dev->caps.num_ports);
+                               /* We don't set affinity hint when there
diff --git a/queue-4.2/net-mlx4_en-really-allow-to-change-rss-key.patch b/queue-4.2/net-mlx4_en-really-allow-to-change-rss-key.patch

new file mode 100644 (file)

index 0000000..81e975d
--- /dev/null
+++ b/queue-4.2/net-mlx4_en-really-allow-to-change-rss-key.patch
@@ -0,0 +1,35 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 15 Sep 2015 18:29:47 -0700
+Subject: net/mlx4_en: really allow to change RSS key
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 4671fc6d47e0a0108fe24a4d830347d6a6ef4aa7 ]
+
+When changing rss key, we do not want to overwrite user provided key
+by the one provided by netdev_rss_key_fill(), which is the host random
+key generated at boot time.
+
+Fixes: 947cbb0ac242 ("net/mlx4_en: Support for configurable RSS hash function")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Eyal Perry <eyalpe@mellanox.com>
+CC: Amir Vadai <amirv@mellanox.com>
+Acked-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_rx.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+@@ -1250,8 +1250,6 @@ int mlx4_en_config_rss_steer(struct mlx4
+               rss_context->hash_fn = MLX4_RSS_HASH_TOP;
+               memcpy(rss_context->rss_key, priv->rss_key,
+                      MLX4_EN_RSS_KEY_SIZE);
+-              netdev_rss_key_fill(rss_context->rss_key,
+-                                  MLX4_EN_RSS_KEY_SIZE);
+       } else {
+               en_err(priv, "Unknown RSS hash function requested\n");
+               err = -EINVAL;
diff --git a/queue-4.2/net-mvneta-fix-dma-buffer-unmapping-in-mvneta_rx.patch b/queue-4.2/net-mvneta-fix-dma-buffer-unmapping-in-mvneta_rx.patch

new file mode 100644 (file)

index 0000000..88581ba
--- /dev/null
+++ b/queue-4.2/net-mvneta-fix-dma-buffer-unmapping-in-mvneta_rx.patch
@@ -0,0 +1,55 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Simon Guinot <simon.guinot@sequanux.org>
+Date: Tue, 15 Sep 2015 22:41:21 +0200
+Subject: net: mvneta: fix DMA buffer unmapping in mvneta_rx()
+
+From: Simon Guinot <simon.guinot@sequanux.org>
+
+[ Upstream commit daf158d0d544cec80b7b30deff8cfc59a6e17610 ]
+
+This patch fixes a regression introduced by the commit a84e32894191
+("net: mvneta: fix refilling for Rx DMA buffers"). Due to this commit
+the newly allocated Rx buffers are DMA-unmapped in place of those passed
+to the networking stack. Obviously, this causes data corruptions.
+
+This patch fixes the issue by ensuring that the right Rx buffers are
+DMA-unmapped.
+
+Reported-by: Oren Laskin <oren@igneous.io>
+Signed-off-by: Simon Guinot <simon.guinot@sequanux.org>
+Fixes: a84e32894191 ("net: mvneta: fix refilling for Rx DMA buffers")
+Cc: <stable@vger.kernel.org> # v3.8+
+Tested-by: Oren Laskin <oren@igneous.io>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvneta.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -1479,6 +1479,7 @@ static int mvneta_rx(struct mvneta_port
+               struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
+               struct sk_buff *skb;
+               unsigned char *data;
++              dma_addr_t phys_addr;
+               u32 rx_status;
+               int rx_bytes, err;
+ 
+@@ -1486,6 +1487,7 @@ static int mvneta_rx(struct mvneta_port
+               rx_status = rx_desc->status;
+               rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
+               data = (unsigned char *)rx_desc->buf_cookie;
++              phys_addr = rx_desc->buf_phys_addr;
+ 
+               if (!mvneta_rxq_desc_is_first_last(rx_status) ||
+                   (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
+@@ -1534,7 +1536,7 @@ static int mvneta_rx(struct mvneta_port
+               if (!skb)
+                       goto err_drop_frame;
+ 
+-              dma_unmap_single(dev->dev.parent, rx_desc->buf_phys_addr,
++              dma_unmap_single(dev->dev.parent, phys_addr,
+                                MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE);
+ 
+               rcvd_pkts++;
diff --git a/queue-4.2/net-phy-fixed_phy-handle-link-down-case.patch b/queue-4.2/net-phy-fixed_phy-handle-link-down-case.patch

new file mode 100644 (file)

index 0000000..1c9f04c
--- /dev/null
+++ b/queue-4.2/net-phy-fixed_phy-handle-link-down-case.patch
@@ -0,0 +1,65 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Stas Sergeev <stsp@list.ru>
+Date: Mon, 20 Jul 2015 17:49:56 -0700
+Subject: net: phy: fixed_phy: handle link-down case
+
+From: Stas Sergeev <stsp@list.ru>
+
+[ Upstream 868a4215be9a6d80548ccb74763b883dc99d32a2 in net-next tree,
+  will be pushed to Linus very soon. ]
+
+fixed_phy_register() currently hardcodes the fixed PHY link to 1, and
+expects to find a "speed" parameter to provide correct information
+towards the fixed PHY consumer.
+
+In a subsequent change, where we allow "managed" (e.g: (RS)GMII in-band
+status auto-negotiation) fixed PHYs, none of these parameters can be
+provided since they will be auto-negotiated, hence, we just provide a
+zero-initialized fixed_phy_status to fixed_phy_register() which makes it
+fail when we call fixed_phy_update_regs() since status.speed = 0 which
+makes us hit the "default" label and error out.
+
+Without this change, we would also see potentially inconsistent
+speed/duplex parameters for fixed PHYs when the link is DOWN.
+
+CC: netdev@vger.kernel.org
+CC: linux-kernel@vger.kernel.org
+Signed-off-by: Stas Sergeev <stsp@users.sourceforge.net>
+[florian: add more background to why this is correct and desirable]
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/fixed_phy.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/phy/fixed_phy.c
++++ b/drivers/net/phy/fixed_phy.c
+@@ -52,6 +52,10 @@ static int fixed_phy_update_regs(struct
+       u16 lpagb = 0;
+       u16 lpa = 0;
+ 
++      if (!fp->status.link)
++              goto done;
++      bmsr |= BMSR_LSTATUS | BMSR_ANEGCOMPLETE;
++
+       if (fp->status.duplex) {
+               bmcr |= BMCR_FULLDPLX;
+ 
+@@ -96,15 +100,13 @@ static int fixed_phy_update_regs(struct
+               }
+       }
+ 
+-      if (fp->status.link)
+-              bmsr |= BMSR_LSTATUS | BMSR_ANEGCOMPLETE;
+-
+       if (fp->status.pause)
+               lpa |= LPA_PAUSE_CAP;
+ 
+       if (fp->status.asym_pause)
+               lpa |= LPA_PAUSE_ASYM;
+ 
++done:
+       fp->regs[MII_PHYSID1] = 0;
+       fp->regs[MII_PHYSID2] = 0;
+ 
diff --git a/queue-4.2/net-revert-net_sched-move-tp-root-allocation-into-fw_init.patch b/queue-4.2/net-revert-net_sched-move-tp-root-allocation-into-fw_init.patch

new file mode 100644 (file)

index 0000000..9545684
--- /dev/null
+++ b/queue-4.2/net-revert-net_sched-move-tp-root-allocation-into-fw_init.patch
@@ -0,0 +1,93 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Tue, 22 Sep 2015 17:01:11 -0700
+Subject: net: revert "net_sched: move tp->root allocation into fw_init()"
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+[ Upstream commit d8aecb10115497f6cdf841df8c88ebb3ba25fa28 ]
+
+fw filter uses tp->root==NULL to check if it is the old method,
+so it doesn't need allocation at all in this case. This patch
+reverts the offending commit and adds some comments for old
+method to make it obvious.
+
+Fixes: 33f8b9ecdb15 ("net_sched: move tp->root allocation into fw_init()")
+Reported-by: Akshat Kakkar <akshat.1984@gmail.com>
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_fw.c |   30 +++++++++++++++---------------
+ 1 file changed, 15 insertions(+), 15 deletions(-)
+
+--- a/net/sched/cls_fw.c
++++ b/net/sched/cls_fw.c
+@@ -33,7 +33,6 @@
+ 
+ struct fw_head {
+       u32                     mask;
+-      bool                    mask_set;
+       struct fw_filter __rcu  *ht[HTSIZE];
+       struct rcu_head         rcu;
+ };
+@@ -84,7 +83,7 @@ static int fw_classify(struct sk_buff *s
+                       }
+               }
+       } else {
+-              /* old method */
++              /* Old method: classify the packet using its skb mark. */
+               if (id && (TC_H_MAJ(id) == 0 ||
+                          !(TC_H_MAJ(id ^ tp->q->handle)))) {
+                       res->classid = id;
+@@ -114,14 +113,9 @@ static unsigned long fw_get(struct tcf_p
+ 
+ static int fw_init(struct tcf_proto *tp)
+ {
+-      struct fw_head *head;
+-
+-      head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
+-      if (head == NULL)
+-              return -ENOBUFS;
+-
+-      head->mask_set = false;
+-      rcu_assign_pointer(tp->root, head);
++      /* We don't allocate fw_head here, because in the old method
++       * we don't need it at all.
++       */
+       return 0;
+ }
+ 
+@@ -252,7 +246,7 @@ static int fw_change(struct net *net, st
+       int err;
+ 
+       if (!opt)
+-              return handle ? -EINVAL : 0;
++              return handle ? -EINVAL : 0; /* Succeed if it is old method. */
+ 
+       err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy);
+       if (err < 0)
+@@ -302,11 +296,17 @@ static int fw_change(struct net *net, st
+       if (!handle)
+               return -EINVAL;
+ 
+-      if (!head->mask_set) {
+-              head->mask = 0xFFFFFFFF;
++      if (!head) {
++              u32 mask = 0xFFFFFFFF;
+               if (tb[TCA_FW_MASK])
+-                      head->mask = nla_get_u32(tb[TCA_FW_MASK]);
+-              head->mask_set = true;
++                      mask = nla_get_u32(tb[TCA_FW_MASK]);
++
++              head = kzalloc(sizeof(*head), GFP_KERNEL);
++              if (!head)
++                      return -ENOBUFS;
++              head->mask = mask;
++
++              rcu_assign_pointer(tp->root, head);
+       }
+ 
+       f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL);
diff --git a/queue-4.2/netlink-fix-autobind-race-condition-that-leads-to-zero-port-id.patch b/queue-4.2/netlink-fix-autobind-race-condition-that-leads-to-zero-port-id.patch

new file mode 100644 (file)

index 0000000..abe888b
--- /dev/null
+++ b/queue-4.2/netlink-fix-autobind-race-condition-that-leads-to-zero-port-id.patch
@@ -0,0 +1,92 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Fri, 18 Sep 2015 19:16:50 +0800
+Subject: netlink: Fix autobind race condition that leads to zero port ID
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+[ Upstream commit 1f770c0a09da855a2b51af6d19de97fb955eca85 ]
+
+The commit c0bb07df7d981e4091432754e30c9c720e2c0c78 ("netlink:
+Reset portid after netlink_insert failure") introduced a race
+condition where if two threads try to autobind the same socket
+one of them may end up with a zero port ID.  This led to kernel
+deadlocks that were observed by multiple people.
+
+This patch reverts that commit and instead fixes it by introducing
+a separate rhash_portid variable so that the real portid is only set
+after the socket has been successfully hashed.
+
+Fixes: c0bb07df7d98 ("netlink: Reset portid after netlink_insert failure")
+Reported-by: Tejun Heo <tj@kernel.org>
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |   12 +++++++-----
+ net/netlink/af_netlink.h |    1 +
+ 2 files changed, 8 insertions(+), 5 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -1019,7 +1019,7 @@ static inline int netlink_compare(struct
+       const struct netlink_compare_arg *x = arg->key;
+       const struct netlink_sock *nlk = ptr;
+ 
+-      return nlk->portid != x->portid ||
++      return nlk->rhash_portid != x->portid ||
+              !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet));
+ }
+ 
+@@ -1045,7 +1045,7 @@ static int __netlink_insert(struct netli
+ {
+       struct netlink_compare_arg arg;
+ 
+-      netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid);
++      netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->rhash_portid);
+       return rhashtable_lookup_insert_key(&table->hash, &arg,
+                                           &nlk_sk(sk)->node,
+                                           netlink_rhashtable_params);
+@@ -1107,7 +1107,7 @@ static int netlink_insert(struct sock *s
+           unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX))
+               goto err;
+ 
+-      nlk_sk(sk)->portid = portid;
++      nlk_sk(sk)->rhash_portid = portid;
+       sock_hold(sk);
+ 
+       err = __netlink_insert(table, sk);
+@@ -1119,10 +1119,12 @@ static int netlink_insert(struct sock *s
+                       err = -EOVERFLOW;
+               if (err == -EEXIST)
+                       err = -EADDRINUSE;
+-              nlk_sk(sk)->portid = 0;
+               sock_put(sk);
++              goto err;
+       }
+ 
++      nlk_sk(sk)->portid = portid;
++
+ err:
+       release_sock(sk);
+       return err;
+@@ -3233,7 +3235,7 @@ static inline u32 netlink_hash(const voi
+       const struct netlink_sock *nlk = data;
+       struct netlink_compare_arg arg;
+ 
+-      netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid);
++      netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->rhash_portid);
+       return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed);
+ }
+ 
+--- a/net/netlink/af_netlink.h
++++ b/net/netlink/af_netlink.h
+@@ -25,6 +25,7 @@ struct netlink_ring {
+ struct netlink_sock {
+       /* struct sock has to be the first member of netlink_sock */
+       struct sock             sk;
++      u32                     rhash_portid;
+       u32                     portid;
+       u32                     dst_portid;
+       u32                     dst_group;
diff --git a/queue-4.2/netlink-mmap-transform-mmap-skb-into-full-skb-on-taps.patch b/queue-4.2/netlink-mmap-transform-mmap-skb-into-full-skb-on-taps.patch

new file mode 100644 (file)

index 0000000..79ac4cf
--- /dev/null
+++ b/queue-4.2/netlink-mmap-transform-mmap-skb-into-full-skb-on-taps.patch
@@ -0,0 +1,116 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Thu, 10 Sep 2015 20:05:46 +0200
+Subject: netlink, mmap: transform mmap skb into full skb on taps
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit 1853c949646005b5959c483becde86608f548f24 ]
+
+Ken-ichirou reported that running netlink in mmap mode for receive in
+combination with nlmon will throw a NULL pointer dereference in
+__kfree_skb() on nlmon_xmit(), in my case I can also trigger an "unable
+to handle kernel paging request". The problem is the skb_clone() in
+__netlink_deliver_tap_skb() for skbs that are mmaped.
+
+I.e. the cloned skb doesn't have a destructor, whereas the mmap netlink
+skb has it pointed to netlink_skb_destructor(), set in the handler
+netlink_ring_setup_skb(). There, skb->head is being set to NULL, so
+that in such cases, __kfree_skb() doesn't perform a skb_release_data()
+via skb_release_all(), where skb->head is possibly being freed through
+kfree(head) into slab allocator, although netlink mmap skb->head points
+to the mmap buffer. Similarly, the same has to be done also for large
+netlink skbs where the data area is vmalloced. Therefore, as discussed,
+make a copy for these rather rare cases for now. This fixes the issue
+on my and Ken-ichirou's test-cases.
+
+Reference: http://thread.gmane.org/gmane.linux.network/371129
+Fixes: bcbde0d449ed ("net: netlink: virtual tap device management")
+Reported-by: Ken-ichirou MATSUZAWA <chamaken@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Tested-by: Ken-ichirou MATSUZAWA <chamaken@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |   30 +++++++++++++++++++++++-------
+ net/netlink/af_netlink.h |    9 +++++++++
+ 2 files changed, 32 insertions(+), 7 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -124,6 +124,24 @@ static inline u32 netlink_group_mask(u32
+       return group ? 1 << (group - 1) : 0;
+ }
+ 
++static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
++                                         gfp_t gfp_mask)
++{
++      unsigned int len = skb_end_offset(skb);
++      struct sk_buff *new;
++
++      new = alloc_skb(len, gfp_mask);
++      if (new == NULL)
++              return NULL;
++
++      NETLINK_CB(new).portid = NETLINK_CB(skb).portid;
++      NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group;
++      NETLINK_CB(new).creds = NETLINK_CB(skb).creds;
++
++      memcpy(skb_put(new, len), skb->data, len);
++      return new;
++}
++
+ int netlink_add_tap(struct netlink_tap *nt)
+ {
+       if (unlikely(nt->dev->type != ARPHRD_NETLINK))
+@@ -205,7 +223,11 @@ static int __netlink_deliver_tap_skb(str
+       int ret = -ENOMEM;
+ 
+       dev_hold(dev);
+-      nskb = skb_clone(skb, GFP_ATOMIC);
++
++      if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head))
++              nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
++      else
++              nskb = skb_clone(skb, GFP_ATOMIC);
+       if (nskb) {
+               nskb->dev = dev;
+               nskb->protocol = htons((u16) sk->sk_protocol);
+@@ -278,11 +300,6 @@ static void netlink_rcv_wake(struct sock
+ }
+ 
+ #ifdef CONFIG_NETLINK_MMAP
+-static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
+-{
+-      return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
+-}
+-
+ static bool netlink_rx_is_mmaped(struct sock *sk)
+ {
+       return nlk_sk(sk)->rx_ring.pg_vec != NULL;
+@@ -834,7 +851,6 @@ static void netlink_ring_set_copied(stru
+ }
+ 
+ #else /* CONFIG_NETLINK_MMAP */
+-#define netlink_skb_is_mmaped(skb)    false
+ #define netlink_rx_is_mmaped(sk)      false
+ #define netlink_tx_is_mmaped(sk)      false
+ #define netlink_mmap                  sock_no_mmap
+--- a/net/netlink/af_netlink.h
++++ b/net/netlink/af_netlink.h
+@@ -59,6 +59,15 @@ static inline struct netlink_sock *nlk_s
+       return container_of(sk, struct netlink_sock, sk);
+ }
+ 
++static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb)
++{
++#ifdef CONFIG_NETLINK_MMAP
++      return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
++#else
++      return false;
++#endif /* CONFIG_NETLINK_MMAP */
++}
++
+ struct netlink_table {
+       struct rhashtable       hash;
+       struct hlist_head       mc_list;
diff --git a/queue-4.2/netlink-replace-rhash_portid-with-bound.patch b/queue-4.2/netlink-replace-rhash_portid-with-bound.patch

new file mode 100644 (file)

index 0000000..e123ba1
--- /dev/null
+++ b/queue-4.2/netlink-replace-rhash_portid-with-bound.patch
@@ -0,0 +1,245 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Tue, 22 Sep 2015 11:38:56 +0800
+Subject: netlink: Replace rhash_portid with bound
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+[ Upstream commit da314c9923fed553a007785a901fd395b7eb6c19 ]
+
+On Mon, Sep 21, 2015 at 02:20:22PM -0400, Tejun Heo wrote:
+>
+> store_release and load_acquire are different from the usual memory
+> barriers and can't be paired this way.  You have to pair store_release
+> and load_acquire.  Besides, it isn't a particularly good idea to
+
+OK I've decided to drop the acquire/release helpers as they don't
+help us at all and simply pessimises the code by using full memory
+barriers (on some architectures) where only a write or read barrier
+is needed.
+
+> depend on memory barriers embedded in other data structures like the
+> above.  Here, especially, rhashtable_insert() would have write barrier
+> *before* the entry is hashed not necessarily *after*, which means that
+> in the above case, a socket which appears to have set bound to a
+> reader might not visible when the reader tries to look up the socket
+> on the hashtable.
+
+But you are right we do need an explicit write barrier here to
+ensure that the hashing is visible.
+
+> There's no reason to be overly smart here.  This isn't a crazy hot
+> path, write barriers tend to be very cheap, store_release more so.
+> Please just do smp_store_release() and note what it's paired with.
+
+It's not about being overly smart.  It's about actually understanding
+what's going on with the code.  I've seen too many instances of
+people simply sprinkling synchronisation primitives around without
+any knowledge of what is happening underneath, which is just a recipe
+for creating hard-to-debug races.
+
+> > @@ -1539,7 +1546,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
+> >            }
+> >    }
+> >
+> > -  if (!nlk->portid) {
+> > +  if (!nlk->bound) {
+>
+> I don't think you can skip load_acquire here just because this is the
+> second deref of the variable.  That doesn't change anything.  Race
+> condition could still happen between the first and second tests and
+> skipping the second would lead to the same kind of bug.
+
+The reason this one is OK is because we do not use nlk->portid or
+try to get nlk from the hash table before we return to user-space.
+
+However, there is a real bug here that none of these acquire/release
+helpers discovered.  The two bound tests here used to be a single
+one.  Now that they are separate it is entirely possible for another
+thread to come in the middle and bind the socket.  So we need to
+repeat the portid check in order to maintain consistency.
+
+> > @@ -1587,7 +1594,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
+> >        !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
+> >            return -EPERM;
+> >
+> > -  if (!nlk->portid)
+> > +  if (!nlk->bound)
+>
+> Don't we need load_acquire here too?  Is this path holding a lock
+> which makes that unnecessary?
+
+Ditto.
+
+---8<---
+The commit 1f770c0a09da855a2b51af6d19de97fb955eca85 ("netlink:
+Fix autobind race condition that leads to zero port ID") created
+some new races that can occur due to inconsistencies between the
+two port IDs.
+
+Tejun is right that a barrier is unavoidable.  Therefore I am
+reverting to the original patch that used a boolean to indicate
+that a user netlink socket has been bound.
+
+Barriers have been added where necessary to ensure that a valid
+portid and the hashed socket is visible.
+
+I have also changed netlink_insert to only return EBUSY if the
+socket is bound to a portid different to the requested one.  This
+combined with only reading nlk->bound once in netlink_bind fixes
+a race where two threads that bind the socket at the same time
+with different port IDs may both succeed.
+
+Fixes: 1f770c0a09da ("netlink: Fix autobind race condition that leads to zero port ID")
+Reported-by: Tejun Heo <tj@kernel.org>
+Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Nacked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |   39 ++++++++++++++++++++++++++++-----------
+ net/netlink/af_netlink.h |    2 +-
+ 2 files changed, 29 insertions(+), 12 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -1019,7 +1019,7 @@ static inline int netlink_compare(struct
+       const struct netlink_compare_arg *x = arg->key;
+       const struct netlink_sock *nlk = ptr;
+ 
+-      return nlk->rhash_portid != x->portid ||
++      return nlk->portid != x->portid ||
+              !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet));
+ }
+ 
+@@ -1045,7 +1045,7 @@ static int __netlink_insert(struct netli
+ {
+       struct netlink_compare_arg arg;
+ 
+-      netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->rhash_portid);
++      netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid);
+       return rhashtable_lookup_insert_key(&table->hash, &arg,
+                                           &nlk_sk(sk)->node,
+                                           netlink_rhashtable_params);
+@@ -1098,8 +1098,8 @@ static int netlink_insert(struct sock *s
+ 
+       lock_sock(sk);
+ 
+-      err = -EBUSY;
+-      if (nlk_sk(sk)->portid)
++      err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY;
++      if (nlk_sk(sk)->bound)
+               goto err;
+ 
+       err = -ENOMEM;
+@@ -1107,7 +1107,7 @@ static int netlink_insert(struct sock *s
+           unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX))
+               goto err;
+ 
+-      nlk_sk(sk)->rhash_portid = portid;
++      nlk_sk(sk)->portid = portid;
+       sock_hold(sk);
+ 
+       err = __netlink_insert(table, sk);
+@@ -1123,7 +1123,9 @@ static int netlink_insert(struct sock *s
+               goto err;
+       }
+ 
+-      nlk_sk(sk)->portid = portid;
++      /* We need to ensure that the socket is hashed and visible. */
++      smp_wmb();
++      nlk_sk(sk)->bound = portid;
+ 
+ err:
+       release_sock(sk);
+@@ -1509,6 +1511,7 @@ static int netlink_bind(struct socket *s
+       struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
+       int err;
+       long unsigned int groups = nladdr->nl_groups;
++      bool bound;
+ 
+       if (addr_len < sizeof(struct sockaddr_nl))
+               return -EINVAL;
+@@ -1525,9 +1528,14 @@ static int netlink_bind(struct socket *s
+                       return err;
+       }
+ 
+-      if (nlk->portid)
++      bound = nlk->bound;
++      if (bound) {
++              /* Ensure nlk->portid is up-to-date. */
++              smp_rmb();
++
+               if (nladdr->nl_pid != nlk->portid)
+                       return -EINVAL;
++      }
+ 
+       if (nlk->netlink_bind && groups) {
+               int group;
+@@ -1543,7 +1551,10 @@ static int netlink_bind(struct socket *s
+               }
+       }
+ 
+-      if (!nlk->portid) {
++      /* No need for barriers here as we return to user-space without
++       * using any of the bound attributes.
++       */
++      if (!bound) {
+               err = nladdr->nl_pid ?
+                       netlink_insert(sk, nladdr->nl_pid) :
+                       netlink_autobind(sock);
+@@ -1591,7 +1602,10 @@ static int netlink_connect(struct socket
+           !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
+               return -EPERM;
+ 
+-      if (!nlk->portid)
++      /* No need for barriers here as we return to user-space without
++       * using any of the bound attributes.
++       */
++      if (!nlk->bound)
+               err = netlink_autobind(sock);
+ 
+       if (err == 0) {
+@@ -2409,10 +2423,13 @@ static int netlink_sendmsg(struct socket
+               dst_group = nlk->dst_group;
+       }
+ 
+-      if (!nlk->portid) {
++      if (!nlk->bound) {
+               err = netlink_autobind(sock);
+               if (err)
+                       goto out;
++      } else {
++              /* Ensure nlk is hashed and visible. */
++              smp_rmb();
+       }
+ 
+       /* It's a really convoluted way for userland to ask for mmaped
+@@ -3235,7 +3252,7 @@ static inline u32 netlink_hash(const voi
+       const struct netlink_sock *nlk = data;
+       struct netlink_compare_arg arg;
+ 
+-      netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->rhash_portid);
++      netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid);
+       return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed);
+ }
+ 
+--- a/net/netlink/af_netlink.h
++++ b/net/netlink/af_netlink.h
+@@ -25,7 +25,6 @@ struct netlink_ring {
+ struct netlink_sock {
+       /* struct sock has to be the first member of netlink_sock */
+       struct sock             sk;
+-      u32                     rhash_portid;
+       u32                     portid;
+       u32                     dst_portid;
+       u32                     dst_group;
+@@ -36,6 +35,7 @@ struct netlink_sock {
+       unsigned long           state;
+       size_t                  max_recvmsg_len;
+       wait_queue_head_t       wait;
++      bool                    bound;
+       bool                    cb_running;
+       struct netlink_callback cb;
+       struct mutex            *cb_mutex;
diff --git a/queue-4.2/of_mdio-add-new-dt-property-managed-to-specify-the-phy-management-type.patch b/queue-4.2/of_mdio-add-new-dt-property-managed-to-specify-the-phy-management-type.patch

new file mode 100644 (file)

index 0000000..089b326
--- /dev/null
+++ b/queue-4.2/of_mdio-add-new-dt-property-managed-to-specify-the-phy-management-type.patch
@@ -0,0 +1,106 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Stas Sergeev <stsp@list.ru>
+Date: Mon, 20 Jul 2015 17:49:57 -0700
+Subject: of_mdio: add new DT property 'managed' to specify the PHY management type
+
+From: Stas Sergeev <stsp@list.ru>
+
+[ Upstream commit 4cba5c2103657d43d0886e4cff8004d95a3d0def in net-next tree,
+  will be pushed to Linus very soon. ]
+
+Currently the PHY management type is selected by the MAC driver arbitrarily.
+The decision is based on the presence of the "fixed-link" node and on a
+will of the driver's authors.
+This caused a regression recently, when mvneta driver suddenly started
+to use the in-band status for auto-negotiation on fixed links.
+It appears the auto-negotiation may not work when expected by the MAC driver.
+Sebastien Rannou explains:
+<< Yes, I confirm that my HW does not generate an in-band status. AFAIK, it's
+a PHY that aggregates 4xSGMIIs to 1xQSGMII ; the MAC side of the PHY (with
+inband status) is connected to the switch through QSGMII, and in this context
+we are on the media side of the PHY. >>
+https://lkml.org/lkml/2015/7/10/206
+
+This patch introduces the new string property 'managed' that allows
+the user to set the management type explicitly.
+The supported values are:
+"auto" - default. Uses either MDIO or nothing, depending on the presence
+of the fixed-link node
+"in-band-status" - use in-band status
+
+Signed-off-by: Stas Sergeev <stsp@users.sourceforge.net>
+
+CC: Rob Herring <robh+dt@kernel.org>
+CC: Pawel Moll <pawel.moll@arm.com>
+CC: Mark Rutland <mark.rutland@arm.com>
+CC: Ian Campbell <ijc+devicetree@hellion.org.uk>
+CC: Kumar Gala <galak@codeaurora.org>
+CC: Florian Fainelli <f.fainelli@gmail.com>
+CC: Grant Likely <grant.likely@linaro.org>
+CC: devicetree@vger.kernel.org
+CC: linux-kernel@vger.kernel.org
+CC: netdev@vger.kernel.org
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/devicetree/bindings/net/ethernet.txt |    4 ++++
+ drivers/of/of_mdio.c                               |   19 +++++++++++++++++--
+ 2 files changed, 21 insertions(+), 2 deletions(-)
+
+--- a/Documentation/devicetree/bindings/net/ethernet.txt
++++ b/Documentation/devicetree/bindings/net/ethernet.txt
+@@ -25,7 +25,11 @@ The following properties are common to t
+   flow control thresholds.
+ - tx-fifo-depth: the size of the controller's transmit fifo in bytes. This
+   is used for components that can have configurable fifo sizes.
++- managed: string, specifies the PHY management type. Supported values are:
++  "auto", "in-band-status". "auto" is the default, it usess MDIO for
++  management if fixed-link is not specified.
+ 
+ Child nodes of the Ethernet controller are typically the individual PHY devices
+ connected via the MDIO bus (sometimes the MDIO bus controller is separate).
+ They are described in the phy.txt file in this same directory.
++For non-MDIO PHY management see fixed-link.txt.
+--- a/drivers/of/of_mdio.c
++++ b/drivers/of/of_mdio.c
+@@ -266,7 +266,8 @@ EXPORT_SYMBOL(of_phy_attach);
+ bool of_phy_is_fixed_link(struct device_node *np)
+ {
+       struct device_node *dn;
+-      int len;
++      int len, err;
++      const char *managed;
+ 
+       /* New binding */
+       dn = of_get_child_by_name(np, "fixed-link");
+@@ -275,6 +276,10 @@ bool of_phy_is_fixed_link(struct device_
+               return true;
+       }
+ 
++      err = of_property_read_string(np, "managed", &managed);
++      if (err == 0 && strcmp(managed, "auto") != 0)
++              return true;
++
+       /* Old binding */
+       if (of_get_property(np, "fixed-link", &len) &&
+           len == (5 * sizeof(__be32)))
+@@ -289,8 +294,18 @@ int of_phy_register_fixed_link(struct de
+       struct fixed_phy_status status = {};
+       struct device_node *fixed_link_node;
+       const __be32 *fixed_link_prop;
+-      int len;
++      int len, err;
+       struct phy_device *phy;
++      const char *managed;
++
++      err = of_property_read_string(np, "managed", &managed);
++      if (err == 0) {
++              if (strcmp(managed, "in-band-status") == 0) {
++                      /* status is zeroed, namely its .link member */
++                      phy = fixed_phy_register(PHY_POLL, &status, np);
++                      return IS_ERR(phy) ? PTR_ERR(phy) : 0;
++              }
++      }
+ 
+       /* New binding */
+       fixed_link_node = of_get_child_by_name(np, "fixed-link");
diff --git a/queue-4.2/openvswitch-zero-flows-on-allocation.patch b/queue-4.2/openvswitch-zero-flows-on-allocation.patch

new file mode 100644 (file)

index 0000000..f65ab90
--- /dev/null
+++ b/queue-4.2/openvswitch-zero-flows-on-allocation.patch
@@ -0,0 +1,116 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Jesse Gross <jesse@nicira.com>
+Date: Mon, 21 Sep 2015 20:21:20 -0700
+Subject: openvswitch: Zero flows on allocation.
+
+From: Jesse Gross <jesse@nicira.com>
+
+[ Upstream commit ae5f2fb1d51fa128a460bcfbe3c56d7ab8bf6a43 ]
+
+When support for megaflows was introduced, OVS needed to start
+installing flows with a mask applied to them. Since masking is an
+expensive operation, OVS also had an optimization that would only
+take the parts of the flow keys that were covered by a non-zero
+mask. The values stored in the remaining pieces should not matter
+because they are masked out.
+
+While this works fine for the purposes of matching (which must always
+look at the mask), serialization to netlink can be problematic. Since
+the flow and the mask are serialized separately, the uninitialized
+portions of the flow can be encoded with whatever values happen to be
+present.
+
+In terms of functionality, this has little effect since these fields
+will be masked out by definition. However, it leaks kernel memory to
+userspace, which is a potential security vulnerability. It is also
+possible that other code paths could look at the masked key and get
+uninitialized data, although this does not currently appear to be an
+issue in practice.
+
+This removes the mask optimization for flows that are being installed.
+This was always intended to be the case as the mask optimizations were
+really targeting per-packet flow operations.
+
+Fixes: 03f0d916 ("openvswitch: Mega flow implementation")
+Signed-off-by: Jesse Gross <jesse@nicira.com>
+Acked-by: Pravin B Shelar <pshelar@nicira.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/datapath.c   |    4 ++--
+ net/openvswitch/flow_table.c |   23 ++++++++++++-----------
+ net/openvswitch/flow_table.h |    2 +-
+ 3 files changed, 15 insertions(+), 14 deletions(-)
+
+--- a/net/openvswitch/datapath.c
++++ b/net/openvswitch/datapath.c
+@@ -920,7 +920,7 @@ static int ovs_flow_cmd_new(struct sk_bu
+       if (error)
+               goto err_kfree_flow;
+ 
+-      ovs_flow_mask_key(&new_flow->key, &key, &mask);
++      ovs_flow_mask_key(&new_flow->key, &key, true, &mask);
+ 
+       /* Extract flow identifier. */
+       error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
+@@ -1047,7 +1047,7 @@ static struct sw_flow_actions *get_flow_
+       struct sw_flow_key masked_key;
+       int error;
+ 
+-      ovs_flow_mask_key(&masked_key, key, mask);
++      ovs_flow_mask_key(&masked_key, key, true, mask);
+       error = ovs_nla_copy_actions(a, &masked_key, &acts, log);
+       if (error) {
+               OVS_NLERR(log,
+--- a/net/openvswitch/flow_table.c
++++ b/net/openvswitch/flow_table.c
+@@ -56,20 +56,21 @@ static u16 range_n_bytes(const struct sw
+ }
+ 
+ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
+-                     const struct sw_flow_mask *mask)
++                     bool full, const struct sw_flow_mask *mask)
+ {
+-      const long *m = (const long *)((const u8 *)&mask->key +
+-                              mask->range.start);
+-      const long *s = (const long *)((const u8 *)src +
+-                              mask->range.start);
+-      long *d = (long *)((u8 *)dst + mask->range.start);
++      int start = full ? 0 : mask->range.start;
++      int len = full ? sizeof *dst : range_n_bytes(&mask->range);
++      const long *m = (const long *)((const u8 *)&mask->key + start);
++      const long *s = (const long *)((const u8 *)src + start);
++      long *d = (long *)((u8 *)dst + start);
+       int i;
+ 
+-      /* The memory outside of the 'mask->range' are not set since
+-       * further operations on 'dst' only uses contents within
+-       * 'mask->range'.
++      /* If 'full' is true then all of 'dst' is fully initialized. Otherwise,
++       * if 'full' is false the memory outside of the 'mask->range' is left
++       * uninitialized. This can be used as an optimization when further
++       * operations on 'dst' only use contents within 'mask->range'.
+        */
+-      for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
++      for (i = 0; i < len; i += sizeof(long))
+               *d++ = *s++ & *m++;
+ }
+ 
+@@ -473,7 +474,7 @@ static struct sw_flow *masked_flow_looku
+       u32 hash;
+       struct sw_flow_key masked_key;
+ 
+-      ovs_flow_mask_key(&masked_key, unmasked, mask);
++      ovs_flow_mask_key(&masked_key, unmasked, false, mask);
+       hash = flow_hash(&masked_key, &mask->range);
+       head = find_bucket(ti, hash);
+       hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
+--- a/net/openvswitch/flow_table.h
++++ b/net/openvswitch/flow_table.h
+@@ -86,5 +86,5 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid
+ bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *);
+ 
+ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
+-                     const struct sw_flow_mask *mask);
++                     bool full, const struct sw_flow_mask *mask);
+ #endif /* flow_table.h */
diff --git a/queue-4.2/phylib-fix-device-deletion-order-in-mdiobus_unregister.patch b/queue-4.2/phylib-fix-device-deletion-order-in-mdiobus_unregister.patch

new file mode 100644 (file)

index 0000000..632af65
--- /dev/null
+++ b/queue-4.2/phylib-fix-device-deletion-order-in-mdiobus_unregister.patch
@@ -0,0 +1,80 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Mark Salter <msalter@redhat.com>
+Date: Tue, 1 Sep 2015 09:36:05 -0400
+Subject: phylib: fix device deletion order in mdiobus_unregister()
+
+From: Mark Salter <msalter@redhat.com>
+
+[ Upstream commit b6c6aedcbcbacd7b0cb4b64ed5ac835bc1c60a03 ]
+
+commit 8b63ec1837fa ("phylib: Make PHYs children of their MDIO bus, not
+the bus' parent.") uncovered a problem in mdiobus_unregister() which
+leads to this warning when I reboot an APM Mustang (arm64) platform:
+
+  WARNING: CPU: 7 PID: 4239 at fs/sysfs/group.c:224 sysfs_remove_group+0xa0/0xa4()
+  sysfs group fffffe0000e07a10 not found for kobject 'xgene-mii-eth0:03'
+  ...
+  CPU: 7 PID: 4239 Comm: reboot Tainted: G            E   4.2.0-0.18.el7.test15.aarch64 #1
+  Hardware name: AppliedMicro Mustang/Mustang, BIOS 1.1.0 Aug 26 2015
+  Call Trace:
+  [<fffffe000009739c>] dump_backtrace+0x0/0x170
+  [<fffffe000009752c>] show_stack+0x20/0x2c
+  [<fffffe00007436f0>] dump_stack+0x78/0x9c
+  [<fffffe00000c2cb4>] warn_slowpath_common+0xa0/0xd8
+  [<fffffe00000c2d60>] warn_slowpath_fmt+0x74/0x88
+  [<fffffe0000293d3c>] sysfs_remove_group+0x9c/0xa4
+  [<fffffe00004a8bac>] dpm_sysfs_remove+0x5c/0x70
+  [<fffffe000049b388>] device_del+0x44/0x208
+  [<fffffe000049b578>] device_unregister+0x2c/0x7c
+  [<fffffe000050dc68>] mdiobus_unregister+0x48/0x94
+  [<fffffe000052afd0>] xgene_enet_mdio_remove+0x28/0x44
+  [<fffffe000052d3f0>] xgene_enet_remove+0xd0/0xd8
+  [<fffffe000052d424>] xgene_enet_shutdown+0x2c/0x3c
+  [<fffffe00004a204c>] platform_drv_shutdown+0x24/0x40
+  [<fffffe000049d4f4>] device_shutdown+0xf0/0x1b4
+  [<fffffe00000e31ec>] kernel_restart_prepare+0x40/0x4c
+  [<fffffe00000e32f8>] kernel_restart+0x1c/0x80
+  [<fffffe00000e3670>] SyS_reboot+0x17c/0x250
+
+The problem is that mdiobus_unregister() deletes the bus device before
+unregistering the phy devices on the bus. This wasn't a problem before
+because the phys were not children of the bus:
+
+  /sys/devices/platform/APMC0D05:00/net/eth0/xgene-mii-eth0:03
+  /sys/devices/platform/APMC0D05:00/net/eth0/xgene-mii-eth0
+
+But now that they are:
+
+  /sys/devices/platform/APMC0D05:00/net/eth0/xgene-mii-eth0/xgene-mii-eth0:03
+
+when mdiobus_unregister deletes the bus device, the phy subdirs are
+removed from sysfs also. So when the phys are unregistered afterward,
+we get the warning. This patch changes the order so that phys are
+unregistered before the bus device is deleted.
+
+Fixes: 8b63ec1837fa ("phylib: Make PHYs children of their MDIO bus, not the bus' parent.")
+Signed-off-by: Mark Salter <msalter@redhat.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Tested-by: Mark Langsdorf <mlangsdo@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/mdio_bus.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/phy/mdio_bus.c
++++ b/drivers/net/phy/mdio_bus.c
+@@ -303,12 +303,12 @@ void mdiobus_unregister(struct mii_bus *
+       BUG_ON(bus->state != MDIOBUS_REGISTERED);
+       bus->state = MDIOBUS_UNREGISTERED;
+ 
+-      device_del(&bus->dev);
+       for (i = 0; i < PHY_MAX_ADDR; i++) {
+               if (bus->phy_map[i])
+                       device_unregister(&bus->phy_map[i]->dev);
+               bus->phy_map[i] = NULL;
+       }
++      device_del(&bus->dev);
+ }
+ EXPORT_SYMBOL(mdiobus_unregister);
+ 
diff --git a/queue-4.2/ppp-fix-lockdep-splat-in-ppp_dev_uninit.patch b/queue-4.2/ppp-fix-lockdep-splat-in-ppp_dev_uninit.patch

new file mode 100644 (file)

index 0000000..db6825e
--- /dev/null
+++ b/queue-4.2/ppp-fix-lockdep-splat-in-ppp_dev_uninit.patch
@@ -0,0 +1,104 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Thu, 24 Sep 2015 12:54:01 +0200
+Subject: ppp: fix lockdep splat in ppp_dev_uninit()
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+[ Upstream commit 58a89ecaca53736aa465170530acea4f8be34ab4 ]
+
+ppp_dev_uninit() locks all_ppp_mutex while under rtnl mutex protection.
+ppp_create_interface() must then lock these mutexes in that same order
+to avoid possible deadlock.
+
+[  120.880011] ======================================================
+[  120.880011] [ INFO: possible circular locking dependency detected ]
+[  120.880011] 4.2.0 #1 Not tainted
+[  120.880011] -------------------------------------------------------
+[  120.880011] ppp-apitest/15827 is trying to acquire lock:
+[  120.880011]  (&pn->all_ppp_mutex){+.+.+.}, at: [<ffffffffa0145f56>] ppp_dev_uninit+0x64/0xb0 [ppp_generic]
+[  120.880011]
+[  120.880011] but task is already holding lock:
+[  120.880011]  (rtnl_mutex){+.+.+.}, at: [<ffffffff812e4255>] rtnl_lock+0x12/0x14
+[  120.880011]
+[  120.880011] which lock already depends on the new lock.
+[  120.880011]
+[  120.880011]
+[  120.880011] the existing dependency chain (in reverse order) is:
+[  120.880011]
+[  120.880011] -> #1 (rtnl_mutex){+.+.+.}:
+[  120.880011]        [<ffffffff81073a6f>] lock_acquire+0xcf/0x10e
+[  120.880011]        [<ffffffff813ab18a>] mutex_lock_nested+0x56/0x341
+[  120.880011]        [<ffffffff812e4255>] rtnl_lock+0x12/0x14
+[  120.880011]        [<ffffffff812d9d94>] register_netdev+0x11/0x27
+[  120.880011]        [<ffffffffa0147b17>] ppp_ioctl+0x289/0xc98 [ppp_generic]
+[  120.880011]        [<ffffffff8113b367>] do_vfs_ioctl+0x4ea/0x532
+[  120.880011]        [<ffffffff8113b3fd>] SyS_ioctl+0x4e/0x7d
+[  120.880011]        [<ffffffff813ad7d7>] entry_SYSCALL_64_fastpath+0x12/0x6f
+[  120.880011]
+[  120.880011] -> #0 (&pn->all_ppp_mutex){+.+.+.}:
+[  120.880011]        [<ffffffff8107334e>] __lock_acquire+0xb07/0xe76
+[  120.880011]        [<ffffffff81073a6f>] lock_acquire+0xcf/0x10e
+[  120.880011]        [<ffffffff813ab18a>] mutex_lock_nested+0x56/0x341
+[  120.880011]        [<ffffffffa0145f56>] ppp_dev_uninit+0x64/0xb0 [ppp_generic]
+[  120.880011]        [<ffffffff812d5263>] rollback_registered_many+0x19e/0x252
+[  120.880011]        [<ffffffff812d5381>] rollback_registered+0x29/0x38
+[  120.880011]        [<ffffffff812d53fa>] unregister_netdevice_queue+0x6a/0x77
+[  120.880011]        [<ffffffffa0146a94>] ppp_release+0x42/0x79 [ppp_generic]
+[  120.880011]        [<ffffffff8112d9f6>] __fput+0xec/0x192
+[  120.880011]        [<ffffffff8112dacc>] ____fput+0x9/0xb
+[  120.880011]        [<ffffffff8105447a>] task_work_run+0x66/0x80
+[  120.880011]        [<ffffffff81001801>] prepare_exit_to_usermode+0x8c/0xa7
+[  120.880011]        [<ffffffff81001900>] syscall_return_slowpath+0xe4/0x104
+[  120.880011]        [<ffffffff813ad931>] int_ret_from_sys_call+0x25/0x9f
+[  120.880011]
+[  120.880011] other info that might help us debug this:
+[  120.880011]
+[  120.880011]  Possible unsafe locking scenario:
+[  120.880011]
+[  120.880011]        CPU0                    CPU1
+[  120.880011]        ----                    ----
+[  120.880011]   lock(rtnl_mutex);
+[  120.880011]                                lock(&pn->all_ppp_mutex);
+[  120.880011]                                lock(rtnl_mutex);
+[  120.880011]   lock(&pn->all_ppp_mutex);
+[  120.880011]
+[  120.880011]  *** DEADLOCK ***
+
+Fixes: 8cb775bc0a34 ("ppp: fix device unregistration upon netns deletion")
+Reported-by: Sedat Dilek <sedat.dilek@gmail.com>
+Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ppp/ppp_generic.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ppp/ppp_generic.c
++++ b/drivers/net/ppp/ppp_generic.c
+@@ -2742,6 +2742,7 @@ static struct ppp *ppp_create_interface(
+        */
+       dev_net_set(dev, net);
+ 
++      rtnl_lock();
+       mutex_lock(&pn->all_ppp_mutex);
+ 
+       if (unit < 0) {
+@@ -2772,7 +2773,7 @@ static struct ppp *ppp_create_interface(
+       ppp->file.index = unit;
+       sprintf(dev->name, "ppp%d", unit);
+ 
+-      ret = register_netdev(dev);
++      ret = register_netdevice(dev);
+       if (ret != 0) {
+               unit_put(&pn->units_idr, unit);
+               netdev_err(ppp->dev, "PPP: couldn't register device %s (%d)\n",
+@@ -2784,6 +2785,7 @@ static struct ppp *ppp_create_interface(
+ 
+       atomic_inc(&ppp_unit_count);
+       mutex_unlock(&pn->all_ppp_mutex);
++      rtnl_unlock();
+ 
+       *retp = 0;
+       return ppp;
diff --git a/queue-4.2/rtnetlink-catch-eopnotsupp-errors-from-ndo_bridge_getlink.patch b/queue-4.2/rtnetlink-catch-eopnotsupp-errors-from-ndo_bridge_getlink.patch

new file mode 100644 (file)

index 0000000..93322fa
--- /dev/null
+++ b/queue-4.2/rtnetlink-catch-eopnotsupp-errors-from-ndo_bridge_getlink.patch
@@ -0,0 +1,96 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Roopa Prabhu <roopa@cumulusnetworks.com>
+Date: Tue, 15 Sep 2015 14:44:29 -0700
+Subject: rtnetlink: catch -EOPNOTSUPP errors from ndo_bridge_getlink
+
+From: Roopa Prabhu <roopa@cumulusnetworks.com>
+
+[ Upstream commit d64f69b0373a7d0bcec8b5da7712977518a8f42b ]
+
+problem reported:
+       kernel 4.1.3
+       ------------
+       # bridge vlan
+       port    vlan ids
+       eth0     1 PVID Egress Untagged
+               90
+               91
+               92
+               93
+               94
+               95
+               96
+               97
+               98
+               99
+               100
+
+       vmbr0    1 PVID Egress Untagged
+               94
+
+       kernel 4.2
+       -----------
+       # bridge vlan
+       port    vlan ids
+
+ndo_bridge_getlink can return -EOPNOTSUPP when an interface's
+ndo_bridge_getlink op is set to switchdev_port_bridge_getlink
+and CONFIG_SWITCHDEV is not defined. This today can happen to
+bond, rocker and team devices. This patch adds -EOPNOTSUPP
+checks after calls to ndo_bridge_getlink.
+
+Fixes: 85fdb956726ff2a ("switchdev: cut over to new switchdev_port_bridge_getlink")
+Reported-by: Alexandre DERUMIER <aderumier@odiso.com>
+Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |   26 ++++++++++++++++----------
+ 1 file changed, 16 insertions(+), 10 deletions(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -3021,6 +3021,7 @@ static int rtnl_bridge_getlink(struct sk
+       u32 portid = NETLINK_CB(cb->skb).portid;
+       u32 seq = cb->nlh->nlmsg_seq;
+       u32 filter_mask = 0;
++      int err;
+ 
+       if (nlmsg_len(cb->nlh) > sizeof(struct ifinfomsg)) {
+               struct nlattr *extfilt;
+@@ -3041,20 +3042,25 @@ static int rtnl_bridge_getlink(struct sk
+               struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+ 
+               if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
+-                      if (idx >= cb->args[0] &&
+-                          br_dev->netdev_ops->ndo_bridge_getlink(
+-                                  skb, portid, seq, dev, filter_mask,
+-                                  NLM_F_MULTI) < 0)
+-                              break;
++                      if (idx >= cb->args[0]) {
++                              err = br_dev->netdev_ops->ndo_bridge_getlink(
++                                              skb, portid, seq, dev,
++                                              filter_mask, NLM_F_MULTI);
++                              if (err < 0 && err != -EOPNOTSUPP)
++                                      break;
++                      }
+                       idx++;
+               }
+ 
+               if (ops->ndo_bridge_getlink) {
+-                      if (idx >= cb->args[0] &&
+-                          ops->ndo_bridge_getlink(skb, portid, seq, dev,
+-                                                  filter_mask,
+-                                                  NLM_F_MULTI) < 0)
+-                              break;
++                      if (idx >= cb->args[0]) {
++                              err = ops->ndo_bridge_getlink(skb, portid,
++                                                            seq, dev,
++                                                            filter_mask,
++                                                            NLM_F_MULTI);
++                              if (err < 0 && err != -EOPNOTSUPP)
++                                      break;
++                      }
+                       idx++;
+               }
+       }
diff --git a/queue-4.2/sctp-fix-race-on-protocol-netns-initialization.patch b/queue-4.2/sctp-fix-race-on-protocol-netns-initialization.patch

new file mode 100644 (file)

index 0000000..7f9bcc0
--- /dev/null
+++ b/queue-4.2/sctp-fix-race-on-protocol-netns-initialization.patch
@@ -0,0 +1,232 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Date: Thu, 10 Sep 2015 17:31:15 -0300
+Subject: sctp: fix race on protocol/netns initialization
+
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+
+[ Upstream commit 8e2d61e0aed2b7c4ecb35844fe07e0b2b762dee4 ]
+
+Consider sctp module is unloaded and is being requested because a user
+is creating a sctp socket.
+
+During initialization, sctp will add the new protocol type and then
+initialize pernet subsys:
+
+        status = sctp_v4_protosw_init();
+        if (status)
+                goto err_protosw_init;
+
+        status = sctp_v6_protosw_init();
+        if (status)
+                goto err_v6_protosw_init;
+
+        status = register_pernet_subsys(&sctp_net_ops);
+
+The problem is that after those calls to sctp_v{4,6}_protosw_init(), it
+is possible for userspace to create SCTP sockets as if the module were
+already fully loaded. If that happens, one of the possible effects is
+that we will have readers for net->sctp.local_addr_list list earlier
+than expected and sctp_net_init() does not take precautions while
+dealing with that list, leading to a potential panic but not limited to
+that, as sctp_sock_init() will copy a bunch of blank/partially
+initialized values from net->sctp.
+
+The race happens like this:
+
+     CPU 0                           |  CPU 1
+  socket()                           |
+   __sock_create                     | socket()
+    inet_create                      |  __sock_create
+     list_for_each_entry_rcu(        |
+        answer, &inetsw[sock->type], |
+        list) {                      |   inet_create
+      /* no hits */                  |
+     if (unlikely(err)) {            |
+      ...                            |
+      request_module()               |
+      /* socket creation is blocked  |
+       * the module is fully loaded  |
+       */                            |
+       sctp_init                     |
+        sctp_v4_protosw_init         |
+         inet_register_protosw       |
+          list_add_rcu(&p->list,     |
+                       last_perm);   |
+                                     |  list_for_each_entry_rcu(
+                                     |     answer, &inetsw[sock->type],
+        sctp_v6_protosw_init         |     list) {
+                                     |     /* hit, so assumes protocol
+                                     |      * is already loaded
+                                     |      */
+                                     |  /* socket creation continues
+                                     |   * before netns is initialized
+                                     |   */
+        register_pernet_subsys       |
+
+Simply inverting the initialization order between
+register_pernet_subsys() and sctp_v4_protosw_init() is not possible
+because register_pernet_subsys() will create a control sctp socket, so
+the protocol must be already visible by then. Deferring the socket
+creation to a work-queue is not good, especially because we lose the
+ability to handle its errors.
+
+So, as suggested by Vlad, the fix is to split netns initialization in
+two moments: defaults and control socket, so that the defaults are
+already loaded by the time we register the protocol, while control socket
+initialization is kept at the same moment it is today.
+
+Fixes: 4db67e808640 ("sctp: Make the address lists per network namespace")
+Signed-off-by: Vlad Yasevich <vyasevich@gmail.com>
+Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/protocol.c |   64 +++++++++++++++++++++++++++++++++-------------------
+ 1 file changed, 41 insertions(+), 23 deletions(-)
+
+--- a/net/sctp/protocol.c
++++ b/net/sctp/protocol.c
+@@ -1166,7 +1166,7 @@ static void sctp_v4_del_protocol(void)
+       unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
+ }
+ 
+-static int __net_init sctp_net_init(struct net *net)
++static int __net_init sctp_defaults_init(struct net *net)
+ {
+       int status;
+ 
+@@ -1259,12 +1259,6 @@ static int __net_init sctp_net_init(stru
+ 
+       sctp_dbg_objcnt_init(net);
+ 
+-      /* Initialize the control inode/socket for handling OOTB packets.  */
+-      if ((status = sctp_ctl_sock_init(net))) {
+-              pr_err("Failed to initialize the SCTP control sock\n");
+-              goto err_ctl_sock_init;
+-      }
+-
+       /* Initialize the local address list. */
+       INIT_LIST_HEAD(&net->sctp.local_addr_list);
+       spin_lock_init(&net->sctp.local_addr_lock);
+@@ -1280,9 +1274,6 @@ static int __net_init sctp_net_init(stru
+ 
+       return 0;
+ 
+-err_ctl_sock_init:
+-      sctp_dbg_objcnt_exit(net);
+-      sctp_proc_exit(net);
+ err_init_proc:
+       cleanup_sctp_mibs(net);
+ err_init_mibs:
+@@ -1291,15 +1282,12 @@ err_sysctl_register:
+       return status;
+ }
+ 
+-static void __net_exit sctp_net_exit(struct net *net)
++static void __net_exit sctp_defaults_exit(struct net *net)
+ {
+       /* Free the local address list */
+       sctp_free_addr_wq(net);
+       sctp_free_local_addr_list(net);
+ 
+-      /* Free the control endpoint.  */
+-      inet_ctl_sock_destroy(net->sctp.ctl_sock);
+-
+       sctp_dbg_objcnt_exit(net);
+ 
+       sctp_proc_exit(net);
+@@ -1307,9 +1295,32 @@ static void __net_exit sctp_net_exit(str
+       sctp_sysctl_net_unregister(net);
+ }
+ 
+-static struct pernet_operations sctp_net_ops = {
+-      .init = sctp_net_init,
+-      .exit = sctp_net_exit,
++static struct pernet_operations sctp_defaults_ops = {
++      .init = sctp_defaults_init,
++      .exit = sctp_defaults_exit,
++};
++
++static int __net_init sctp_ctrlsock_init(struct net *net)
++{
++      int status;
++
++      /* Initialize the control inode/socket for handling OOTB packets.  */
++      status = sctp_ctl_sock_init(net);
++      if (status)
++              pr_err("Failed to initialize the SCTP control sock\n");
++
++      return status;
++}
++
++static void __net_init sctp_ctrlsock_exit(struct net *net)
++{
++      /* Free the control endpoint.  */
++      inet_ctl_sock_destroy(net->sctp.ctl_sock);
++}
++
++static struct pernet_operations sctp_ctrlsock_ops = {
++      .init = sctp_ctrlsock_init,
++      .exit = sctp_ctrlsock_exit,
+ };
+ 
+ /* Initialize the universe into something sensible.  */
+@@ -1442,8 +1453,11 @@ static __init int sctp_init(void)
+       sctp_v4_pf_init();
+       sctp_v6_pf_init();
+ 
+-      status = sctp_v4_protosw_init();
++      status = register_pernet_subsys(&sctp_defaults_ops);
++      if (status)
++              goto err_register_defaults;
+ 
++      status = sctp_v4_protosw_init();
+       if (status)
+               goto err_protosw_init;
+ 
+@@ -1451,9 +1465,9 @@ static __init int sctp_init(void)
+       if (status)
+               goto err_v6_protosw_init;
+ 
+-      status = register_pernet_subsys(&sctp_net_ops);
++      status = register_pernet_subsys(&sctp_ctrlsock_ops);
+       if (status)
+-              goto err_register_pernet_subsys;
++              goto err_register_ctrlsock;
+ 
+       status = sctp_v4_add_protocol();
+       if (status)
+@@ -1469,12 +1483,14 @@ out:
+ err_v6_add_protocol:
+       sctp_v4_del_protocol();
+ err_add_protocol:
+-      unregister_pernet_subsys(&sctp_net_ops);
+-err_register_pernet_subsys:
++      unregister_pernet_subsys(&sctp_ctrlsock_ops);
++err_register_ctrlsock:
+       sctp_v6_protosw_exit();
+ err_v6_protosw_init:
+       sctp_v4_protosw_exit();
+ err_protosw_init:
++      unregister_pernet_subsys(&sctp_defaults_ops);
++err_register_defaults:
+       sctp_v4_pf_exit();
+       sctp_v6_pf_exit();
+       sctp_sysctl_unregister();
+@@ -1507,12 +1523,14 @@ static __exit void sctp_exit(void)
+       sctp_v6_del_protocol();
+       sctp_v4_del_protocol();
+ 
+-      unregister_pernet_subsys(&sctp_net_ops);
++      unregister_pernet_subsys(&sctp_ctrlsock_ops);
+ 
+       /* Free protosw registrations */
+       sctp_v6_protosw_exit();
+       sctp_v4_protosw_exit();
+ 
++      unregister_pernet_subsys(&sctp_defaults_ops);
++
+       /* Unregister with socket layer. */
+       sctp_v6_pf_exit();
+       sctp_v4_pf_exit();
diff --git a/queue-4.2/sock-diag-fix-panic-in-sock_diag_put_filterinfo.patch b/queue-4.2/sock-diag-fix-panic-in-sock_diag_put_filterinfo.patch

new file mode 100644 (file)

index 0000000..ec075e4
--- /dev/null
+++ b/queue-4.2/sock-diag-fix-panic-in-sock_diag_put_filterinfo.patch
@@ -0,0 +1,45 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 2 Sep 2015 14:00:36 +0200
+Subject: sock, diag: fix panic in sock_diag_put_filterinfo
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit b382c08656000c12a146723a153b85b13a855b49 ]
+
+diag socket's sock_diag_put_filterinfo() dumps classic BPF programs
+upon request to user space (ss -0 -b). However, native eBPF programs
+attached to sockets (SO_ATTACH_BPF) cannot be dumped with this method:
+
+Their orig_prog is always NULL. However, sock_diag_put_filterinfo()
+unconditionally tries to access its filter length resp. wants to copy
+the filter insns from there. Internal cBPF to eBPF transformations
+attached to sockets don't have this issue, as orig_prog state is kept.
+
+It's currently only used by packet sockets. If we would want to add
+native eBPF support in the future, this needs to be done through
+a different attribute than PACKET_DIAG_FILTER to not confuse possible
+user space disassemblers that work on diag data.
+
+Fixes: 89aa075832b0 ("net: sock: allow eBPF programs to be attached to sockets")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Acked-by: Alexei Starovoitov <ast@plumgrid.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock_diag.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/core/sock_diag.c
++++ b/net/core/sock_diag.c
+@@ -90,6 +90,9 @@ int sock_diag_put_filterinfo(bool may_re
+               goto out;
+ 
+       fprog = filter->prog->orig_prog;
++      if (!fprog)
++              goto out;
++
+       flen = bpf_classic_proglen(fprog);
+ 
+       attr = nla_reserve(skb, attrtype, flen);
diff --git a/queue-4.2/tcp-add-proper-ts-val-into-rst-packets.patch b/queue-4.2/tcp-add-proper-ts-val-into-rst-packets.patch

new file mode 100644 (file)

index 0000000..325221d
--- /dev/null
+++ b/queue-4.2/tcp-add-proper-ts-val-into-rst-packets.patch
@@ -0,0 +1,65 @@
+From foo@baz Wed Sep 30 05:25:07 CEST 2015
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 23 Sep 2015 14:00:21 -0700
+Subject: tcp: add proper TS val into RST packets
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 675ee231d960af2af3606b4480324e26797eb010 ]
+
+RST packets sent on behalf of TCP connections with TS option (RFC 7323
+TCP timestamps) have incorrect TS val (set to 0), but correct TS ecr.
+
+A > B: Flags [S], seq 0, win 65535, options [mss 1000,nop,nop,TS val 100
+ecr 0], length 0
+B > A: Flags [S.], seq 2444755794, ack 1, win 28960, options [mss
+1460,nop,nop,TS val 7264344 ecr 100], length 0
+A > B: Flags [.], ack 1, win 65535, options [nop,nop,TS val 110 ecr
+7264344], length 0
+
+B > A: Flags [R.], seq 1, ack 1, win 28960, options [nop,nop,TS val 0
+ecr 110], length 0
+
+We need to call skb_mstamp_get() to get proper TS val,
+derived from skb->skb_mstamp
+
+Note that RFC 1323 was advocating to not send TS option in RST segment,
+but RFC 7323 recommends the opposite :
+
+  Once TSopt has been successfully negotiated, that is both <SYN> and
+  <SYN,ACK> contain TSopt, the TSopt MUST be sent in every non-<RST>
+  segment for the duration of the connection, and SHOULD be sent in an
+  <RST> segment (see Section 5.2 for details)
+
+Note this RFC recommends to send TS val = 0, but we believe it is
+premature : We do not know if all TCP stacks are properly
+handling the receive side :
+
+   When an <RST> segment is
+   received, it MUST NOT be subjected to the PAWS check by verifying an
+   acceptable value in SEG.TSval, and information from the Timestamps
+   option MUST NOT be used to update connection state information.
+   SEG.TSecr MAY be used to provide stricter <RST> acceptance checks.
+
+In 5 years, if/when all TCP stacks are RFC 7323 ready, we might consider
+to decide to send TS val = 0, if it buys something.
+
+Fixes: 7faee5c0d514 ("tcp: remove TCP_SKB_CB(skb)->when")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2898,6 +2898,7 @@ void tcp_send_active_reset(struct sock *
+       skb_reserve(skb, MAX_TCP_HEADER);
+       tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
+                            TCPHDR_ACK | TCPHDR_RST);
++      skb_mstamp_get(&skb->skb_mstamp);
+       /* Send it off. */
+       if (tcp_transmit_skb(sk, skb, 0, priority))
+               NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 30 Sep 2015 03:31:55 +0000 (05:31 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 30 Sep 2015 03:31:55 +0000 (05:31 +0200)
queue-4.2/bridge-fix-igmpv3-mldv2-report-parsing.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/fib_rules-fix-fib-rule-dumps-across-multiple-skbs.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/fix-af_packet-abi-breakage-in-4.2.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/ipv6-fix-exthdrs-offload-registration-in-out_rt-path.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/ipv6-fix-multipath-route-replace-error-recovery.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/macvtap-fix-tunsetsndbuf-values-64k.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/mvneta-use-inband-status-only-when-explicitly-enabled.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/net-dsa-actually-force-the-speed-on-the-cpu-port.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/net-dsa-bcm_sf2-do-not-override-speed-settings.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/net-dsa-bcm_sf2-fix-64-bits-register-writes.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/net-dsa-bcm_sf2-fix-ageing-conditions-and-operation.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/net-eth-altera-fix-napi-poll_list-corruption.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/net-fec-clear-receive-interrupts-before-processing-a-packet.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/net-ipv6-correct-pim6-mrt_lock-handling.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/net-mlx4_core-capping-number-of-requested-msixs-to-max_msix.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/net-mlx4_en-really-allow-to-change-rss-key.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/net-mvneta-fix-dma-buffer-unmapping-in-mvneta_rx.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/net-phy-fixed_phy-handle-link-down-case.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/net-revert-net_sched-move-tp-root-allocation-into-fw_init.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/netlink-fix-autobind-race-condition-that-leads-to-zero-port-id.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/netlink-mmap-transform-mmap-skb-into-full-skb-on-taps.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/netlink-replace-rhash_portid-with-bound.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/of_mdio-add-new-dt-property-managed-to-specify-the-phy-management-type.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/openvswitch-zero-flows-on-allocation.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/phylib-fix-device-deletion-order-in-mdiobus_unregister.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/ppp-fix-lockdep-splat-in-ppp_dev_uninit.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/rtnetlink-catch-eopnotsupp-errors-from-ndo_bridge_getlink.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/sctp-fix-race-on-protocol-netns-initialization.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/sock-diag-fix-panic-in-sock_diag_put_filterinfo.patch	[new file with mode: 0644]	patch \| blob
queue-4.2/tcp-add-proper-ts-val-into-rst-packets.patch	[new file with mode: 0644]	patch \| blob