4.9-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 12 Jan 2017 20:38:36 +0000 (21:38 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 12 Jan 2017 20:38:36 +0000 (21:38 +0100)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 12 Jan 2017 20:38:36 +0000 (21:38 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 12 Jan 2017 20:38:36 +0000 (21:38 +0100)
diff --git a/queue-4.9/bpf-change-back-to-orig-prog-on-too-many-passes.patch b/queue-4.9/bpf-change-back-to-orig-prog-on-too-many-passes.patch

new file mode 100644 (file)

index 0000000..d407b33
--- /dev/null
+++ b/queue-4.9/bpf-change-back-to-orig-prog-on-too-many-passes.patch
@@ -0,0 +1,34 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Sat, 7 Jan 2017 00:26:33 +0100
+Subject: bpf: change back to orig prog on too many passes
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+
+[ Upstream commit 9d5ecb09d525469abd1a10c096cb5a17206523f2 ]
+
+If after too many passes still no image could be emitted, then
+swap back to the original program as we do in all other cases
+and don't use the one with blinding.
+
+Fixes: 959a75791603 ("bpf, x86: add support for constant blinding")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/net/bpf_jit_comp.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -1172,6 +1172,8 @@ struct bpf_prog *bpf_int_jit_compile(str
+               set_memory_ro((unsigned long)header, header->pages);
+               prog->bpf_func = (void *)image;
+               prog->jited = 1;
++      } else {
++              prog = orig_prog;
+       }
+ 
+ out_addrs:
diff --git a/queue-4.9/drop_monitor-add-missing-call-to-genlmsg_end.patch b/queue-4.9/drop_monitor-add-missing-call-to-genlmsg_end.patch

new file mode 100644 (file)

index 0000000..f8bd5b7
--- /dev/null
+++ b/queue-4.9/drop_monitor-add-missing-call-to-genlmsg_end.patch
@@ -0,0 +1,72 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Reiter Wolfgang <wr0112358@gmail.com>
+Date: Sat, 31 Dec 2016 21:11:57 +0100
+Subject: drop_monitor: add missing call to genlmsg_end
+
+From: Reiter Wolfgang <wr0112358@gmail.com>
+
+
+[ Upstream commit 4200462d88f47f3759bdf4705f87e207b0f5b2e4 ]
+
+Update nlmsg_len field with genlmsg_end to enable userspace processing
+using nlmsg_next helper. Also adds error handling.
+
+Signed-off-by: Reiter Wolfgang <wr0112358@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/drop_monitor.c |   33 ++++++++++++++++++++++++---------
+ 1 file changed, 24 insertions(+), 9 deletions(-)
+
+--- a/net/core/drop_monitor.c
++++ b/net/core/drop_monitor.c
+@@ -80,6 +80,7 @@ static struct sk_buff *reset_per_cpu_dat
+       struct nlattr *nla;
+       struct sk_buff *skb;
+       unsigned long flags;
++      void *msg_header;
+ 
+       al = sizeof(struct net_dm_alert_msg);
+       al += dm_hit_limit * sizeof(struct net_dm_drop_point);
+@@ -87,17 +88,31 @@ static struct sk_buff *reset_per_cpu_dat
+ 
+       skb = genlmsg_new(al, GFP_KERNEL);
+ 
+-      if (skb) {
+-              genlmsg_put(skb, 0, 0, &net_drop_monitor_family,
+-                              0, NET_DM_CMD_ALERT);
+-              nla = nla_reserve(skb, NLA_UNSPEC,
+-                                sizeof(struct net_dm_alert_msg));
+-              msg = nla_data(nla);
+-              memset(msg, 0, al);
+-      } else {
+-              mod_timer(&data->send_timer, jiffies + HZ / 10);
++      if (!skb)
++              goto err;
++
++      msg_header = genlmsg_put(skb, 0, 0, &net_drop_monitor_family,
++                               0, NET_DM_CMD_ALERT);
++      if (!msg_header) {
++              nlmsg_free(skb);
++              skb = NULL;
++              goto err;
++      }
++      nla = nla_reserve(skb, NLA_UNSPEC,
++                        sizeof(struct net_dm_alert_msg));
++      if (!nla) {
++              nlmsg_free(skb);
++              skb = NULL;
++              goto err;
+       }
++      msg = nla_data(nla);
++      memset(msg, 0, al);
++      genlmsg_end(skb, msg_header);
++      goto out;
+ 
++err:
++      mod_timer(&data->send_timer, jiffies + HZ / 10);
++out:
+       spin_lock_irqsave(&data->lock, flags);
+       swap(data->skb, skb);
+       spin_unlock_irqrestore(&data->lock, flags);
diff --git a/queue-4.9/drop_monitor-consider-inserted-data-in-genlmsg_end.patch b/queue-4.9/drop_monitor-consider-inserted-data-in-genlmsg_end.patch

new file mode 100644 (file)

index 0000000..9c48bd3
--- /dev/null
+++ b/queue-4.9/drop_monitor-consider-inserted-data-in-genlmsg_end.patch
@@ -0,0 +1,48 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Reiter Wolfgang <wr0112358@gmail.com>
+Date: Tue, 3 Jan 2017 01:39:10 +0100
+Subject: drop_monitor: consider inserted data in genlmsg_end
+
+From: Reiter Wolfgang <wr0112358@gmail.com>
+
+
+[ Upstream commit 3b48ab2248e61408910e792fe84d6ec466084c1a ]
+
+Final nlmsg_len field update must reflect inserted net_dm_drop_point
+data.
+
+This patch depends on previous patch:
+"drop_monitor: add missing call to genlmsg_end"
+
+Signed-off-by: Reiter Wolfgang <wr0112358@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/drop_monitor.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/net/core/drop_monitor.c
++++ b/net/core/drop_monitor.c
+@@ -107,7 +107,6 @@ static struct sk_buff *reset_per_cpu_dat
+       }
+       msg = nla_data(nla);
+       memset(msg, 0, al);
+-      genlmsg_end(skb, msg_header);
+       goto out;
+ 
+ err:
+@@ -117,6 +116,13 @@ out:
+       swap(data->skb, skb);
+       spin_unlock_irqrestore(&data->lock, flags);
+ 
++      if (skb) {
++              struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
++              struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlh);
++
++              genlmsg_end(skb, genlmsg_data(gnlh));
++      }
++
+       return skb;
+ }
+ 
diff --git a/queue-4.9/flow_dissector-update-pptp-handling-to-avoid-null-pointer-deref.patch b/queue-4.9/flow_dissector-update-pptp-handling-to-avoid-null-pointer-deref.patch

new file mode 100644 (file)

index 0000000..043df4e
--- /dev/null
+++ b/queue-4.9/flow_dissector-update-pptp-handling-to-avoid-null-pointer-deref.patch
@@ -0,0 +1,82 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Ian Kumlien <ian.kumlien@gmail.com>
+Date: Mon, 2 Jan 2017 09:18:35 +0100
+Subject: flow_dissector: Update pptp handling to avoid null pointer deref.
+
+From: Ian Kumlien <ian.kumlien@gmail.com>
+
+
+[ Upstream commit d0af683407a26a4437d8fa6e283ea201f2ae8146 ]
+
+__skb_flow_dissect can be called with a skb or a data packet, either
+can be NULL. All calls seem to have been moved to __skb_header_pointer
+except the pptp handling which is still calling skb_header_pointer.
+
+skb_header_pointer will use skb->data and thus:
+[  109.556866] BUG: unable to handle kernel NULL pointer dereference at 0000000000000080
+[  109.557102] IP: [<ffffffff88dc02f8>] __skb_flow_dissect+0xa88/0xce0
+[  109.557263] PGD 0
+[  109.557338]
+[  109.557484] Oops: 0000 [#1] SMP
+[  109.557562] Modules linked in: chaoskey
+[  109.557783] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.9.0 #79
+[  109.557867] Hardware name: Supermicro A1SRM-LN7F/LN5F/A1SRM-LN7F-2758, BIOS 1.0c 11/04/2015
+[  109.557957] task: ffff94085c27bc00 task.stack: ffffb745c0068000
+[  109.558041] RIP: 0010:[<ffffffff88dc02f8>]  [<ffffffff88dc02f8>] __skb_flow_dissect+0xa88/0xce0
+[  109.558203] RSP: 0018:ffff94087fc83d40  EFLAGS: 00010206
+[  109.558286] RAX: 0000000000000130 RBX: ffffffff8975bf80 RCX: ffff94084fab6800
+[  109.558373] RDX: 0000000000000010 RSI: 000000000000000c RDI: 0000000000000000
+[  109.558460] RBP: 0000000000000b88 R08: 0000000000000000 R09: 0000000000000022
+[  109.558547] R10: 0000000000000008 R11: ffff94087fc83e04 R12: 0000000000000000
+[  109.558763] R13: ffff94084fab6800 R14: ffff94087fc83e04 R15: 000000000000002f
+[  109.558979] FS:  0000000000000000(0000) GS:ffff94087fc80000(0000) knlGS:0000000000000000
+[  109.559326] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  109.559539] CR2: 0000000000000080 CR3: 0000000281809000 CR4: 00000000001026e0
+[  109.559753] Stack:
+[  109.559957]  000000000000000c ffff94084fab6822 0000000000000001 ffff94085c2b5fc0
+[  109.560578]  0000000000000001 0000000000002000 0000000000000000 0000000000000000
+[  109.561200]  0000000000000000 0000000000000000 0000000000000000 0000000000000000
+[  109.561820] Call Trace:
+[  109.562027]  <IRQ>
+[  109.562108]  [<ffffffff88dfb4fa>] ? eth_get_headlen+0x7a/0xf0
+[  109.562522]  [<ffffffff88c5a35a>] ? igb_poll+0x96a/0xe80
+[  109.562737]  [<ffffffff88dc912b>] ? net_rx_action+0x20b/0x350
+[  109.562953]  [<ffffffff88546d68>] ? __do_softirq+0xe8/0x280
+[  109.563169]  [<ffffffff8854704a>] ? irq_exit+0xaa/0xb0
+[  109.563382]  [<ffffffff8847229b>] ? do_IRQ+0x4b/0xc0
+[  109.563597]  [<ffffffff8902d4ff>] ? common_interrupt+0x7f/0x7f
+[  109.563810]  <EOI>
+[  109.563890]  [<ffffffff88d57530>] ? cpuidle_enter_state+0x130/0x2c0
+[  109.564304]  [<ffffffff88d57520>] ? cpuidle_enter_state+0x120/0x2c0
+[  109.564520]  [<ffffffff8857eacf>] ? cpu_startup_entry+0x19f/0x1f0
+[  109.564737]  [<ffffffff8848d55a>] ? start_secondary+0x12a/0x140
+[  109.564950] Code: 83 e2 20 a8 80 0f 84 60 01 00 00 c7 04 24 08 00
+00 00 66 85 d2 0f 84 be fe ff ff e9 69 fe ff ff 8b 34 24 89 f2 83 c2
+04 66 85 c0 <41> 8b 84 24 80 00 00 00 0f 49 d6 41 8d 31 01 d6 41 2b 84
+24 84
+[  109.569959] RIP  [<ffffffff88dc02f8>] __skb_flow_dissect+0xa88/0xce0
+[  109.570245]  RSP <ffff94087fc83d40>
+[  109.570453] CR2: 0000000000000080
+
+Fixes: ab10dccb1160 ("rps: Inspect PPTP encapsulated by GRE to get flow hash")
+Signed-off-by: Ian Kumlien <ian.kumlien@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/flow_dissector.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/core/flow_dissector.c
++++ b/net/core/flow_dissector.c
+@@ -445,8 +445,9 @@ ip_proto_again:
+                       if (hdr->flags & GRE_ACK)
+                               offset += sizeof(((struct pptp_gre_header *)0)->ack);
+ 
+-                      ppp_hdr = skb_header_pointer(skb, nhoff + offset,
+-                                                   sizeof(_ppp_hdr), _ppp_hdr);
++                      ppp_hdr = __skb_header_pointer(skb, nhoff + offset,
++                                                   sizeof(_ppp_hdr),
++                                                   data, hlen, _ppp_hdr);
+                       if (!ppp_hdr)
+                               goto out_bad;
+ 
diff --git a/queue-4.9/gro-disable-frag0-optimization-on-ipv6-ext-headers.patch b/queue-4.9/gro-disable-frag0-optimization-on-ipv6-ext-headers.patch

new file mode 100644 (file)

index 0000000..588a116
--- /dev/null
+++ b/queue-4.9/gro-disable-frag0-optimization-on-ipv6-ext-headers.patch
@@ -0,0 +1,67 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Tue, 10 Jan 2017 12:24:15 -0800
+Subject: gro: Disable frag0 optimization on IPv6 ext headers
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+
+[ Upstream commit 57ea52a865144aedbcd619ee0081155e658b6f7d ]
+
+The GRO fast path caches the frag0 address.  This address becomes
+invalid if frag0 is modified by pskb_may_pull or its variants.
+So whenever that happens we must disable the frag0 optimization.
+
+This is usually done through the combination of gro_header_hard
+and gro_header_slow, however, the IPv6 extension header path did
+the pulling directly and would continue to use the GRO fast path
+incorrectly.
+
+This patch fixes it by disabling the fast path when we enter the
+IPv6 extension header path.
+
+Fixes: 78a478d0efd9 ("gro: Inline skb_gro_header and cache frag0 virtual address")
+Reported-by: Slava Shwartsman <slavash@mellanox.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/netdevice.h |    9 +++++++--
+ net/ipv6/ip6_offload.c    |    1 +
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -2502,14 +2502,19 @@ static inline int skb_gro_header_hard(st
+       return NAPI_GRO_CB(skb)->frag0_len < hlen;
+ }
+ 
++static inline void skb_gro_frag0_invalidate(struct sk_buff *skb)
++{
++      NAPI_GRO_CB(skb)->frag0 = NULL;
++      NAPI_GRO_CB(skb)->frag0_len = 0;
++}
++
+ static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
+                                       unsigned int offset)
+ {
+       if (!pskb_may_pull(skb, hlen))
+               return NULL;
+ 
+-      NAPI_GRO_CB(skb)->frag0 = NULL;
+-      NAPI_GRO_CB(skb)->frag0_len = 0;
++      skb_gro_frag0_invalidate(skb);
+       return skb->data + offset;
+ }
+ 
+--- a/net/ipv6/ip6_offload.c
++++ b/net/ipv6/ip6_offload.c
+@@ -191,6 +191,7 @@ static struct sk_buff **ipv6_gro_receive
+       ops = rcu_dereference(inet6_offloads[proto]);
+       if (!ops || !ops->callbacks.gro_receive) {
+               __pskb_pull(skb, skb_gro_offset(skb));
++              skb_gro_frag0_invalidate(skb);
+               proto = ipv6_gso_pull_exthdrs(skb, proto);
+               skb_gro_pull(skb, -skb_transport_offset(skb));
+               skb_reset_transport_header(skb);
diff --git a/queue-4.9/gro-enter-slow-path-if-there-is-no-tailroom.patch b/queue-4.9/gro-enter-slow-path-if-there-is-no-tailroom.patch

new file mode 100644 (file)

index 0000000..5891b4f
--- /dev/null
+++ b/queue-4.9/gro-enter-slow-path-if-there-is-no-tailroom.patch
@@ -0,0 +1,39 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Tue, 10 Jan 2017 12:24:01 -0800
+Subject: gro: Enter slow-path if there is no tailroom
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+
+[ Upstream commit 1272ce87fa017ca4cf32920764d879656b7a005a ]
+
+The GRO path has a fast-path where we avoid calling pskb_may_pull
+and pskb_expand by directly accessing frag0.  However, this should
+only be done if we have enough tailroom in the skb as otherwise
+we'll have to expand it later anyway.
+
+This patch adds the check by capping frag0_len with the skb tailroom.
+
+Fixes: cb18978cbf45 ("gro: Open-code final pskb_may_pull")
+Reported-by: Slava Shwartsman <slavash@mellanox.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -4453,7 +4453,8 @@ static void skb_gro_reset_offset(struct
+           pinfo->nr_frags &&
+           !PageHighMem(skb_frag_page(frag0))) {
+               NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
+-              NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
++              NAPI_GRO_CB(skb)->frag0_len = min(skb_frag_size(frag0),
++                                                skb->end - skb->tail);
+       }
+ }
+ 
diff --git a/queue-4.9/gro-use-min_t-in-skb_gro_reset_offset.patch b/queue-4.9/gro-use-min_t-in-skb_gro_reset_offset.patch

new file mode 100644 (file)

index 0000000..af25dbb
--- /dev/null
+++ b/queue-4.9/gro-use-min_t-in-skb_gro_reset_offset.patch
@@ -0,0 +1,36 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 10 Jan 2017 19:52:43 -0800
+Subject: gro: use min_t() in skb_gro_reset_offset()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 7cfd5fd5a9813f1430290d20c0fead9b4582a307 ]
+
+On 32bit arches, (skb->end - skb->tail) is not 'unsigned int',
+so we shall use min_t() instead of min() to avoid a compiler error.
+
+Fixes: 1272ce87fa01 ("gro: Enter slow-path if there is no tailroom")
+Reported-by: kernel test robot <fengguang.wu@intel.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -4453,8 +4453,9 @@ static void skb_gro_reset_offset(struct
+           pinfo->nr_frags &&
+           !PageHighMem(skb_frag_page(frag0))) {
+               NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
+-              NAPI_GRO_CB(skb)->frag0_len = min(skb_frag_size(frag0),
+-                                                skb->end - skb->tail);
++              NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
++                                                  skb_frag_size(frag0),
++                                                  skb->end - skb->tail);
+       }
+ }
+ 
diff --git a/queue-4.9/igmp-make-igmp-group-member-rfc-3376-compliant.patch b/queue-4.9/igmp-make-igmp-group-member-rfc-3376-compliant.patch

new file mode 100644 (file)

index 0000000..4c1dbf5
--- /dev/null
+++ b/queue-4.9/igmp-make-igmp-group-member-rfc-3376-compliant.patch
@@ -0,0 +1,88 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Michal Tesar <mtesar@redhat.com>
+Date: Mon, 2 Jan 2017 14:38:36 +0100
+Subject: igmp: Make igmp group member RFC 3376 compliant
+
+From: Michal Tesar <mtesar@redhat.com>
+
+
+[ Upstream commit 7ababb782690e03b78657e27bd051e20163af2d6 ]
+
+5.2. Action on Reception of a Query
+
+ When a system receives a Query, it does not respond immediately.
+ Instead, it delays its response by a random amount of time, bounded
+ by the Max Resp Time value derived from the Max Resp Code in the
+ received Query message.  A system may receive a variety of Queries on
+ different interfaces and of different kinds (e.g., General Queries,
+ Group-Specific Queries, and Group-and-Source-Specific Queries), each
+ of which may require its own delayed response.
+
+ Before scheduling a response to a Query, the system must first
+ consider previously scheduled pending responses and in many cases
+ schedule a combined response.  Therefore, the system must be able to
+ maintain the following state:
+
+ o A timer per interface for scheduling responses to General Queries.
+
+ o A per-group and interface timer for scheduling responses to Group-
+   Specific and Group-and-Source-Specific Queries.
+
+ o A per-group and interface list of sources to be reported in the
+   response to a Group-and-Source-Specific Query.
+
+ When a new Query with the Router-Alert option arrives on an
+ interface, provided the system has state to report, a delay for a
+ response is randomly selected in the range (0, [Max Resp Time]) where
+ Max Resp Time is derived from Max Resp Code in the received Query
+ message.  The following rules are then used to determine if a Report
+ needs to be scheduled and the type of Report to schedule.  The rules
+ are considered in order and only the first matching rule is applied.
+
+ 1. If there is a pending response to a previous General Query
+    scheduled sooner than the selected delay, no additional response
+    needs to be scheduled.
+
+ 2. If the received Query is a General Query, the interface timer is
+    used to schedule a response to the General Query after the
+    selected delay.  Any previously pending response to a General
+    Query is canceled.
+--8<--
+
+Currently the timer is rearmed with a new random expiration time for
+every incoming query regardless of any already pending report,
+which is not aligned with the above RFC.
+It might also happen that a higher rate of incoming queries
+postpones the report past the expiration time of the first query,
+causing group membership loss.
+
+Now the per interface general query timer is rearmed only
+when there is no pending report already scheduled on that interface or
+the newly selected expiration time is before the already pending
+scheduled report.
+
+Signed-off-by: Michal Tesar <mtesar@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/igmp.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -219,9 +219,14 @@ static void igmp_start_timer(struct ip_m
+ static void igmp_gq_start_timer(struct in_device *in_dev)
+ {
+       int tv = prandom_u32() % in_dev->mr_maxdelay;
++      unsigned long exp = jiffies + tv + 2;
++
++      if (in_dev->mr_gq_running &&
++          time_after_eq(exp, (in_dev->mr_gq_timer).expires))
++              return;
+ 
+       in_dev->mr_gq_running = 1;
+-      if (!mod_timer(&in_dev->mr_gq_timer, jiffies+tv+2))
++      if (!mod_timer(&in_dev->mr_gq_timer, exp))
+               in_dev_hold(in_dev);
+ }
+ 
diff --git a/queue-4.9/inet-fix-ip-v6-_recvorigdstaddr-for-udp-sockets.patch b/queue-4.9/inet-fix-ip-v6-_recvorigdstaddr-for-udp-sockets.patch

new file mode 100644 (file)

index 0000000..a85d747
--- /dev/null
+++ b/queue-4.9/inet-fix-ip-v6-_recvorigdstaddr-for-udp-sockets.patch
@@ -0,0 +1,46 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Willem de Bruijn <willemb@google.com>
+Date: Thu, 22 Dec 2016 18:19:16 -0500
+Subject: inet: fix IP(V6)_RECVORIGDSTADDR for udp sockets
+
+From: Willem de Bruijn <willemb@google.com>
+
+
+[ Upstream commit 39b2dd765e0711e1efd1d1df089473a8dd93ad48 ]
+
+Socket cmsg IP(V6)_RECVORIGDSTADDR checks that port range lies within
+the packet. For sockets that have transport headers pulled, transport
+offset can be negative. Use signed comparison to avoid overflow.
+
+Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing")
+Reported-by: Nisar Jagabar <njagabar@cloudmark.com>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_sockglue.c |    2 +-
+ net/ipv6/datagram.c    |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_sockglue.c
++++ b/net/ipv4/ip_sockglue.c
+@@ -137,7 +137,7 @@ static void ip_cmsg_recv_dstaddr(struct
+       const struct iphdr *iph = ip_hdr(skb);
+       __be16 *ports = (__be16 *)skb_transport_header(skb);
+ 
+-      if (skb_transport_offset(skb) + 4 > skb->len)
++      if (skb_transport_offset(skb) + 4 > (int)skb->len)
+               return;
+ 
+       /* All current transport protocols have the port numbers in the
+--- a/net/ipv6/datagram.c
++++ b/net/ipv6/datagram.c
+@@ -700,7 +700,7 @@ void ip6_datagram_recv_specific_ctl(stru
+               struct sockaddr_in6 sin6;
+               __be16 *ports = (__be16 *) skb_transport_header(skb);
+ 
+-              if (skb_transport_offset(skb) + 4 <= skb->len) {
++              if (skb_transport_offset(skb) + 4 <= (int)skb->len) {
+                       /* All current transport protocols have the port numbers in the
+                        * first four bytes of the transport header and this function is
+                        * written with this assumption in mind.
diff --git a/queue-4.9/ipv4-do-not-allow-main-to-be-alias-for-new-local-w-custom-rules.patch b/queue-4.9/ipv4-do-not-allow-main-to-be-alias-for-new-local-w-custom-rules.patch

new file mode 100644 (file)

index 0000000..7a89cd3
--- /dev/null
+++ b/queue-4.9/ipv4-do-not-allow-main-to-be-alias-for-new-local-w-custom-rules.patch
@@ -0,0 +1,35 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+Date: Mon, 2 Jan 2017 13:32:54 -0800
+Subject: ipv4: Do not allow MAIN to be alias for new LOCAL w/ custom rules
+
+From: Alexander Duyck <alexander.h.duyck@intel.com>
+
+
+[ Upstream commit 5350d54f6cd12eaff623e890744c79b700bd3f17 ]
+
+In the case of custom rules being present we need to handle the case of the
+LOCAL table being initialized after the new rule has been added.  To address
+that I am adding a new check so that we can make certain we don't use an
+alias of MAIN for LOCAL when allocating a new table.
+
+Fixes: 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse")
+Reported-by: Oliver Brunel <jjk@jjacky.com>
+Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_frontend.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -85,7 +85,7 @@ struct fib_table *fib_new_table(struct n
+       if (tb)
+               return tb;
+ 
+-      if (id == RT_TABLE_LOCAL)
++      if (id == RT_TABLE_LOCAL && !net->ipv4.fib_has_custom_rules)
+               alias = fib_new_table(net, RT_TABLE_MAIN);
+ 
+       tb = fib_trie_table(id, alias);
diff --git a/queue-4.9/ipv6-handle-efault-from-skb_copy_bits.patch b/queue-4.9/ipv6-handle-efault-from-skb_copy_bits.patch

new file mode 100644 (file)

index 0000000..cdd30a2
--- /dev/null
+++ b/queue-4.9/ipv6-handle-efault-from-skb_copy_bits.patch
@@ -0,0 +1,82 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Dave Jones <davej@codemonkey.org.uk>
+Date: Thu, 22 Dec 2016 11:16:22 -0500
+Subject: ipv6: handle -EFAULT from skb_copy_bits
+
+From: Dave Jones <davej@codemonkey.org.uk>
+
+
+[ Upstream commit a98f91758995cb59611e61318dddd8a6956b52c3 ]
+
+By setting certain socket options on ipv6 raw sockets, we can confuse the
+length calculation in rawv6_push_pending_frames triggering a BUG_ON.
+
+RIP: 0010:[<ffffffff817c6390>] [<ffffffff817c6390>] rawv6_sendmsg+0xc30/0xc40
+RSP: 0018:ffff881f6c4a7c18  EFLAGS: 00010282
+RAX: 00000000fffffff2 RBX: ffff881f6c681680 RCX: 0000000000000002
+RDX: ffff881f6c4a7cf8 RSI: 0000000000000030 RDI: ffff881fed0f6a00
+RBP: ffff881f6c4a7da8 R08: 0000000000000000 R09: 0000000000000009
+R10: ffff881fed0f6a00 R11: 0000000000000009 R12: 0000000000000030
+R13: ffff881fed0f6a00 R14: ffff881fee39ba00 R15: ffff881fefa93a80
+
+Call Trace:
+ [<ffffffff8118ba23>] ? unmap_page_range+0x693/0x830
+ [<ffffffff81772697>] inet_sendmsg+0x67/0xa0
+ [<ffffffff816d93f8>] sock_sendmsg+0x38/0x50
+ [<ffffffff816d982f>] SYSC_sendto+0xef/0x170
+ [<ffffffff816da27e>] SyS_sendto+0xe/0x10
+ [<ffffffff81002910>] do_syscall_64+0x50/0xa0
+ [<ffffffff817f7cbc>] entry_SYSCALL64_slow_path+0x25/0x25
+
+Handle by jumping to the failure path if skb_copy_bits gets an EFAULT.
+
+Reproducer:
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#define LEN 504
+
+int main(int argc, char* argv[])
+{
+       int fd;
+       int zero = 0;
+       char buf[LEN];
+
+       memset(buf, 0, LEN);
+
+       fd = socket(AF_INET6, SOCK_RAW, 7);
+
+       setsockopt(fd, SOL_IPV6, IPV6_CHECKSUM, &zero, 4);
+       setsockopt(fd, SOL_IPV6, IPV6_DSTOPTS, &buf, LEN);
+
+       sendto(fd, buf, 1, 0, (struct sockaddr *) buf, 110);
+}
+
+Signed-off-by: Dave Jones <davej@codemonkey.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/raw.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/net/ipv6/raw.c
++++ b/net/ipv6/raw.c
+@@ -589,7 +589,11 @@ static int rawv6_push_pending_frames(str
+       }
+ 
+       offset += skb_transport_offset(skb);
+-      BUG_ON(skb_copy_bits(skb, offset, &csum, 2));
++      err = skb_copy_bits(skb, offset, &csum, 2);
++      if (err < 0) {
++              ip6_flush_pending_frames(sk);
++              goto out;
++      }
+ 
+       /* in case cksum was not initialized */
+       if (unlikely(csum))
diff --git a/queue-4.9/net-add-the-af_qipcrtr-entries-to-family-name-tables.patch b/queue-4.9/net-add-the-af_qipcrtr-entries-to-family-name-tables.patch

new file mode 100644 (file)

index 0000000..1f55ce8
--- /dev/null
+++ b/queue-4.9/net-add-the-af_qipcrtr-entries-to-family-name-tables.patch
@@ -0,0 +1,53 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: "Anna, Suman" <s-anna@ti.com>
+Date: Mon, 9 Jan 2017 21:48:56 -0600
+Subject: net: add the AF_QIPCRTR entries to family name tables
+
+From: "Anna, Suman" <s-anna@ti.com>
+
+
+[ Upstream commit 5d722b3024f6762addb8642ffddc9f275b5107ae ]
+
+Commit bdabad3e363d ("net: Add Qualcomm IPC router") introduced a
+new address family. Update the family name tables accordingly so
+that the lockdep initialization can use the proper names for this
+family.
+
+Cc: Courtney Cavin <courtney.cavin@sonymobile.com>
+Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
+Signed-off-by: Suman Anna <s-anna@ti.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -222,7 +222,7 @@ static const char *const af_family_key_s
+   "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
+   "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
+   "sk_lock-AF_NFC"   , "sk_lock-AF_VSOCK"    , "sk_lock-AF_KCM"      ,
+-  "sk_lock-AF_MAX"
++  "sk_lock-AF_QIPCRTR", "sk_lock-AF_MAX"
+ };
+ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
+   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
+@@ -239,7 +239,7 @@ static const char *const af_family_slock
+   "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
+   "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
+   "slock-AF_NFC"   , "slock-AF_VSOCK"    ,"slock-AF_KCM"       ,
+-  "slock-AF_MAX"
++  "slock-AF_QIPCRTR", "slock-AF_MAX"
+ };
+ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
+   "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
+@@ -256,7 +256,7 @@ static const char *const af_family_clock
+   "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
+   "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
+   "clock-AF_NFC"   , "clock-AF_VSOCK"    , "clock-AF_KCM"      ,
+-  "clock-AF_MAX"
++  "clock-AF_QIPCRTR", "clock-AF_MAX"
+ };
+ 
+ /*
diff --git a/queue-4.9/net-dsa-bcm_sf2-do-not-clobber-b53_switch_ops.patch b/queue-4.9/net-dsa-bcm_sf2-do-not-clobber-b53_switch_ops.patch

new file mode 100644 (file)

index 0000000..3ee87c5
--- /dev/null
+++ b/queue-4.9/net-dsa-bcm_sf2-do-not-clobber-b53_switch_ops.patch
@@ -0,0 +1,55 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Sat, 7 Jan 2017 21:01:56 -0800
+Subject: net: dsa: bcm_sf2: Do not clobber b53_switch_ops
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit a4c61b92b3a4cbda35bb0251a5063a68f0861b2c ]
+
+We make the bcm_sf2 driver override ds->ops which points to
+b53_switch_ops since b53_switch_alloc() did the assignment. This is all
+well and good until a second b53 switch comes in, and ends up using the
+bcm_sf2 operations. Make a proper local copy, substitute the ds->ops
+pointer and then override the operations.
+
+Fixes: f458995b9ad8 ("net: dsa: bcm_sf2: Utilize core B53 driver when possible")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -982,6 +982,7 @@ static int bcm_sf2_sw_probe(struct platf
+       const char *reg_names[BCM_SF2_REGS_NUM] = BCM_SF2_REGS_NAME;
+       struct device_node *dn = pdev->dev.of_node;
+       struct b53_platform_data *pdata;
++      struct dsa_switch_ops *ops;
+       struct bcm_sf2_priv *priv;
+       struct b53_device *dev;
+       struct dsa_switch *ds;
+@@ -995,6 +996,10 @@ static int bcm_sf2_sw_probe(struct platf
+       if (!priv)
+               return -ENOMEM;
+ 
++      ops = devm_kzalloc(&pdev->dev, sizeof(*ops), GFP_KERNEL);
++      if (!ops)
++              return -ENOMEM;
++
+       dev = b53_switch_alloc(&pdev->dev, &bcm_sf2_io_ops, priv);
+       if (!dev)
+               return -ENOMEM;
+@@ -1014,6 +1019,8 @@ static int bcm_sf2_sw_probe(struct platf
+       ds = dev->ds;
+ 
+       /* Override the parts that are non-standard wrt. normal b53 devices */
++      memcpy(ops, ds->ops, sizeof(*ops));
++      ds->ops = ops;
+       ds->ops->get_tag_protocol = bcm_sf2_sw_get_tag_protocol;
+       ds->ops->setup = bcm_sf2_sw_setup;
+       ds->ops->get_phy_flags = bcm_sf2_sw_get_phy_flags;
diff --git a/queue-4.9/net-dsa-bcm_sf2-utilize-nested-mdio-read-write.patch b/queue-4.9/net-dsa-bcm_sf2-utilize-nested-mdio-read-write.patch

new file mode 100644 (file)

index 0000000..7514fde
--- /dev/null
+++ b/queue-4.9/net-dsa-bcm_sf2-utilize-nested-mdio-read-write.patch
@@ -0,0 +1,42 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Sat, 7 Jan 2017 21:01:57 -0800
+Subject: net: dsa: bcm_sf2: Utilize nested MDIO read/write
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit 2cfe8f8290bd28cf1ee67db914a6e76cf8e6437b ]
+
+We are implementing a MDIO bus which is behind another one, so use the
+nested version of the accessors to get lockdep annotations correct.
+
+Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -393,7 +393,7 @@ static int bcm_sf2_sw_mdio_read(struct m
+       if (addr == BRCM_PSEUDO_PHY_ADDR && priv->indir_phy_mask & BIT(addr))
+               return bcm_sf2_sw_indir_rw(priv, 1, addr, regnum, 0);
+       else
+-              return mdiobus_read(priv->master_mii_bus, addr, regnum);
++              return mdiobus_read_nested(priv->master_mii_bus, addr, regnum);
+ }
+ 
+ static int bcm_sf2_sw_mdio_write(struct mii_bus *bus, int addr, int regnum,
+@@ -407,7 +407,7 @@ static int bcm_sf2_sw_mdio_write(struct
+       if (addr == BRCM_PSEUDO_PHY_ADDR && priv->indir_phy_mask & BIT(addr))
+               bcm_sf2_sw_indir_rw(priv, 0, addr, regnum, val);
+       else
+-              mdiobus_write(priv->master_mii_bus, addr, regnum, val);
++              mdiobus_write_nested(priv->master_mii_bus, addr, regnum, val);
+ 
+       return 0;
+ }
diff --git a/queue-4.9/net-dsa-ensure-validity-of-dst-ds.patch b/queue-4.9/net-dsa-ensure-validity-of-dst-ds.patch

new file mode 100644 (file)

index 0000000..96fff44
--- /dev/null
+++ b/queue-4.9/net-dsa-ensure-validity-of-dst-ds.patch
@@ -0,0 +1,51 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Mon, 9 Jan 2017 11:58:34 -0800
+Subject: net: dsa: Ensure validity of dst->ds[0]
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit faf3a932fbeb77860226a8323eacb835edc98648 ]
+
+It is perfectly possible to have non zero indexed switches being present
+in a DSA switch tree; in such a case, we will be dereferencing a NULL
+pointer in dsa_cpu_port_ethtool_{setup,restore}. Be more defensive
+and ensure that dst->ds[0] is valid before doing anything with it.
+
+Fixes: 0c73c523cf73 ("net: dsa: Initialize CPU port ethtool ops per tree")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dsa/dsa2.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/net/dsa/dsa2.c
++++ b/net/dsa/dsa2.c
+@@ -394,9 +394,11 @@ static int dsa_dst_apply(struct dsa_swit
+                       return err;
+       }
+ 
+-      err = dsa_cpu_port_ethtool_setup(dst->ds[0]);
+-      if (err)
+-              return err;
++      if (dst->ds[0]) {
++              err = dsa_cpu_port_ethtool_setup(dst->ds[0]);
++              if (err)
++                      return err;
++      }
+ 
+       /* If we use a tagging format that doesn't have an ethertype
+        * field, make sure that all packets from this point on get
+@@ -433,7 +435,8 @@ static void dsa_dst_unapply(struct dsa_s
+               dsa_ds_unapply(dst, ds);
+       }
+ 
+-      dsa_cpu_port_ethtool_restore(dst->ds[0]);
++      if (dst->ds[0])
++              dsa_cpu_port_ethtool_restore(dst->ds[0]);
+ 
+       pr_info("DSA: tree %d unapplied\n", dst->tree);
+       dst->applied = false;
diff --git a/queue-4.9/net-fix-incorrect-original-ingress-device-index-in-pktinfo.patch b/queue-4.9/net-fix-incorrect-original-ingress-device-index-in-pktinfo.patch

new file mode 100644 (file)

index 0000000..36978fb
--- /dev/null
+++ b/queue-4.9/net-fix-incorrect-original-ingress-device-index-in-pktinfo.patch
@@ -0,0 +1,47 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Wei Zhang <asuka.com@163.com>
+Date: Thu, 29 Dec 2016 16:45:04 +0800
+Subject: net: fix incorrect original ingress device index in PKTINFO
+
+From: Wei Zhang <asuka.com@163.com>
+
+
+[ Upstream commit f0c16ba8933ed217c2688b277410b2a37ba81591 ]
+
+When we send a packet for our own local address on a non-loopback
+interface (e.g. eth0), due to the change introduced by
+commit 0b922b7a829c ("net: original ingress device index in PKTINFO"), the
+original ingress device index would be set as the loopback interface.
+However, the packet should be considered as if it arrived via the
+sending interface (eth0), otherwise it would break the expectation of the
+userspace application (e.g. the DHCPRELEASE message from the dhcp_release
+binary would be ignored by the dnsmasq daemon, since it comes from lo, which
+is not the interface dnsmasq binds to).
+
+Fixes: 0b922b7a829c ("net: original ingress device index in PKTINFO")
+Acked-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: Wei Zhang <asuka.com@163.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_sockglue.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/ip_sockglue.c
++++ b/net/ipv4/ip_sockglue.c
+@@ -1202,8 +1202,14 @@ void ipv4_pktinfo_prepare(const struct s
+                * which has interface index (iif) as the first member of the
+                * underlying inet{6}_skb_parm struct. This code then overlays
+                * PKTINFO_SKB_CB and in_pktinfo also has iif as the first
+-               * element so the iif is picked up from the prior IPCB
++               * element so the iif is picked up from the prior IPCB. If iif
++               * is the loopback interface, then return the sending interface
++               * (e.g., process binds socket to eth0 for Tx which is
++               * redirected to loopback in the rtable/dst).
+                */
++              if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
++                      pktinfo->ipi_ifindex = inet_iif(skb);
++
+               pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
+       } else {
+               pktinfo->ipi_ifindex = 0;
diff --git a/queue-4.9/net-ipv4-dst-for-local-input-routes-should-use-l3mdev-if-relevant.patch b/queue-4.9/net-ipv4-dst-for-local-input-routes-should-use-l3mdev-if-relevant.patch

new file mode 100644 (file)

index 0000000..d3acb97
--- /dev/null
+++ b/queue-4.9/net-ipv4-dst-for-local-input-routes-should-use-l3mdev-if-relevant.patch
@@ -0,0 +1,39 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: David Ahern <dsa@cumulusnetworks.com>
+Date: Thu, 29 Dec 2016 15:29:03 -0800
+Subject: net: ipv4: dst for local input routes should use l3mdev if relevant
+
+From: David Ahern <dsa@cumulusnetworks.com>
+
+
+[ Upstream commit f5a0aab84b74de68523599817569c057c7ac1622 ]
+
+IPv4 output routes already use l3mdev device instead of loopback for dst's
+if it is applicable. Change local input routes to do the same.
+
+This fixes icmp responses for unreachable UDP ports which are directed
+to the wrong table after commit 9d1a6c4ea43e4 because local_input
+routes use the loopback device. Moving from ingress device to loopback
+loses the L3 domain causing responses based on the dst to get lost.
+
+Fixes: 9d1a6c4ea43e4 ("net: icmp_route_lookup should use rt dev to
+                      determine L3 domain")
+Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/route.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1902,7 +1902,8 @@ local_input:
+               }
+       }
+ 
+-      rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type,
++      rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
++                         flags | RTCF_LOCAL, res.type,
+                          IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
+       if (!rth)
+               goto e_nobufs;
diff --git a/queue-4.9/net-ipv4-fix-multipath-selection-with-vrf.patch b/queue-4.9/net-ipv4-fix-multipath-selection-with-vrf.patch

new file mode 100644 (file)

index 0000000..21fd1ce
--- /dev/null
+++ b/queue-4.9/net-ipv4-fix-multipath-selection-with-vrf.patch
@@ -0,0 +1,65 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: David Ahern <dsa@cumulusnetworks.com>
+Date: Tue, 10 Jan 2017 14:37:35 -0800
+Subject: net: ipv4: Fix multipath selection with vrf
+
+From: David Ahern <dsa@cumulusnetworks.com>
+
+
+[ Upstream commit 7a18c5b9fb31a999afc62b0e60978aa896fc89e9 ]
+
+fib_select_path does not call fib_select_multipath if oif is set in the
+flow struct. For VRF use cases oif is always set, so multipath route
+selection is bypassed. Use the FLOWI_FLAG_SKIP_NH_OIF to skip the oif
+check similar to what is done in fib_table_lookup.
+
+Add saddr and proto to the flow struct for the fib lookup done by the
+VRF driver to better match hash computation for a flow.
+
+Fixes: 613d09b30f8b ("net: Use VRF device index for lookups on TX")
+Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vrf.c        |    2 ++
+ net/ipv4/fib_semantics.c |    9 +++++++--
+ 2 files changed, 9 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -263,7 +263,9 @@ static netdev_tx_t vrf_process_v4_outbou
+               .flowi4_iif = LOOPBACK_IFINDEX,
+               .flowi4_tos = RT_TOS(ip4h->tos),
+               .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF,
++              .flowi4_proto = ip4h->protocol,
+               .daddr = ip4h->daddr,
++              .saddr = ip4h->saddr,
+       };
+       struct net *net = dev_net(vrf_dev);
+       struct rtable *rt;
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -1617,8 +1617,13 @@ void fib_select_multipath(struct fib_res
+ void fib_select_path(struct net *net, struct fib_result *res,
+                    struct flowi4 *fl4, int mp_hash)
+ {
++      bool oif_check;
++
++      oif_check = (fl4->flowi4_oif == 0 ||
++                   fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF);
++
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH
+-      if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) {
++      if (res->fi->fib_nhs > 1 && oif_check) {
+               if (mp_hash < 0)
+                       mp_hash = get_hash_from_flowi4(fl4) >> 1;
+ 
+@@ -1628,7 +1633,7 @@ void fib_select_path(struct net *net, st
+ #endif
+       if (!res->prefixlen &&
+           res->table->tb_num_default > 1 &&
+-          res->type == RTN_UNICAST && !fl4->flowi4_oif)
++          res->type == RTN_UNICAST && oif_check)
+               fib_select_default(fl4, res);
+ 
+       if (!fl4->saddr)
diff --git a/queue-4.9/net-mlx5-avoid-shadowing-numa_node.patch b/queue-4.9/net-mlx5-avoid-shadowing-numa_node.patch

new file mode 100644 (file)

index 0000000..2c22e26
--- /dev/null
+++ b/queue-4.9/net-mlx5-avoid-shadowing-numa_node.patch
@@ -0,0 +1,41 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Eli Cohen <eli@mellanox.com>
+Date: Wed, 28 Dec 2016 14:58:34 +0200
+Subject: net/mlx5: Avoid shadowing numa_node
+
+From: Eli Cohen <eli@mellanox.com>
+
+
+[ Upstream commit d151d73dcc99de87c63bdefebcc4cb69de1cdc40 ]
+
+Avoid using a local variable named numa_node to avoid shadowing a public
+one.
+
+Fixes: db058a186f98 ('net/mlx5_core: Set irq affinity hints')
+Signed-off-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -547,7 +547,6 @@ static int mlx5_irq_set_affinity_hint(st
+       struct mlx5_priv *priv  = &mdev->priv;
+       struct msix_entry *msix = priv->msix_arr;
+       int irq                 = msix[i + MLX5_EQ_VEC_COMP_BASE].vector;
+-      int numa_node           = priv->numa_node;
+       int err;
+ 
+       if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
+@@ -555,7 +554,7 @@ static int mlx5_irq_set_affinity_hint(st
+               return -ENOMEM;
+       }
+ 
+-      cpumask_set_cpu(cpumask_local_spread(i, numa_node),
++      cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+                       priv->irq_info[i].mask);
+ 
+       err = irq_set_affinity_hint(irq, priv->irq_info[i].mask);
diff --git a/queue-4.9/net-mlx5-cancel-recovery-work-in-remove-flow.patch b/queue-4.9/net-mlx5-cancel-recovery-work-in-remove-flow.patch

new file mode 100644 (file)

index 0000000..57bb720
--- /dev/null
+++ b/queue-4.9/net-mlx5-cancel-recovery-work-in-remove-flow.patch
@@ -0,0 +1,45 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Daniel Jurgens <danielj@mellanox.com>
+Date: Wed, 28 Dec 2016 14:58:33 +0200
+Subject: net/mlx5: Cancel recovery work in remove flow
+
+From: Daniel Jurgens <danielj@mellanox.com>
+
+
+[ Upstream commit 689a248df83b6032edc57e86267b4e5cc8d7174e ]
+
+If there is pending delayed work for health recovery it must be canceled
+if the device is being unloaded.
+
+Fixes: 05ac2c0b7438 ("net/mlx5: Fix race between PCI error handlers and health work")
+Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -1159,6 +1159,8 @@ static int mlx5_unload_one(struct mlx5_c
+ {
+       int err = 0;
+ 
++      mlx5_drain_health_wq(dev);
++
+       mutex_lock(&dev->intf_state_mutex);
+       if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
+               dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
+@@ -1319,10 +1321,9 @@ static pci_ers_result_t mlx5_pci_err_det
+ 
+       mlx5_enter_error_state(dev);
+       mlx5_unload_one(dev, priv, false);
+-      /* In case of kernel call save the pci state and drain health wq */
++      /* In case of kernel call save the pci state */
+       if (state) {
+               pci_save_state(pdev);
+-              mlx5_drain_health_wq(dev);
+               mlx5_pci_disable_device(dev);
+       }
+ 
diff --git a/queue-4.9/net-mlx5-check-fw-limitations-on-log_max_qp-before-setting-it.patch b/queue-4.9/net-mlx5-check-fw-limitations-on-log_max_qp-before-setting-it.patch

new file mode 100644 (file)

index 0000000..50170cd
--- /dev/null
+++ b/queue-4.9/net-mlx5-check-fw-limitations-on-log_max_qp-before-setting-it.patch
@@ -0,0 +1,38 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Noa Osherovich <noaos@mellanox.com>
+Date: Wed, 28 Dec 2016 14:58:32 +0200
+Subject: net/mlx5: Check FW limitations on log_max_qp before setting it
+
+From: Noa Osherovich <noaos@mellanox.com>
+
+
+[ Upstream commit 883371c453b937f9eb581fb4915210865982736f ]
+
+When setting HCA capabilities, set log_max_qp to be the minimum
+between the selected profile's value and the HCA limitation.
+
+Fixes: 938fe83c8dcb ('net/mlx5_core: New device capabilities...')
+Signed-off-by: Noa Osherovich <noaos@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -468,6 +468,13 @@ static int handle_hca_cap(struct mlx5_co
+       MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
+                to_fw_pkey_sz(dev, 128));
+ 
++      /* Check log_max_qp from HCA caps to set in current profile */
++      if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) {
++              mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
++                             profile[prof_sel].log_max_qp,
++                             MLX5_CAP_GEN_MAX(dev, log_max_qp));
++              profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
++      }
+       if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
+               MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
+                        prof->log_max_qp);
diff --git a/queue-4.9/net-mlx5-mask-destination-mac-value-in-ethtool-steering-rules.patch b/queue-4.9/net-mlx5-mask-destination-mac-value-in-ethtool-steering-rules.patch

new file mode 100644 (file)

index 0000000..0b700e2
--- /dev/null
+++ b/queue-4.9/net-mlx5-mask-destination-mac-value-in-ethtool-steering-rules.patch
@@ -0,0 +1,32 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Maor Gottlieb <maorg@mellanox.com>
+Date: Wed, 28 Dec 2016 14:58:35 +0200
+Subject: net/mlx5: Mask destination mac value in ethtool steering rules
+
+From: Maor Gottlieb <maorg@mellanox.com>
+
+
+[ Upstream commit 077b1e8069b9b74477b01d28f6b83774dc19a142 ]
+
+We need to mask the destination mac value with the destination mac
+mask when adding steering rule via ethtool.
+
+Fixes: 1174fce8d1410 ('net/mlx5e: Support l3/l4 flow type specs in ethtool flow steering')
+Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+@@ -247,6 +247,7 @@ static int set_flow_attrs(u32 *match_c,
+       }
+       if (fs->flow_type & FLOW_MAC_EXT &&
+           !is_zero_ether_addr(fs->m_ext.h_dest)) {
++              mask_spec(fs->m_ext.h_dest, fs->h_ext.h_dest, ETH_ALEN);
+               ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+                                            outer_headers_c, dmac_47_16),
+                               fs->m_ext.h_dest);
diff --git a/queue-4.9/net-mlx5-prevent-setting-multicast-macs-for-vfs.patch b/queue-4.9/net-mlx5-prevent-setting-multicast-macs-for-vfs.patch

new file mode 100644 (file)

index 0000000..ab1eea7
--- /dev/null
+++ b/queue-4.9/net-mlx5-prevent-setting-multicast-macs-for-vfs.patch
@@ -0,0 +1,34 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Mohamad Haj Yahia <mohamad@mellanox.com>
+Date: Wed, 28 Dec 2016 14:58:37 +0200
+Subject: net/mlx5: Prevent setting multicast macs for VFs
+
+From: Mohamad Haj Yahia <mohamad@mellanox.com>
+
+
+[ Upstream commit ccce1700263d8b5b219359d04180492a726cea16 ]
+
+Need to check that VF mac address entered by the admin user is either
+zero or unicast mac.
+Multicast mac addresses are prohibited.
+
+Fixes: 77256579c6b4 ('net/mlx5: E-Switch, Introduce Vport administration functions')
+Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -1703,7 +1703,7 @@ int mlx5_eswitch_set_vport_mac(struct ml
+ 
+       if (!ESW_ALLOWED(esw))
+               return -EPERM;
+-      if (!LEGAL_VPORT(esw, vport))
++      if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac))
+               return -EINVAL;
+ 
+       mutex_lock(&esw->state_lock);
diff --git a/queue-4.9/net-mlx5e-disable-netdev-after-close.patch b/queue-4.9/net-mlx5e-disable-netdev-after-close.patch

new file mode 100644 (file)

index 0000000..6abef79
--- /dev/null
+++ b/queue-4.9/net-mlx5e-disable-netdev-after-close.patch
@@ -0,0 +1,46 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Saeed Mahameed <saeedm@mellanox.com>
+Date: Wed, 28 Dec 2016 14:58:42 +0200
+Subject: net/mlx5e: Disable netdev after close
+
+From: Saeed Mahameed <saeedm@mellanox.com>
+
+
+[ Upstream commit 37f304d10030bb425c19099e7b955d9c3ec4cba3 ]
+
+Disabling the netdev should come after it is closed; there is no harm in doing it
+before - hence the MLX5E_STATE_DESTROYING bit - but it is more natural this way.
+
+Fixes: 26e59d8077a3 ("net/mlx5e: Implement mlx5e interface attach/detach callbacks")
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Reviewed-by: Mohamad Haj Yahia <mohamad@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -3942,10 +3942,6 @@ void mlx5e_detach_netdev(struct mlx5_cor
+       const struct mlx5e_profile *profile = priv->profile;
+ 
+       set_bit(MLX5E_STATE_DESTROYING, &priv->state);
+-      if (profile->disable)
+-              profile->disable(priv);
+-
+-      flush_workqueue(priv->wq);
+ 
+       rtnl_lock();
+       if (netif_running(netdev))
+@@ -3953,6 +3949,10 @@ void mlx5e_detach_netdev(struct mlx5_cor
+       netif_device_detach(netdev);
+       rtnl_unlock();
+ 
++      if (profile->disable)
++              profile->disable(priv);
++      flush_workqueue(priv->wq);
++
+       mlx5e_destroy_q_counter(priv);
+       profile->cleanup_rx(priv);
+       mlx5e_close_drop_rq(priv);
diff --git a/queue-4.9/net-mlx5e-don-t-sync-netdev-state-when-not-registered.patch b/queue-4.9/net-mlx5e-don-t-sync-netdev-state-when-not-registered.patch

new file mode 100644 (file)

index 0000000..7c4020d
--- /dev/null
+++ b/queue-4.9/net-mlx5e-don-t-sync-netdev-state-when-not-registered.patch
@@ -0,0 +1,66 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Saeed Mahameed <saeedm@mellanox.com>
+Date: Wed, 28 Dec 2016 14:58:41 +0200
+Subject: net/mlx5e: Don't sync netdev state when not registered
+
+From: Saeed Mahameed <saeedm@mellanox.com>
+
+
+[ Upstream commit 610e89e05c3f28a7394935aa6b91f99548c4fd3c ]
+
+Skip setting netdev vxlan ports and netdev rx_mode on driver load
+when netdev is not yet registered.
+
+Synchronizing with netdev state is needed only on reset flow where the
+netdev remains registered for the whole reset period.
+
+This also fixes an access before initialization of net_device.addr_list_lock
+- which for some reason is initialized on register_netdev - where we queued
+set_rx_mode work on driver load before netdev registration.
+
+Fixes: 26e59d8077a3 ("net/mlx5e: Implement mlx5e interface attach/detach callbacks")
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Reported-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
+Reviewed-by: Mohamad Haj Yahia <mohamad@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c |   19 ++++++++++++-------
+ 1 file changed, 12 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -3773,14 +3773,7 @@ static void mlx5e_nic_enable(struct mlx5
+ 
+       mlx5_lag_add(mdev, netdev);
+ 
+-      if (mlx5e_vxlan_allowed(mdev)) {
+-              rtnl_lock();
+-              udp_tunnel_get_rx_info(netdev);
+-              rtnl_unlock();
+-      }
+-
+       mlx5e_enable_async_events(priv);
+-      queue_work(priv->wq, &priv->set_rx_mode_work);
+ 
+       if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
+               mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id);
+@@ -3790,6 +3783,18 @@ static void mlx5e_nic_enable(struct mlx5
+               rep.priv_data = priv;
+               mlx5_eswitch_register_vport_rep(esw, 0, &rep);
+       }
++
++      if (netdev->reg_state != NETREG_REGISTERED)
++              return;
++
++      /* Device already registered: sync netdev system state */
++      if (mlx5e_vxlan_allowed(mdev)) {
++              rtnl_lock();
++              udp_tunnel_get_rx_info(netdev);
++              rtnl_unlock();
++      }
++
++      queue_work(priv->wq, &priv->set_rx_mode_work);
+ }
+ 
+ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
diff --git a/queue-4.9/net-mlx5e-remove-warn_once-from-adaptive-moderation-code.patch b/queue-4.9/net-mlx5e-remove-warn_once-from-adaptive-moderation-code.patch

new file mode 100644 (file)

index 0000000..713bc17
--- /dev/null
+++ b/queue-4.9/net-mlx5e-remove-warn_once-from-adaptive-moderation-code.patch
@@ -0,0 +1,64 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Gil Rockah <gilr@mellanox.com>
+Date: Tue, 10 Jan 2017 22:33:38 +0200
+Subject: net/mlx5e: Remove WARN_ONCE from adaptive moderation code
+
+From: Gil Rockah <gilr@mellanox.com>
+
+
+[ Upstream commit 0bbcc0a8fc394d01988fe0263ccf7fddb77a12c3 ]
+
+When trying to do interface down or changing interface configuration
+under heavy traffic, some of the adaptive moderation corner cases can
+occur and leave a WARN_ONCE call trace in the kernel log.
+
+Those WARN_ONCE are meant for debug only, and should have been inserted
+only under debug. We avoid such call traces by removing those WARN_ONCE.
+
+Fixes: cb3c7fd4f839 ("net/mlx5e: Support adaptive RX coalescing")
+Signed-off-by: Gil Rockah <gilr@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c |    7 +------
+ 1 file changed, 1 insertion(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
+@@ -109,7 +109,6 @@ static bool mlx5e_am_on_top(struct mlx5e
+       switch (am->tune_state) {
+       case MLX5E_AM_PARKING_ON_TOP:
+       case MLX5E_AM_PARKING_TIRED:
+-              WARN_ONCE(true, "mlx5e_am_on_top: PARKING\n");
+               return true;
+       case MLX5E_AM_GOING_RIGHT:
+               return (am->steps_left > 1) && (am->steps_right == 1);
+@@ -123,7 +122,6 @@ static void mlx5e_am_turn(struct mlx5e_r
+       switch (am->tune_state) {
+       case MLX5E_AM_PARKING_ON_TOP:
+       case MLX5E_AM_PARKING_TIRED:
+-              WARN_ONCE(true, "mlx5e_am_turn: PARKING\n");
+               break;
+       case MLX5E_AM_GOING_RIGHT:
+               am->tune_state = MLX5E_AM_GOING_LEFT;
+@@ -144,7 +142,6 @@ static int mlx5e_am_step(struct mlx5e_rx
+       switch (am->tune_state) {
+       case MLX5E_AM_PARKING_ON_TOP:
+       case MLX5E_AM_PARKING_TIRED:
+-              WARN_ONCE(true, "mlx5e_am_step: PARKING\n");
+               break;
+       case MLX5E_AM_GOING_RIGHT:
+               if (am->profile_ix == (MLX5E_PARAMS_AM_NUM_PROFILES - 1))
+@@ -282,10 +279,8 @@ static void mlx5e_am_calc_stats(struct m
+       u32 delta_us = ktime_us_delta(end->time, start->time);
+       unsigned int npkts = end->pkt_ctr - start->pkt_ctr;
+ 
+-      if (!delta_us) {
+-              WARN_ONCE(true, "mlx5e_am_calc_stats: delta_us=0\n");
++      if (!delta_us)
+               return;
+-      }
+ 
+       curr_stats->ppms =            (npkts * USEC_PER_MSEC) / delta_us;
+       curr_stats->epms = (MLX5E_AM_NEVENTS * USEC_PER_MSEC) / delta_us;
diff --git a/queue-4.9/net-sched-cls_flower-fix-missing-addr_type-in-classify.patch b/queue-4.9/net-sched-cls_flower-fix-missing-addr_type-in-classify.patch

new file mode 100644 (file)

index 0000000..85949a8
--- /dev/null
+++ b/queue-4.9/net-sched-cls_flower-fix-missing-addr_type-in-classify.patch
@@ -0,0 +1,42 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Paul Blakey <paulb@mellanox.com>
+Date: Wed, 28 Dec 2016 14:54:47 +0200
+Subject: net/sched: cls_flower: Fix missing addr_type in classify
+
+From: Paul Blakey <paulb@mellanox.com>
+
+
+[ Upstream commit 0df0f207aab4f42e5c96a807adf9a6845b69e984 ]
+
+Since we now use a non zero mask on addr_type, we are matching on its
+value (IPV4/IPV6). So before this fix, matching on enc_src_ip/enc_dst_ip
+failed in SW/classify path since its value was zero.
+This patch sets the proper value of addr_type for encapsulated packets.
+
+Fixes: 970bfcd09791 ('net/sched: cls_flower: Use mask for addr_type')
+Signed-off-by: Paul Blakey <paulb@mellanox.com>
+Reviewed-by: Hadar Hen Zion <hadarh@mellanox.com>
+Acked-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_flower.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/sched/cls_flower.c
++++ b/net/sched/cls_flower.c
+@@ -149,10 +149,14 @@ static int fl_classify(struct sk_buff *s
+ 
+               switch (ip_tunnel_info_af(info)) {
+               case AF_INET:
++                      skb_key.enc_control.addr_type =
++                              FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+                       skb_key.enc_ipv4.src = key->u.ipv4.src;
+                       skb_key.enc_ipv4.dst = key->u.ipv4.dst;
+                       break;
+               case AF_INET6:
++                      skb_key.enc_control.addr_type =
++                              FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+                       skb_key.enc_ipv6.src = key->u.ipv6.src;
+                       skb_key.enc_ipv6.dst = key->u.ipv6.dst;
+                       break;
diff --git a/queue-4.9/net-sched-fix-soft-lockup-in-tc_classify.patch b/queue-4.9/net-sched-fix-soft-lockup-in-tc_classify.patch

new file mode 100644 (file)

index 0000000..2e754c5
--- /dev/null
+++ b/queue-4.9/net-sched-fix-soft-lockup-in-tc_classify.patch
@@ -0,0 +1,81 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 21 Dec 2016 18:04:11 +0100
+Subject: net, sched: fix soft lockup in tc_classify
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+
+[ Upstream commit 628185cfddf1dfb701c4efe2cfd72cf5b09f5702 ]
+
+Shahar reported a soft lockup in tc_classify(), where we run into an
+endless loop when walking the classifier chain due to tp->next == tp
+which is a state we should never run into. The issue only seems to
+trigger under load in the tc control path.
+
+What happens is that in tc_ctl_tfilter(), thread A allocates a new
+tp, initializes it, sets tp_created to 1, and calls into tp->ops->change()
+with it. In that classifier callback we had to unlock/lock the rtnl
+mutex and returned with -EAGAIN. One reason why we need to drop there
+is, for example, that we need to request an action module to be loaded.
+
+This happens via tcf_exts_validate() -> tcf_action_init/_1() meaning
+after we loaded and found the requested action, we need to redo the
+whole request so we don't race against others. While we had to unlock
+rtnl in that time, thread B's request was processed next on that CPU.
+Thread B added a new tp instance successfully to the classifier chain.
+When thread A returned grabbing the rtnl mutex again, propagating -EAGAIN
+and destroying its tp instance which never got linked, we goto replay
+and redo A's request.
+
+This time when walking the classifier chain in tc_ctl_tfilter() for
+checking for existing tp instances we had a priority match and found
+the tp instance that was created and linked by thread B. Now calling
+again into tp->ops->change() with that tp was successful and returned
+without error.
+
+tp_created was never cleared in the second round, thus kernel thinks
+that we need to link it into the classifier chain (once again). tp and
+*back point to the same object due to the match we had earlier on. Thus
+for thread B's already public tp, we reset tp->next to tp itself and
+link it into the chain, which eventually causes the mentioned endless
+loop in tc_classify() once a packet hits the data path.
+
+Fix is to clear tp_created at the beginning of each request, also when
+we replay it. On the paths that can cause -EAGAIN we already destroy
+the original tp instance we had and on replay we really need to start
+from scratch. It seems that this issue was first introduced in commit
+12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining
+and avoid kernel panic when we use cls_cgroup").
+
+Fixes: 12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining and avoid kernel panic when we use cls_cgroup")
+Reported-by: Shahar Klein <shahark@mellanox.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Cc: Cong Wang <xiyou.wangcong@gmail.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Tested-by: Shahar Klein <shahark@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_api.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/sched/cls_api.c
++++ b/net/sched/cls_api.c
+@@ -148,13 +148,15 @@ static int tc_ctl_tfilter(struct sk_buff
+       unsigned long cl;
+       unsigned long fh;
+       int err;
+-      int tp_created = 0;
++      int tp_created;
+ 
+       if ((n->nlmsg_type != RTM_GETTFILTER) &&
+           !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
+               return -EPERM;
+ 
+ replay:
++      tp_created = 0;
++
+       err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
+       if (err < 0)
+               return err;
diff --git a/queue-4.9/net-stmmac-fix-race-between-stmmac_drv_probe-and-stmmac_open.patch b/queue-4.9/net-stmmac-fix-race-between-stmmac_drv_probe-and-stmmac_open.patch

new file mode 100644 (file)

index 0000000..285729b
--- /dev/null
+++ b/queue-4.9/net-stmmac-fix-race-between-stmmac_drv_probe-and-stmmac_open.patch
@@ -0,0 +1,94 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Tue, 27 Dec 2016 18:23:06 -0800
+Subject: net: stmmac: Fix race between stmmac_drv_probe and stmmac_open
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit 5701659004d68085182d2fd4199c79172165fa65 ]
+
+There is currently a small window during which the network device registered by
+stmmac can be made visible, yet all resources, including the clock and MDIO bus
+have not had a chance to be set up, this can lead to the following error to
+occur:
+
+[  473.919358] stmmaceth 0000:01:00.0 (unnamed net_device) (uninitialized):
+                stmmac_dvr_probe: warning: cannot get CSR clock
+[  473.919382] stmmaceth 0000:01:00.0: no reset control found
+[  473.919412] stmmac - user ID: 0x10, Synopsys ID: 0x42
+[  473.919429] stmmaceth 0000:01:00.0: DMA HW capability register supported
+[  473.919436] stmmaceth 0000:01:00.0: RX Checksum Offload Engine supported
+[  473.919443] stmmaceth 0000:01:00.0: TX Checksum insertion supported
+[  473.919451] stmmaceth 0000:01:00.0 (unnamed net_device) (uninitialized):
+                Enable RX Mitigation via HW Watchdog Timer
+[  473.921395] libphy: PHY stmmac-1:00 not found
+[  473.921417] stmmaceth 0000:01:00.0 eth0: Could not attach to PHY
+[  473.921427] stmmaceth 0000:01:00.0 eth0: stmmac_open: Cannot attach to
+                PHY (error: -19)
+[  473.959710] libphy: stmmac: probed
+[  473.959724] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 0 IRQ POLL
+                (stmmac-1:00) active
+[  473.959728] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 1 IRQ POLL
+                (stmmac-1:01)
+[  473.959731] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 2 IRQ POLL
+                (stmmac-1:02)
+[  473.959734] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 3 IRQ POLL
+                (stmmac-1:03)
+
+Fix this by making sure that register_netdev() is the last thing being done,
+which guarantees that the clock and the MDIO bus are available.
+
+Fixes: 4bfcbd7abce2 ("stmmac: Move the mdio_register/_unregister in probe/remove")
+Reported-by: Kweh, Hock Leong <hock.leong.kweh@intel.com>
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |   23 ++++++++++++----------
+ 1 file changed, 13 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -3349,12 +3349,6 @@ int stmmac_dvr_probe(struct device *devi
+       spin_lock_init(&priv->lock);
+       spin_lock_init(&priv->tx_lock);
+ 
+-      ret = register_netdev(ndev);
+-      if (ret) {
+-              pr_err("%s: ERROR %i registering the device\n", __func__, ret);
+-              goto error_netdev_register;
+-      }
+-
+       /* If a specific clk_csr value is passed from the platform
+        * this means that the CSR Clock Range selection cannot be
+        * changed at run-time and it is fixed. Viceversa the driver'll try to
+@@ -3376,15 +3370,24 @@ int stmmac_dvr_probe(struct device *devi
+               if (ret < 0) {
+                       pr_debug("%s: MDIO bus (id: %d) registration failed",
+                                __func__, priv->plat->bus_id);
+-                      goto error_mdio_register;
++                      goto error_napi_register;
+               }
+       }
+ 
+-      return 0;
++      ret = register_netdev(ndev);
++      if (ret) {
++              pr_err("%s: ERROR %i registering the device\n", __func__, ret);
++              goto error_netdev_register;
++      }
++
++      return ret;
+ 
+-error_mdio_register:
+-      unregister_netdev(ndev);
+ error_netdev_register:
++      if (priv->hw->pcs != STMMAC_PCS_RGMII &&
++          priv->hw->pcs != STMMAC_PCS_TBI &&
++          priv->hw->pcs != STMMAC_PCS_RTBI)
++              stmmac_mdio_unregister(ndev);
++error_napi_register:
+       netif_napi_del(&priv->napi);
+ error_hw_init:
+       clk_disable_unprepare(priv->pclk);
diff --git a/queue-4.9/net-vrf-add-missing-rx-counters.patch b/queue-4.9/net-vrf-add-missing-rx-counters.patch

new file mode 100644 (file)

index 0000000..984446a
--- /dev/null
+++ b/queue-4.9/net-vrf-add-missing-rx-counters.patch
@@ -0,0 +1,41 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: David Ahern <dsa@cumulusnetworks.com>
+Date: Tue, 3 Jan 2017 09:37:55 -0800
+Subject: net: vrf: Add missing Rx counters
+
+From: David Ahern <dsa@cumulusnetworks.com>
+
+
+[ Upstream commit 926d93a33e59b2729afdbad357233c17184de9d2 ]
+
+The move from rx-handler to L3 receive handler inadvertently dropped the
+rx counters. Restore them.
+
+Fixes: 74b20582ac38 ("net: l3mdev: Add hook in ip and ipv6")
+Reported-by: Dinesh Dutt <ddutt@cumulusnetworks.com>
+Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vrf.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -968,6 +968,7 @@ static struct sk_buff *vrf_ip6_rcv(struc
+        */
+       need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
+       if (!ipv6_ndisc_frame(skb) && !need_strict) {
++              vrf_rx_stats(vrf_dev, skb->len);
+               skb->dev = vrf_dev;
+               skb->skb_iif = vrf_dev->ifindex;
+ 
+@@ -1009,6 +1010,8 @@ static struct sk_buff *vrf_ip_rcv(struct
+               goto out;
+       }
+ 
++      vrf_rx_stats(vrf_dev, skb->len);
++
+       skb_push(skb, skb->mac_len);
+       dev_queue_xmit_nit(skb, vrf_dev);
+       skb_pull(skb, skb->mac_len);
diff --git a/queue-4.9/net-vrf-do-not-allow-table-id-0.patch b/queue-4.9/net-vrf-do-not-allow-table-id-0.patch

new file mode 100644 (file)

index 0000000..9c12b3f
--- /dev/null
+++ b/queue-4.9/net-vrf-do-not-allow-table-id-0.patch
@@ -0,0 +1,34 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: David Ahern <dsa@cumulusnetworks.com>
+Date: Tue, 10 Jan 2017 15:22:25 -0800
+Subject: net: vrf: do not allow table id 0
+
+From: David Ahern <dsa@cumulusnetworks.com>
+
+
+[ Upstream commit 24c63bbc18e25d5d8439422aa5fd2d66390b88eb ]
+
+Frank reported that vrf devices can be created with a table id of 0.
+This breaks many of the run time table id checks and should not be
+allowed. Detect this condition at create time and fail with EINVAL.
+
+Fixes: 193125dbd8eb ("net: Introduce VRF device driver")
+Reported-by: Frank Kellermann <frank.kellermann@atos.net>
+Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vrf.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -1239,6 +1239,8 @@ static int vrf_newlink(struct net *src_n
+               return -EINVAL;
+ 
+       vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]);
++      if (vrf->tb_id == RT_TABLE_UNSPEC)
++              return -EINVAL;
+ 
+       dev->priv_flags |= IFF_L3MDEV_MASTER;
+ 
diff --git a/queue-4.9/net-vrf-drop-conntrack-data-after-pass-through-vrf-device-on-tx.patch b/queue-4.9/net-vrf-drop-conntrack-data-after-pass-through-vrf-device-on-tx.patch

new file mode 100644 (file)

index 0000000..54459c7
--- /dev/null
+++ b/queue-4.9/net-vrf-drop-conntrack-data-after-pass-through-vrf-device-on-tx.patch
@@ -0,0 +1,57 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: David Ahern <dsa@cumulusnetworks.com>
+Date: Wed, 14 Dec 2016 14:31:11 -0800
+Subject: net: vrf: Drop conntrack data after pass through VRF device on Tx
+
+From: David Ahern <dsa@cumulusnetworks.com>
+
+
+[ Upstream commit eb63ecc1706b3e094d0f57438b6c2067cfc299f2 ]
+
+Locally originated traffic in a VRF fails in the presence of a POSTROUTING
+rule. For example,
+
+    $ iptables -t nat -A POSTROUTING -s 11.1.1.0/24  -j MASQUERADE
+    $ ping -I red -c1 11.1.1.3
+    ping: Warning: source address might be selected on device other than red.
+    PING 11.1.1.3 (11.1.1.3) from 11.1.1.2 red: 56(84) bytes of data.
+    ping: sendmsg: Operation not permitted
+
+Worse, the above causes random corruption resulting in a panic in random
+places (I have not seen a consistent backtrace).
+
+Call nf_reset to drop the conntrack info following the pass through the
+VRF device.  The nf_reset is needed on Tx but not Rx because of the order
+in which NF_HOOK's are hit: on Rx the VRF device is after the real ingress
+device and on Tx it is before the real egress device. Connection
+tracking should be tied to the real egress device and not the VRF device.
+
+Fixes: 8f58336d3f78a ("net: Add ethernet header for pass through VRF device")
+Fixes: 35402e3136634 ("net: Add IPv6 support to VRF device")
+Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vrf.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -371,6 +371,8 @@ static int vrf_finish_output6(struct net
+       struct in6_addr *nexthop;
+       int ret;
+ 
++      nf_reset(skb);
++
+       skb->protocol = htons(ETH_P_IPV6);
+       skb->dev = dev;
+ 
+@@ -552,6 +554,8 @@ static int vrf_finish_output(struct net
+       u32 nexthop;
+       int ret = -EINVAL;
+ 
++      nf_reset(skb);
++
+       /* Be paranoid, rather than too clever. */
+       if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
+               struct sk_buff *skb2;
diff --git a/queue-4.9/net-vrf-fix-nat-within-a-vrf.patch b/queue-4.9/net-vrf-fix-nat-within-a-vrf.patch

new file mode 100644 (file)

index 0000000..ae4e735
--- /dev/null
+++ b/queue-4.9/net-vrf-fix-nat-within-a-vrf.patch
@@ -0,0 +1,33 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: David Ahern <dsa@cumulusnetworks.com>
+Date: Wed, 14 Dec 2016 11:06:18 -0800
+Subject: net: vrf: Fix NAT within a VRF
+
+From: David Ahern <dsa@cumulusnetworks.com>
+
+
+[ Upstream commit a0f37efa82253994b99623dbf41eea8dd0ba169b ]
+
+Connection tracking with VRF is broken because the pass through the VRF
+device drops the connection tracking info. Removing the call to nf_reset
+allows DNAT and MASQUERADE to work across interfaces within a VRF.
+
+Fixes: 73e20b761acf ("net: vrf: Add support for PREROUTING rules on vrf device")
+Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vrf.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -850,8 +850,6 @@ static struct sk_buff *vrf_rcv_nfhook(u8
+ {
+       struct net *net = dev_net(dev);
+ 
+-      nf_reset(skb);
+-
+       if (NF_HOOK(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) < 0)
+               skb = NULL;    /* kfree_skb(skb) handled by nf code */
+ 
diff --git a/queue-4.9/r8152-fix-rx-issue-for-runtime-suspend.patch b/queue-4.9/r8152-fix-rx-issue-for-runtime-suspend.patch

new file mode 100644 (file)

index 0000000..4869572
--- /dev/null
+++ b/queue-4.9/r8152-fix-rx-issue-for-runtime-suspend.patch
@@ -0,0 +1,74 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: hayeswang <hayeswang@realtek.com>
+Date: Tue, 10 Jan 2017 17:04:07 +0800
+Subject: r8152: fix rx issue for runtime suspend
+
+From: hayeswang <hayeswang@realtek.com>
+
+
+[ Upstream commit 75dc692eda114cb234a46cb11893a9c3ea520934 ]
+
+Pause the rx and make sure the rx fifo is empty when the autosuspend
+occurs.
+
+If the rx data comes when the driver is canceling the rx urb, the host
+controller would stop getting the data from the device and continue
+it after the next rx urb is submitted. That is, one continuous piece of data
+is split across two different urb buffers. That lets the driver treat the
+data as an rx descriptor, and unexpected behavior happens.
+
+Signed-off-by: Hayes Wang <hayeswang@realtek.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/r8152.c |   31 ++++++++++++++++++++++++++++---
+ 1 file changed, 28 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/usb/r8152.c
++++ b/drivers/net/usb/r8152.c
+@@ -3582,17 +3582,42 @@ static int rtl8152_rumtime_suspend(struc
+       int ret = 0;
+ 
+       if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) {
++              u32 rcr = 0;
++
+               if (delay_autosuspend(tp)) {
+                       ret = -EBUSY;
+                       goto out1;
+               }
+ 
++              if (netif_carrier_ok(netdev)) {
++                      u32 ocp_data;
++
++                      rcr = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
++                      ocp_data = rcr & ~RCR_ACPT_ALL;
++                      ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
++                      rxdy_gated_en(tp, true);
++                      ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA,
++                                               PLA_OOB_CTRL);
++                      if (!(ocp_data & RXFIFO_EMPTY)) {
++                              rxdy_gated_en(tp, false);
++                              ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, rcr);
++                              ret = -EBUSY;
++                              goto out1;
++                      }
++              }
++
+               clear_bit(WORK_ENABLE, &tp->flags);
+               usb_kill_urb(tp->intr_urb);
+-              napi_disable(&tp->napi);
+-              rtl_stop_rx(tp);
++
+               tp->rtl_ops.autosuspend_en(tp, true);
+-              napi_enable(&tp->napi);
++
++              if (netif_carrier_ok(netdev)) {
++                      napi_disable(&tp->napi);
++                      rtl_stop_rx(tp);
++                      rxdy_gated_en(tp, false);
++                      ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, rcr);
++                      napi_enable(&tp->napi);
++              }
+       }
+ 
+       set_bit(SELECTIVE_SUSPEND, &tp->flags);
diff --git a/queue-4.9/r8152-split-rtl8152_suspend-function.patch b/queue-4.9/r8152-split-rtl8152_suspend-function.patch

new file mode 100644 (file)

index 0000000..0f9f887
--- /dev/null
+++ b/queue-4.9/r8152-split-rtl8152_suspend-function.patch
@@ -0,0 +1,102 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: hayeswang <hayeswang@realtek.com>
+Date: Tue, 10 Jan 2017 17:04:06 +0800
+Subject: r8152: split rtl8152_suspend function
+
+From: hayeswang <hayeswang@realtek.com>
+
+
+[ Upstream commit 8fb280616878b81c0790a0c33acbeec59c5711f4 ]
+
+Split rtl8152_suspend() into rtl8152_system_suspend() and
+rtl8152_rumtime_suspend().
+
+Signed-off-by: Hayes Wang <hayeswang@realtek.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/r8152.c |   57 +++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 40 insertions(+), 17 deletions(-)
+
+--- a/drivers/net/usb/r8152.c
++++ b/drivers/net/usb/r8152.c
+@@ -3576,39 +3576,62 @@ static bool delay_autosuspend(struct r81
+               return false;
+ }
+ 
+-static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message)
++static int rtl8152_rumtime_suspend(struct r8152 *tp)
+ {
+-      struct r8152 *tp = usb_get_intfdata(intf);
+       struct net_device *netdev = tp->netdev;
+       int ret = 0;
+ 
+-      mutex_lock(&tp->control);
+-
+-      if (PMSG_IS_AUTO(message)) {
+-              if (netif_running(netdev) && delay_autosuspend(tp)) {
++      if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) {
++              if (delay_autosuspend(tp)) {
+                       ret = -EBUSY;
+                       goto out1;
+               }
+ 
+-              set_bit(SELECTIVE_SUSPEND, &tp->flags);
+-      } else {
+-              netif_device_detach(netdev);
++              clear_bit(WORK_ENABLE, &tp->flags);
++              usb_kill_urb(tp->intr_urb);
++              napi_disable(&tp->napi);
++              rtl_stop_rx(tp);
++              tp->rtl_ops.autosuspend_en(tp, true);
++              napi_enable(&tp->napi);
+       }
+ 
++      set_bit(SELECTIVE_SUSPEND, &tp->flags);
++
++out1:
++      return ret;
++}
++
++static int rtl8152_system_suspend(struct r8152 *tp)
++{
++      struct net_device *netdev = tp->netdev;
++      int ret = 0;
++
++      netif_device_detach(netdev);
++
+       if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) {
+               clear_bit(WORK_ENABLE, &tp->flags);
+               usb_kill_urb(tp->intr_urb);
+               napi_disable(&tp->napi);
+-              if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
+-                      rtl_stop_rx(tp);
+-                      tp->rtl_ops.autosuspend_en(tp, true);
+-              } else {
+-                      cancel_delayed_work_sync(&tp->schedule);
+-                      tp->rtl_ops.down(tp);
+-              }
++              cancel_delayed_work_sync(&tp->schedule);
++              tp->rtl_ops.down(tp);
+               napi_enable(&tp->napi);
+       }
+-out1:
++
++      return ret;
++}
++
++static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message)
++{
++      struct r8152 *tp = usb_get_intfdata(intf);
++      int ret;
++
++      mutex_lock(&tp->control);
++
++      if (PMSG_IS_AUTO(message))
++              ret = rtl8152_rumtime_suspend(tp);
++      else
++              ret = rtl8152_system_suspend(tp);
++
+       mutex_unlock(&tp->control);
+ 
+       return ret;
diff --git a/queue-4.9/rtnl-stats-add-missing-netlink-message-size-checks.patch b/queue-4.9/rtnl-stats-add-missing-netlink-message-size-checks.patch

new file mode 100644 (file)

index 0000000..005f02b
--- /dev/null
+++ b/queue-4.9/rtnl-stats-add-missing-netlink-message-size-checks.patch
@@ -0,0 +1,47 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Mathias Krause <minipli@googlemail.com>
+Date: Wed, 28 Dec 2016 17:52:15 +0100
+Subject: rtnl: stats - add missing netlink message size checks
+
+From: Mathias Krause <minipli@googlemail.com>
+
+
+[ Upstream commit 4775cc1f2d5abca894ac32774eefc22c45347d1c ]
+
+We fail to check whether the netlink message is actually big enough to contain
+a struct if_stats_msg.
+
+Add a check to prevent userland from sending us short messages that would
+make us access memory beyond the end of the message.
+
+Fixes: 10c9ead9f3c6 ("rtnetlink: add new RTM_GETSTATS message to dump...")
+Signed-off-by: Mathias Krause <minipli@googlemail.com>
+Cc: Roopa Prabhu <roopa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -3886,6 +3886,9 @@ static int rtnl_stats_get(struct sk_buff
+       u32 filter_mask;
+       int err;
+ 
++      if (nlmsg_len(nlh) < sizeof(*ifsm))
++              return -EINVAL;
++
+       ifsm = nlmsg_data(nlh);
+       if (ifsm->ifindex > 0)
+               dev = __dev_get_by_index(net, ifsm->ifindex);
+@@ -3935,6 +3938,9 @@ static int rtnl_stats_dump(struct sk_buf
+ 
+       cb->seq = net->dev_base_seq;
+ 
++      if (nlmsg_len(cb->nlh) < sizeof(*ifsm))
++              return -EINVAL;
++
+       ifsm = nlmsg_data(cb->nlh);
+       filter_mask = ifsm->filter_mask;
+       if (!filter_mask)
diff --git a/queue-4.9/sctp-sctp_transport_lookup_process-should-rcu_read_unlock-when-transport-is-null.patch b/queue-4.9/sctp-sctp_transport_lookup_process-should-rcu_read_unlock-when-transport-is-null.patch

new file mode 100644 (file)

index 0000000..e4175da
--- /dev/null
+++ b/queue-4.9/sctp-sctp_transport_lookup_process-should-rcu_read_unlock-when-transport-is-null.patch
@@ -0,0 +1,40 @@
+From foo@baz Thu Jan 12 21:37:26 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 15 Dec 2016 23:05:52 +0800
+Subject: sctp: sctp_transport_lookup_process should rcu_read_unlock when transport is null
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 08abb79542c9e8c367d1d8e44fe1026868d3f0a7 ]
+
+Prior to this patch, sctp_transport_lookup_process didn't rcu_read_unlock
+when it failed to find a transport by sctp_addrs_lookup_transport.
+
+This patch is to fix it by moving up rcu_read_unlock right before checking
+transport and also to remove the out path.
+
+Fixes: 1cceda784980 ("sctp: fix the issue sctp_diag uses lock_sock in rcu_read_lock")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/socket.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -4479,9 +4479,10 @@ int sctp_transport_lookup_process(int (*
+ 
+       rcu_read_lock();
+       transport = sctp_addrs_lookup_transport(net, laddr, paddr);
+-      if (!transport || !sctp_transport_hold(transport))
++      if (!transport || !sctp_transport_hold(transport)) {
++              rcu_read_unlock();
+               goto out;
+-
++      }
+       rcu_read_unlock();
+       err = cb(transport, p);
+       sctp_transport_put(transport);
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 12 Jan 2017 20:38:36 +0000 (21:38 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 12 Jan 2017 20:38:36 +0000 (21:38 +0100)
queue-4.9/bpf-change-back-to-orig-prog-on-too-many-passes.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/drop_monitor-add-missing-call-to-genlmsg_end.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/drop_monitor-consider-inserted-data-in-genlmsg_end.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/flow_dissector-update-pptp-handling-to-avoid-null-pointer-deref.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/gro-disable-frag0-optimization-on-ipv6-ext-headers.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/gro-enter-slow-path-if-there-is-no-tailroom.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/gro-use-min_t-in-skb_gro_reset_offset.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/igmp-make-igmp-group-member-rfc-3376-compliant.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/inet-fix-ip-v6-_recvorigdstaddr-for-udp-sockets.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ipv4-do-not-allow-main-to-be-alias-for-new-local-w-custom-rules.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ipv6-handle-efault-from-skb_copy_bits.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-add-the-af_qipcrtr-entries-to-family-name-tables.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-dsa-bcm_sf2-do-not-clobber-b53_switch_ops.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-dsa-bcm_sf2-utilize-nested-mdio-read-write.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-dsa-ensure-validity-of-dst-ds.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-fix-incorrect-original-ingress-device-index-in-pktinfo.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-ipv4-dst-for-local-input-routes-should-use-l3mdev-if-relevant.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-ipv4-fix-multipath-selection-with-vrf.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-mlx5-avoid-shadowing-numa_node.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-mlx5-cancel-recovery-work-in-remove-flow.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-mlx5-check-fw-limitations-on-log_max_qp-before-setting-it.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-mlx5-mask-destination-mac-value-in-ethtool-steering-rules.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-mlx5-prevent-setting-multicast-macs-for-vfs.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-mlx5e-disable-netdev-after-close.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-mlx5e-don-t-sync-netdev-state-when-not-registered.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-mlx5e-remove-warn_once-from-adaptive-moderation-code.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-sched-cls_flower-fix-missing-addr_type-in-classify.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-sched-fix-soft-lockup-in-tc_classify.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-stmmac-fix-race-between-stmmac_drv_probe-and-stmmac_open.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-vrf-add-missing-rx-counters.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-vrf-do-not-allow-table-id-0.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-vrf-drop-conntrack-data-after-pass-through-vrf-device-on-tx.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-vrf-fix-nat-within-a-vrf.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/r8152-fix-rx-issue-for-runtime-suspend.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/r8152-split-rtl8152_suspend-function.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/rtnl-stats-add-missing-netlink-message-size-checks.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/sctp-sctp_transport_lookup_process-should-rcu_read_unlock-when-transport-is-null.patch	[new file with mode: 0644]	patch \| blob