From: Greg Kroah-Hartman Date: Thu, 12 Jan 2017 20:38:36 +0000 (+0100) Subject: 4.9-stable patches X-Git-Tag: v4.4.43~5 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a8360df2e3c588bf406b116ab14d631059aebb9e;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: bpf-change-back-to-orig-prog-on-too-many-passes.patch drop_monitor-add-missing-call-to-genlmsg_end.patch drop_monitor-consider-inserted-data-in-genlmsg_end.patch flow_dissector-update-pptp-handling-to-avoid-null-pointer-deref.patch gro-disable-frag0-optimization-on-ipv6-ext-headers.patch gro-enter-slow-path-if-there-is-no-tailroom.patch gro-use-min_t-in-skb_gro_reset_offset.patch igmp-make-igmp-group-member-rfc-3376-compliant.patch inet-fix-ip-v6-_recvorigdstaddr-for-udp-sockets.patch ipv4-do-not-allow-main-to-be-alias-for-new-local-w-custom-rules.patch ipv6-handle-efault-from-skb_copy_bits.patch net-add-the-af_qipcrtr-entries-to-family-name-tables.patch net-dsa-bcm_sf2-do-not-clobber-b53_switch_ops.patch net-dsa-bcm_sf2-utilize-nested-mdio-read-write.patch net-dsa-ensure-validity-of-dst-ds.patch net-fix-incorrect-original-ingress-device-index-in-pktinfo.patch net-ipv4-dst-for-local-input-routes-should-use-l3mdev-if-relevant.patch net-ipv4-fix-multipath-selection-with-vrf.patch net-mlx5-avoid-shadowing-numa_node.patch net-mlx5-cancel-recovery-work-in-remove-flow.patch net-mlx5-check-fw-limitations-on-log_max_qp-before-setting-it.patch net-mlx5-mask-destination-mac-value-in-ethtool-steering-rules.patch net-mlx5-prevent-setting-multicast-macs-for-vfs.patch net-mlx5e-disable-netdev-after-close.patch net-mlx5e-don-t-sync-netdev-state-when-not-registered.patch net-mlx5e-remove-warn_once-from-adaptive-moderation-code.patch net-sched-cls_flower-fix-missing-addr_type-in-classify.patch net-sched-fix-soft-lockup-in-tc_classify.patch net-stmmac-fix-race-between-stmmac_drv_probe-and-stmmac_open.patch net-vrf-add-missing-rx-counters.patch net-vrf-do-not-allow-table-id-0.patch 
net-vrf-drop-conntrack-data-after-pass-through-vrf-device-on-tx.patch net-vrf-fix-nat-within-a-vrf.patch r8152-fix-rx-issue-for-runtime-suspend.patch r8152-split-rtl8152_suspend-function.patch rtnl-stats-add-missing-netlink-message-size-checks.patch sctp-sctp_transport_lookup_process-should-rcu_read_unlock-when-transport-is-null.patch --- diff --git a/queue-4.9/bpf-change-back-to-orig-prog-on-too-many-passes.patch b/queue-4.9/bpf-change-back-to-orig-prog-on-too-many-passes.patch new file mode 100644 index 00000000000..d407b336fc1 --- /dev/null +++ b/queue-4.9/bpf-change-back-to-orig-prog-on-too-many-passes.patch @@ -0,0 +1,34 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Daniel Borkmann +Date: Sat, 7 Jan 2017 00:26:33 +0100 +Subject: bpf: change back to orig prog on too many passes + +From: Daniel Borkmann + + +[ Upstream commit 9d5ecb09d525469abd1a10c096cb5a17206523f2 ] + +If after too many passes still no image could be emitted, then +swap back to the original program as we do in all other cases +and don't use the one with blinding. + +Fixes: 959a75791603 ("bpf, x86: add support for constant blinding") +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/net/bpf_jit_comp.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -1172,6 +1172,8 @@ struct bpf_prog *bpf_int_jit_compile(str + set_memory_ro((unsigned long)header, header->pages); + prog->bpf_func = (void *)image; + prog->jited = 1; ++ } else { ++ prog = orig_prog; + } + + out_addrs: diff --git a/queue-4.9/drop_monitor-add-missing-call-to-genlmsg_end.patch b/queue-4.9/drop_monitor-add-missing-call-to-genlmsg_end.patch new file mode 100644 index 00000000000..f8bd5b787ed --- /dev/null +++ b/queue-4.9/drop_monitor-add-missing-call-to-genlmsg_end.patch @@ -0,0 +1,72 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Reiter Wolfgang +Date: Sat, 31 Dec 2016 21:11:57 +0100 +Subject: drop_monitor: add missing call to genlmsg_end + +From: Reiter Wolfgang + + +[ Upstream commit 4200462d88f47f3759bdf4705f87e207b0f5b2e4 ] + +Update nlmsg_len field with genlmsg_end to enable userspace processing +using nlmsg_next helper. Also adds error handling. + +Signed-off-by: Reiter Wolfgang +Acked-by: Neil Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/drop_monitor.c | 33 ++++++++++++++++++++++++--------- + 1 file changed, 24 insertions(+), 9 deletions(-) + +--- a/net/core/drop_monitor.c ++++ b/net/core/drop_monitor.c +@@ -80,6 +80,7 @@ static struct sk_buff *reset_per_cpu_dat + struct nlattr *nla; + struct sk_buff *skb; + unsigned long flags; ++ void *msg_header; + + al = sizeof(struct net_dm_alert_msg); + al += dm_hit_limit * sizeof(struct net_dm_drop_point); +@@ -87,17 +88,31 @@ static struct sk_buff *reset_per_cpu_dat + + skb = genlmsg_new(al, GFP_KERNEL); + +- if (skb) { +- genlmsg_put(skb, 0, 0, &net_drop_monitor_family, +- 0, NET_DM_CMD_ALERT); +- nla = nla_reserve(skb, NLA_UNSPEC, +- sizeof(struct net_dm_alert_msg)); +- msg = nla_data(nla); +- memset(msg, 0, al); +- } else { +- mod_timer(&data->send_timer, jiffies + HZ / 10); ++ if (!skb) ++ goto err; ++ ++ msg_header = genlmsg_put(skb, 0, 0, &net_drop_monitor_family, ++ 0, NET_DM_CMD_ALERT); ++ if (!msg_header) { ++ nlmsg_free(skb); ++ skb = NULL; ++ goto err; ++ } ++ nla = nla_reserve(skb, NLA_UNSPEC, ++ sizeof(struct net_dm_alert_msg)); ++ if (!nla) { ++ nlmsg_free(skb); ++ skb = NULL; ++ goto err; + } ++ msg = nla_data(nla); ++ memset(msg, 0, al); ++ genlmsg_end(skb, msg_header); ++ goto out; + ++err: ++ mod_timer(&data->send_timer, jiffies + HZ / 10); ++out: + spin_lock_irqsave(&data->lock, flags); + swap(data->skb, skb); + spin_unlock_irqrestore(&data->lock, flags); diff --git a/queue-4.9/drop_monitor-consider-inserted-data-in-genlmsg_end.patch b/queue-4.9/drop_monitor-consider-inserted-data-in-genlmsg_end.patch new file mode 100644 index 00000000000..9c48bd36ab3 --- /dev/null +++ b/queue-4.9/drop_monitor-consider-inserted-data-in-genlmsg_end.patch @@ -0,0 +1,48 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Reiter Wolfgang +Date: Tue, 3 Jan 2017 01:39:10 +0100 +Subject: drop_monitor: consider inserted data in genlmsg_end + +From: Reiter Wolfgang + + +[ Upstream commit 
3b48ab2248e61408910e792fe84d6ec466084c1a ] + +Final nlmsg_len field update must reflect inserted net_dm_drop_point +data. + +This patch depends on previous patch: +"drop_monitor: add missing call to genlmsg_end" + +Signed-off-by: Reiter Wolfgang +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/drop_monitor.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/net/core/drop_monitor.c ++++ b/net/core/drop_monitor.c +@@ -107,7 +107,6 @@ static struct sk_buff *reset_per_cpu_dat + } + msg = nla_data(nla); + memset(msg, 0, al); +- genlmsg_end(skb, msg_header); + goto out; + + err: +@@ -117,6 +116,13 @@ out: + swap(data->skb, skb); + spin_unlock_irqrestore(&data->lock, flags); + ++ if (skb) { ++ struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; ++ struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlh); ++ ++ genlmsg_end(skb, genlmsg_data(gnlh)); ++ } ++ + return skb; + } + diff --git a/queue-4.9/flow_dissector-update-pptp-handling-to-avoid-null-pointer-deref.patch b/queue-4.9/flow_dissector-update-pptp-handling-to-avoid-null-pointer-deref.patch new file mode 100644 index 00000000000..043df4e07c8 --- /dev/null +++ b/queue-4.9/flow_dissector-update-pptp-handling-to-avoid-null-pointer-deref.patch @@ -0,0 +1,82 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Ian Kumlien +Date: Mon, 2 Jan 2017 09:18:35 +0100 +Subject: flow_dissector: Update pptp handling to avoid null pointer deref. + +From: Ian Kumlien + + +[ Upstream commit d0af683407a26a4437d8fa6e283ea201f2ae8146 ] + +__skb_flow_dissect can be called with a skb or a data packet, either +can be NULL. All calls seems to have been moved to __skb_header_pointer +except the pptp handling which is still calling skb_header_pointer. 
+ +skb_header_pointer will use skb->data and thus: +[ 109.556866] BUG: unable to handle kernel NULL pointer dereference at 0000000000000080 +[ 109.557102] IP: [] __skb_flow_dissect+0xa88/0xce0 +[ 109.557263] PGD 0 +[ 109.557338] +[ 109.557484] Oops: 0000 [#1] SMP +[ 109.557562] Modules linked in: chaoskey +[ 109.557783] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.9.0 #79 +[ 109.557867] Hardware name: Supermicro A1SRM-LN7F/LN5F/A1SRM-LN7F-2758, BIOS 1.0c 11/04/2015 +[ 109.557957] task: ffff94085c27bc00 task.stack: ffffb745c0068000 +[ 109.558041] RIP: 0010:[] [] __skb_flow_dissect+0xa88/0xce0 +[ 109.558203] RSP: 0018:ffff94087fc83d40 EFLAGS: 00010206 +[ 109.558286] RAX: 0000000000000130 RBX: ffffffff8975bf80 RCX: ffff94084fab6800 +[ 109.558373] RDX: 0000000000000010 RSI: 000000000000000c RDI: 0000000000000000 +[ 109.558460] RBP: 0000000000000b88 R08: 0000000000000000 R09: 0000000000000022 +[ 109.558547] R10: 0000000000000008 R11: ffff94087fc83e04 R12: 0000000000000000 +[ 109.558763] R13: ffff94084fab6800 R14: ffff94087fc83e04 R15: 000000000000002f +[ 109.558979] FS: 0000000000000000(0000) GS:ffff94087fc80000(0000) knlGS:0000000000000000 +[ 109.559326] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 109.559539] CR2: 0000000000000080 CR3: 0000000281809000 CR4: 00000000001026e0 +[ 109.559753] Stack: +[ 109.559957] 000000000000000c ffff94084fab6822 0000000000000001 ffff94085c2b5fc0 +[ 109.560578] 0000000000000001 0000000000002000 0000000000000000 0000000000000000 +[ 109.561200] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 +[ 109.561820] Call Trace: +[ 109.562027] +[ 109.562108] [] ? eth_get_headlen+0x7a/0xf0 +[ 109.562522] [] ? igb_poll+0x96a/0xe80 +[ 109.562737] [] ? net_rx_action+0x20b/0x350 +[ 109.562953] [] ? __do_softirq+0xe8/0x280 +[ 109.563169] [] ? irq_exit+0xaa/0xb0 +[ 109.563382] [] ? do_IRQ+0x4b/0xc0 +[ 109.563597] [] ? common_interrupt+0x7f/0x7f +[ 109.563810] +[ 109.563890] [] ? cpuidle_enter_state+0x130/0x2c0 +[ 109.564304] [] ? 
cpuidle_enter_state+0x120/0x2c0 +[ 109.564520] [] ? cpu_startup_entry+0x19f/0x1f0 +[ 109.564737] [] ? start_secondary+0x12a/0x140 +[ 109.564950] Code: 83 e2 20 a8 80 0f 84 60 01 00 00 c7 04 24 08 00 +00 00 66 85 d2 0f 84 be fe ff ff e9 69 fe ff ff 8b 34 24 89 f2 83 c2 +04 66 85 c0 <41> 8b 84 24 80 00 00 00 0f 49 d6 41 8d 31 01 d6 41 2b 84 +24 84 +[ 109.569959] RIP [] __skb_flow_dissect+0xa88/0xce0 +[ 109.570245] RSP +[ 109.570453] CR2: 0000000000000080 + +Fixes: ab10dccb1160 ("rps: Inspect PPTP encapsulated by GRE to get flow hash") +Signed-off-by: Ian Kumlien +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/flow_dissector.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/net/core/flow_dissector.c ++++ b/net/core/flow_dissector.c +@@ -445,8 +445,9 @@ ip_proto_again: + if (hdr->flags & GRE_ACK) + offset += sizeof(((struct pptp_gre_header *)0)->ack); + +- ppp_hdr = skb_header_pointer(skb, nhoff + offset, +- sizeof(_ppp_hdr), _ppp_hdr); ++ ppp_hdr = __skb_header_pointer(skb, nhoff + offset, ++ sizeof(_ppp_hdr), ++ data, hlen, _ppp_hdr); + if (!ppp_hdr) + goto out_bad; + diff --git a/queue-4.9/gro-disable-frag0-optimization-on-ipv6-ext-headers.patch b/queue-4.9/gro-disable-frag0-optimization-on-ipv6-ext-headers.patch new file mode 100644 index 00000000000..588a1168531 --- /dev/null +++ b/queue-4.9/gro-disable-frag0-optimization-on-ipv6-ext-headers.patch @@ -0,0 +1,67 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Herbert Xu +Date: Tue, 10 Jan 2017 12:24:15 -0800 +Subject: gro: Disable frag0 optimization on IPv6 ext headers + +From: Herbert Xu + + +[ Upstream commit 57ea52a865144aedbcd619ee0081155e658b6f7d ] + +The GRO fast path caches the frag0 address. This address becomes +invalid if frag0 is modified by pskb_may_pull or its variants. +So whenever that happens we must disable the frag0 optimization. 
+ +This is usually done through the combination of gro_header_hard +and gro_header_slow, however, the IPv6 extension header path did +the pulling directly and would continue to use the GRO fast path +incorrectly. + +This patch fixes it by disabling the fast path when we enter the +IPv6 extension header path. + +Fixes: 78a478d0efd9 ("gro: Inline skb_gro_header and cache frag0 virtual address") +Reported-by: Slava Shwartsman +Signed-off-by: Herbert Xu +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/netdevice.h | 9 +++++++-- + net/ipv6/ip6_offload.c | 1 + + 2 files changed, 8 insertions(+), 2 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -2502,14 +2502,19 @@ static inline int skb_gro_header_hard(st + return NAPI_GRO_CB(skb)->frag0_len < hlen; + } + ++static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) ++{ ++ NAPI_GRO_CB(skb)->frag0 = NULL; ++ NAPI_GRO_CB(skb)->frag0_len = 0; ++} ++ + static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, + unsigned int offset) + { + if (!pskb_may_pull(skb, hlen)) + return NULL; + +- NAPI_GRO_CB(skb)->frag0 = NULL; +- NAPI_GRO_CB(skb)->frag0_len = 0; ++ skb_gro_frag0_invalidate(skb); + return skb->data + offset; + } + +--- a/net/ipv6/ip6_offload.c ++++ b/net/ipv6/ip6_offload.c +@@ -191,6 +191,7 @@ static struct sk_buff **ipv6_gro_receive + ops = rcu_dereference(inet6_offloads[proto]); + if (!ops || !ops->callbacks.gro_receive) { + __pskb_pull(skb, skb_gro_offset(skb)); ++ skb_gro_frag0_invalidate(skb); + proto = ipv6_gso_pull_exthdrs(skb, proto); + skb_gro_pull(skb, -skb_transport_offset(skb)); + skb_reset_transport_header(skb); diff --git a/queue-4.9/gro-enter-slow-path-if-there-is-no-tailroom.patch b/queue-4.9/gro-enter-slow-path-if-there-is-no-tailroom.patch new file mode 100644 index 00000000000..5891b4f1d10 --- /dev/null +++ b/queue-4.9/gro-enter-slow-path-if-there-is-no-tailroom.patch 
@@ -0,0 +1,39 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Herbert Xu +Date: Tue, 10 Jan 2017 12:24:01 -0800 +Subject: gro: Enter slow-path if there is no tailroom + +From: Herbert Xu + + +[ Upstream commit 1272ce87fa017ca4cf32920764d879656b7a005a ] + +The GRO path has a fast-path where we avoid calling pskb_may_pull +and pskb_expand by directly accessing frag0. However, this should +only be done if we have enough tailroom in the skb as otherwise +we'll have to expand it later anyway. + +This patch adds the check by capping frag0_len with the skb tailroom. + +Fixes: cb18978cbf45 ("gro: Open-code final pskb_may_pull") +Reported-by: Slava Shwartsman +Signed-off-by: Herbert Xu +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4453,7 +4453,8 @@ static void skb_gro_reset_offset(struct + pinfo->nr_frags && + !PageHighMem(skb_frag_page(frag0))) { + NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); +- NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); ++ NAPI_GRO_CB(skb)->frag0_len = min(skb_frag_size(frag0), ++ skb->end - skb->tail); + } + } + diff --git a/queue-4.9/gro-use-min_t-in-skb_gro_reset_offset.patch b/queue-4.9/gro-use-min_t-in-skb_gro_reset_offset.patch new file mode 100644 index 00000000000..af25dbb48ad --- /dev/null +++ b/queue-4.9/gro-use-min_t-in-skb_gro_reset_offset.patch @@ -0,0 +1,36 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Eric Dumazet +Date: Tue, 10 Jan 2017 19:52:43 -0800 +Subject: gro: use min_t() in skb_gro_reset_offset() + +From: Eric Dumazet + + +[ Upstream commit 7cfd5fd5a9813f1430290d20c0fead9b4582a307 ] + +On 32bit arches, (skb->end - skb->data) is not 'unsigned int', +so we shall use min_t() instead of min() to avoid a compiler error. 
+ +Fixes: 1272ce87fa01 ("gro: Enter slow-path if there is no tailroom") +Reported-by: kernel test robot +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4453,8 +4453,9 @@ static void skb_gro_reset_offset(struct + pinfo->nr_frags && + !PageHighMem(skb_frag_page(frag0))) { + NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); +- NAPI_GRO_CB(skb)->frag0_len = min(skb_frag_size(frag0), +- skb->end - skb->tail); ++ NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, ++ skb_frag_size(frag0), ++ skb->end - skb->tail); + } + } + diff --git a/queue-4.9/igmp-make-igmp-group-member-rfc-3376-compliant.patch b/queue-4.9/igmp-make-igmp-group-member-rfc-3376-compliant.patch new file mode 100644 index 00000000000..4c1dbf51564 --- /dev/null +++ b/queue-4.9/igmp-make-igmp-group-member-rfc-3376-compliant.patch @@ -0,0 +1,88 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Michal Tesar +Date: Mon, 2 Jan 2017 14:38:36 +0100 +Subject: igmp: Make igmp group member RFC 3376 compliant + +From: Michal Tesar + + +[ Upstream commit 7ababb782690e03b78657e27bd051e20163af2d6 ] + +5.2. Action on Reception of a Query + + When a system receives a Query, it does not respond immediately. + Instead, it delays its response by a random amount of time, bounded + by the Max Resp Time value derived from the Max Resp Code in the + received Query message. A system may receive a variety of Queries on + different interfaces and of different kinds (e.g., General Queries, + Group-Specific Queries, and Group-and-Source-Specific Queries), each + of which may require its own delayed response. + + Before scheduling a response to a Query, the system must first + consider previously scheduled pending responses and in many cases + schedule a combined response. 
Therefore, the system must be able to + maintain the following state: + + o A timer per interface for scheduling responses to General Queries. + + o A per-group and interface timer for scheduling responses to Group- + Specific and Group-and-Source-Specific Queries. + + o A per-group and interface list of sources to be reported in the + response to a Group-and-Source-Specific Query. + + When a new Query with the Router-Alert option arrives on an + interface, provided the system has state to report, a delay for a + response is randomly selected in the range (0, [Max Resp Time]) where + Max Resp Time is derived from Max Resp Code in the received Query + message. The following rules are then used to determine if a Report + needs to be scheduled and the type of Report to schedule. The rules + are considered in order and only the first matching rule is applied. + + 1. If there is a pending response to a previous General Query + scheduled sooner than the selected delay, no additional response + needs to be scheduled. + + 2. If the received Query is a General Query, the interface timer is + used to schedule a response to the General Query after the + selected delay. Any previously pending response to a General + Query is canceled. +--8<-- + +Currently the timer is rearmed with new random expiration time for +every incoming query regardless of possibly already pending report. +Which is not aligned with the above RFE. +It also might happen that higher rate of incoming queries can +postpone the report after the expiration time of the first query +causing group membership loss. + +Now the per interface general query timer is rearmed only +when there is no pending report already scheduled on that interface or +the newly selected expiration time is before the already pending +scheduled report. + +Signed-off-by: Michal Tesar +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/igmp.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/net/ipv4/igmp.c ++++ b/net/ipv4/igmp.c +@@ -219,9 +219,14 @@ static void igmp_start_timer(struct ip_m + static void igmp_gq_start_timer(struct in_device *in_dev) + { + int tv = prandom_u32() % in_dev->mr_maxdelay; ++ unsigned long exp = jiffies + tv + 2; ++ ++ if (in_dev->mr_gq_running && ++ time_after_eq(exp, (in_dev->mr_gq_timer).expires)) ++ return; + + in_dev->mr_gq_running = 1; +- if (!mod_timer(&in_dev->mr_gq_timer, jiffies+tv+2)) ++ if (!mod_timer(&in_dev->mr_gq_timer, exp)) + in_dev_hold(in_dev); + } + diff --git a/queue-4.9/inet-fix-ip-v6-_recvorigdstaddr-for-udp-sockets.patch b/queue-4.9/inet-fix-ip-v6-_recvorigdstaddr-for-udp-sockets.patch new file mode 100644 index 00000000000..a85d7478e1a --- /dev/null +++ b/queue-4.9/inet-fix-ip-v6-_recvorigdstaddr-for-udp-sockets.patch @@ -0,0 +1,46 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Willem de Bruijn +Date: Thu, 22 Dec 2016 18:19:16 -0500 +Subject: inet: fix IP(V6)_RECVORIGDSTADDR for udp sockets + +From: Willem de Bruijn + + +[ Upstream commit 39b2dd765e0711e1efd1d1df089473a8dd93ad48 ] + +Socket cmsg IP(V6)_RECVORIGDSTADDR checks that port range lies within +the packet. For sockets that have transport headers pulled, transport +offset can be negative. Use signed comparison to avoid overflow. + +Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") +Reported-by: Nisar Jagabar +Signed-off-by: Willem de Bruijn +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_sockglue.c | 2 +- + net/ipv6/datagram.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv4/ip_sockglue.c ++++ b/net/ipv4/ip_sockglue.c +@@ -137,7 +137,7 @@ static void ip_cmsg_recv_dstaddr(struct + const struct iphdr *iph = ip_hdr(skb); + __be16 *ports = (__be16 *)skb_transport_header(skb); + +- if (skb_transport_offset(skb) + 4 > skb->len) ++ if (skb_transport_offset(skb) + 4 > (int)skb->len) + return; + + /* All current transport protocols have the port numbers in the +--- a/net/ipv6/datagram.c ++++ b/net/ipv6/datagram.c +@@ -700,7 +700,7 @@ void ip6_datagram_recv_specific_ctl(stru + struct sockaddr_in6 sin6; + __be16 *ports = (__be16 *) skb_transport_header(skb); + +- if (skb_transport_offset(skb) + 4 <= skb->len) { ++ if (skb_transport_offset(skb) + 4 <= (int)skb->len) { + /* All current transport protocols have the port numbers in the + * first four bytes of the transport header and this function is + * written with this assumption in mind. diff --git a/queue-4.9/ipv4-do-not-allow-main-to-be-alias-for-new-local-w-custom-rules.patch b/queue-4.9/ipv4-do-not-allow-main-to-be-alias-for-new-local-w-custom-rules.patch new file mode 100644 index 00000000000..7a89cd355ab --- /dev/null +++ b/queue-4.9/ipv4-do-not-allow-main-to-be-alias-for-new-local-w-custom-rules.patch @@ -0,0 +1,35 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Alexander Duyck +Date: Mon, 2 Jan 2017 13:32:54 -0800 +Subject: ipv4: Do not allow MAIN to be alias for new LOCAL w/ custom rules + +From: Alexander Duyck + + +[ Upstream commit 5350d54f6cd12eaff623e890744c79b700bd3f17 ] + +In the case of custom rules being present we need to handle the case of the +LOCAL table being initialized after the new rule has been added. To address +that I am adding a new check so that we can make certain we don't use an +alias of MAIN for LOCAL when allocating a new table. 
+ +Fixes: 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse") +Reported-by: Oliver Brunel +Signed-off-by: Alexander Duyck +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_frontend.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -85,7 +85,7 @@ struct fib_table *fib_new_table(struct n + if (tb) + return tb; + +- if (id == RT_TABLE_LOCAL) ++ if (id == RT_TABLE_LOCAL && !net->ipv4.fib_has_custom_rules) + alias = fib_new_table(net, RT_TABLE_MAIN); + + tb = fib_trie_table(id, alias); diff --git a/queue-4.9/ipv6-handle-efault-from-skb_copy_bits.patch b/queue-4.9/ipv6-handle-efault-from-skb_copy_bits.patch new file mode 100644 index 00000000000..cdd30a2671d --- /dev/null +++ b/queue-4.9/ipv6-handle-efault-from-skb_copy_bits.patch @@ -0,0 +1,82 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Dave Jones +Date: Thu, 22 Dec 2016 11:16:22 -0500 +Subject: ipv6: handle -EFAULT from skb_copy_bits + +From: Dave Jones + + +[ Upstream commit a98f91758995cb59611e61318dddd8a6956b52c3 ] + +By setting certain socket options on ipv6 raw sockets, we can confuse the +length calculation in rawv6_push_pending_frames triggering a BUG_ON. + +RIP: 0010:[] [] rawv6_sendmsg+0xc30/0xc40 +RSP: 0018:ffff881f6c4a7c18 EFLAGS: 00010282 +RAX: 00000000fffffff2 RBX: ffff881f6c681680 RCX: 0000000000000002 +RDX: ffff881f6c4a7cf8 RSI: 0000000000000030 RDI: ffff881fed0f6a00 +RBP: ffff881f6c4a7da8 R08: 0000000000000000 R09: 0000000000000009 +R10: ffff881fed0f6a00 R11: 0000000000000009 R12: 0000000000000030 +R13: ffff881fed0f6a00 R14: ffff881fee39ba00 R15: ffff881fefa93a80 + +Call Trace: + [] ? unmap_page_range+0x693/0x830 + [] inet_sendmsg+0x67/0xa0 + [] sock_sendmsg+0x38/0x50 + [] SYSC_sendto+0xef/0x170 + [] SyS_sendto+0xe/0x10 + [] do_syscall_64+0x50/0xa0 + [] entry_SYSCALL64_slow_path+0x25/0x25 + +Handle by jumping to the failure path if skb_copy_bits gets an EFAULT. 
+ +Reproducer: + +#include +#include +#include +#include +#include +#include +#include + +#define LEN 504 + +int main(int argc, char* argv[]) +{ + int fd; + int zero = 0; + char buf[LEN]; + + memset(buf, 0, LEN); + + fd = socket(AF_INET6, SOCK_RAW, 7); + + setsockopt(fd, SOL_IPV6, IPV6_CHECKSUM, &zero, 4); + setsockopt(fd, SOL_IPV6, IPV6_DSTOPTS, &buf, LEN); + + sendto(fd, buf, 1, 0, (struct sockaddr *) buf, 110); +} + +Signed-off-by: Dave Jones +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/raw.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/net/ipv6/raw.c ++++ b/net/ipv6/raw.c +@@ -589,7 +589,11 @@ static int rawv6_push_pending_frames(str + } + + offset += skb_transport_offset(skb); +- BUG_ON(skb_copy_bits(skb, offset, &csum, 2)); ++ err = skb_copy_bits(skb, offset, &csum, 2); ++ if (err < 0) { ++ ip6_flush_pending_frames(sk); ++ goto out; ++ } + + /* in case cksum was not initialized */ + if (unlikely(csum)) diff --git a/queue-4.9/net-add-the-af_qipcrtr-entries-to-family-name-tables.patch b/queue-4.9/net-add-the-af_qipcrtr-entries-to-family-name-tables.patch new file mode 100644 index 00000000000..1f55ce85ba5 --- /dev/null +++ b/queue-4.9/net-add-the-af_qipcrtr-entries-to-family-name-tables.patch @@ -0,0 +1,53 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: "Anna, Suman" +Date: Mon, 9 Jan 2017 21:48:56 -0600 +Subject: net: add the AF_QIPCRTR entries to family name tables + +From: "Anna, Suman" + + +[ Upstream commit 5d722b3024f6762addb8642ffddc9f275b5107ae ] + +Commit bdabad3e363d ("net: Add Qualcomm IPC router") introduced a +new address family. Update the family name tables accordingly so +that the lockdep initialization can use the proper names for this +family. + +Cc: Courtney Cavin +Cc: Bjorn Andersson +Signed-off-by: Suman Anna +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -222,7 +222,7 @@ static const char *const af_family_key_s + "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , + "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , + "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , +- "sk_lock-AF_MAX" ++ "sk_lock-AF_QIPCRTR", "sk_lock-AF_MAX" + }; + static const char *const af_family_slock_key_strings[AF_MAX+1] = { + "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , +@@ -239,7 +239,7 @@ static const char *const af_family_slock + "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , + "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , + "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , +- "slock-AF_MAX" ++ "slock-AF_QIPCRTR", "slock-AF_MAX" + }; + static const char *const af_family_clock_key_strings[AF_MAX+1] = { + "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , +@@ -256,7 +256,7 @@ static const char *const af_family_clock + "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , + "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , + "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , +- "clock-AF_MAX" ++ "clock-AF_QIPCRTR", "clock-AF_MAX" + }; + + /* diff --git a/queue-4.9/net-dsa-bcm_sf2-do-not-clobber-b53_switch_ops.patch b/queue-4.9/net-dsa-bcm_sf2-do-not-clobber-b53_switch_ops.patch new file mode 100644 index 00000000000..3ee87c5590e --- /dev/null +++ b/queue-4.9/net-dsa-bcm_sf2-do-not-clobber-b53_switch_ops.patch @@ -0,0 +1,55 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Florian Fainelli +Date: Sat, 7 Jan 2017 21:01:56 -0800 +Subject: net: dsa: bcm_sf2: Do not clobber b53_switch_ops + +From: Florian Fainelli + + +[ Upstream commit a4c61b92b3a4cbda35bb0251a5063a68f0861b2c ] + +We make the bcm_sf2 driver override ds->ops which points to +b53_switch_ops since b53_switch_alloc() 
did the assignment. This is all +well and good until a second b53 switch comes in, and ends up using the +bcm_sf2 operations. Make a proper local copy, substitute the ds->ops +pointer and then override the operations. + +Fixes: f458995b9ad8 ("net: dsa: bcm_sf2: Utilize core B53 driver when possible") +Signed-off-by: Florian Fainelli +Reviewed-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -982,6 +982,7 @@ static int bcm_sf2_sw_probe(struct platf + const char *reg_names[BCM_SF2_REGS_NUM] = BCM_SF2_REGS_NAME; + struct device_node *dn = pdev->dev.of_node; + struct b53_platform_data *pdata; ++ struct dsa_switch_ops *ops; + struct bcm_sf2_priv *priv; + struct b53_device *dev; + struct dsa_switch *ds; +@@ -995,6 +996,10 @@ static int bcm_sf2_sw_probe(struct platf + if (!priv) + return -ENOMEM; + ++ ops = devm_kzalloc(&pdev->dev, sizeof(*ops), GFP_KERNEL); ++ if (!ops) ++ return -ENOMEM; ++ + dev = b53_switch_alloc(&pdev->dev, &bcm_sf2_io_ops, priv); + if (!dev) + return -ENOMEM; +@@ -1014,6 +1019,8 @@ static int bcm_sf2_sw_probe(struct platf + ds = dev->ds; + + /* Override the parts that are non-standard wrt. 
normal b53 devices */ ++ memcpy(ops, ds->ops, sizeof(*ops)); ++ ds->ops = ops; + ds->ops->get_tag_protocol = bcm_sf2_sw_get_tag_protocol; + ds->ops->setup = bcm_sf2_sw_setup; + ds->ops->get_phy_flags = bcm_sf2_sw_get_phy_flags; diff --git a/queue-4.9/net-dsa-bcm_sf2-utilize-nested-mdio-read-write.patch b/queue-4.9/net-dsa-bcm_sf2-utilize-nested-mdio-read-write.patch new file mode 100644 index 00000000000..7514fde531f --- /dev/null +++ b/queue-4.9/net-dsa-bcm_sf2-utilize-nested-mdio-read-write.patch @@ -0,0 +1,42 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Florian Fainelli +Date: Sat, 7 Jan 2017 21:01:57 -0800 +Subject: net: dsa: bcm_sf2: Utilize nested MDIO read/write + +From: Florian Fainelli + + +[ Upstream commit 2cfe8f8290bd28cf1ee67db914a6e76cf8e6437b ] + +We are implementing a MDIO bus which is behind another one, so use the +nested version of the accessors to get lockdep annotations correct. + +Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus") +Signed-off-by: Florian Fainelli +Reviewed-by: Andrew Lunn +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -393,7 +393,7 @@ static int bcm_sf2_sw_mdio_read(struct m + if (addr == BRCM_PSEUDO_PHY_ADDR && priv->indir_phy_mask & BIT(addr)) + return bcm_sf2_sw_indir_rw(priv, 1, addr, regnum, 0); + else +- return mdiobus_read(priv->master_mii_bus, addr, regnum); ++ return mdiobus_read_nested(priv->master_mii_bus, addr, regnum); + } + + static int bcm_sf2_sw_mdio_write(struct mii_bus *bus, int addr, int regnum, +@@ -407,7 +407,7 @@ static int bcm_sf2_sw_mdio_write(struct + if (addr == BRCM_PSEUDO_PHY_ADDR && priv->indir_phy_mask & BIT(addr)) + bcm_sf2_sw_indir_rw(priv, 0, addr, regnum, val); + else +- mdiobus_write(priv->master_mii_bus, addr, regnum, val); ++ mdiobus_write_nested(priv->master_mii_bus, addr, regnum, val); + + return 0; + } diff --git a/queue-4.9/net-dsa-ensure-validity-of-dst-ds.patch b/queue-4.9/net-dsa-ensure-validity-of-dst-ds.patch new file mode 100644 index 00000000000..96fff44b162 --- /dev/null +++ b/queue-4.9/net-dsa-ensure-validity-of-dst-ds.patch @@ -0,0 +1,51 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Florian Fainelli +Date: Mon, 9 Jan 2017 11:58:34 -0800 +Subject: net: dsa: Ensure validity of dst->ds[0] + +From: Florian Fainelli + + +[ Upstream commit faf3a932fbeb77860226a8323eacb835edc98648 ] + +It is perfectly possible to have non zero indexed switches being present +in a DSA switch tree, in such a case, we will be dereferencing a NULL +pointer while dsa_cpu_port_ethtool_{setup,restore}. Be more defensive +and ensure that dst->ds[0] is valid before doing anything with it. + +Fixes: 0c73c523cf73 ("net: dsa: Initialize CPU port ethtool ops per tree") +Signed-off-by: Florian Fainelli +Reviewed-by: Vivien Didelot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dsa/dsa2.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/net/dsa/dsa2.c ++++ b/net/dsa/dsa2.c +@@ -394,9 +394,11 @@ static int dsa_dst_apply(struct dsa_swit + return err; + } + +- err = dsa_cpu_port_ethtool_setup(dst->ds[0]); +- if (err) +- return err; ++ if (dst->ds[0]) { ++ err = dsa_cpu_port_ethtool_setup(dst->ds[0]); ++ if (err) ++ return err; ++ } + + /* If we use a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point on get +@@ -433,7 +435,8 @@ static void dsa_dst_unapply(struct dsa_s + dsa_ds_unapply(dst, ds); + } + +- dsa_cpu_port_ethtool_restore(dst->ds[0]); ++ if (dst->ds[0]) ++ dsa_cpu_port_ethtool_restore(dst->ds[0]); + + pr_info("DSA: tree %d unapplied\n", dst->tree); + dst->applied = false; diff --git a/queue-4.9/net-fix-incorrect-original-ingress-device-index-in-pktinfo.patch b/queue-4.9/net-fix-incorrect-original-ingress-device-index-in-pktinfo.patch new file mode 100644 index 00000000000..36978fb9f9a --- /dev/null +++ b/queue-4.9/net-fix-incorrect-original-ingress-device-index-in-pktinfo.patch @@ -0,0 +1,47 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Wei Zhang +Date: Thu, 29 Dec 2016 16:45:04 +0800 +Subject: net: fix incorrect original ingress device index in PKTINFO + +From: Wei Zhang + + +[ Upstream commit f0c16ba8933ed217c2688b277410b2a37ba81591 ] + +When we send a packet for our own local address on a non-loopback +interface (e.g. eth0), due to the change had been introduced from +commit 0b922b7a829c ("net: original ingress device index in PKTINFO"), the +original ingress device index would be set as the loopback interface. +However, the packet should be considered as if it is being arrived via the +sending interface (eth0), otherwise it would break the expectation of the +userspace application (e.g. 
the DHCPRELEASE message from dhcp_release +binary would be ignored by the dnsmasq daemon, since it come from lo which +is not the interface dnsmasq bind to) + +Fixes: 0b922b7a829c ("net: original ingress device index in PKTINFO") +Acked-by: David Ahern +Signed-off-by: Wei Zhang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_sockglue.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/net/ipv4/ip_sockglue.c ++++ b/net/ipv4/ip_sockglue.c +@@ -1202,8 +1202,14 @@ void ipv4_pktinfo_prepare(const struct s + * which has interface index (iif) as the first member of the + * underlying inet{6}_skb_parm struct. This code then overlays + * PKTINFO_SKB_CB and in_pktinfo also has iif as the first +- * element so the iif is picked up from the prior IPCB ++ * element so the iif is picked up from the prior IPCB. If iif ++ * is the loopback interface, then return the sending interface ++ * (e.g., process binds socket to eth0 for Tx which is ++ * redirected to loopback in the rtable/dst). + */ ++ if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX) ++ pktinfo->ipi_ifindex = inet_iif(skb); ++ + pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); + } else { + pktinfo->ipi_ifindex = 0; diff --git a/queue-4.9/net-ipv4-dst-for-local-input-routes-should-use-l3mdev-if-relevant.patch b/queue-4.9/net-ipv4-dst-for-local-input-routes-should-use-l3mdev-if-relevant.patch new file mode 100644 index 00000000000..d3acb9767aa --- /dev/null +++ b/queue-4.9/net-ipv4-dst-for-local-input-routes-should-use-l3mdev-if-relevant.patch @@ -0,0 +1,39 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: David Ahern +Date: Thu, 29 Dec 2016 15:29:03 -0800 +Subject: net: ipv4: dst for local input routes should use l3mdev if relevant + +From: David Ahern + + +[ Upstream commit f5a0aab84b74de68523599817569c057c7ac1622 ] + +IPv4 output routes already use l3mdev device instead of loopback for dst's +if it is applicable. Change local input routes to do the same. 
+ +This fixes icmp responses for unreachable UDP ports which are directed +to the wrong table after commit 9d1a6c4ea43e4 because local_input +routes use the loopback device. Moving from ingress device to loopback +loses the L3 domain causing responses based on the dst to get lost. + +Fixes: 9d1a6c4ea43e4 ("net: icmp_route_lookup should use rt dev to + determine L3 domain") +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1902,7 +1902,8 @@ local_input: + } + } + +- rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type, ++ rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, ++ flags | RTCF_LOCAL, res.type, + IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); + if (!rth) + goto e_nobufs; diff --git a/queue-4.9/net-ipv4-fix-multipath-selection-with-vrf.patch b/queue-4.9/net-ipv4-fix-multipath-selection-with-vrf.patch new file mode 100644 index 00000000000..21fd1ce53f2 --- /dev/null +++ b/queue-4.9/net-ipv4-fix-multipath-selection-with-vrf.patch @@ -0,0 +1,65 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: David Ahern +Date: Tue, 10 Jan 2017 14:37:35 -0800 +Subject: net: ipv4: Fix multipath selection with vrf + +From: David Ahern + + +[ Upstream commit 7a18c5b9fb31a999afc62b0e60978aa896fc89e9 ] + +fib_select_path does not call fib_select_multipath if oif is set in the +flow struct. For VRF use cases oif is always set, so multipath route +selection is bypassed. Use the FLOWI_FLAG_SKIP_NH_OIF to skip the oif +check similar to what is done in fib_table_lookup. + +Add saddr and proto to the flow struct for the fib lookup done by the +VRF driver to better match hash computation for a flow. + +Fixes: 613d09b30f8b ("net: Use VRF device index for lookups on TX") +Signed-off-by: David Ahern +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vrf.c | 2 ++ + net/ipv4/fib_semantics.c | 9 +++++++-- + 2 files changed, 9 insertions(+), 2 deletions(-) + +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -263,7 +263,9 @@ static netdev_tx_t vrf_process_v4_outbou + .flowi4_iif = LOOPBACK_IFINDEX, + .flowi4_tos = RT_TOS(ip4h->tos), + .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF, ++ .flowi4_proto = ip4h->protocol, + .daddr = ip4h->daddr, ++ .saddr = ip4h->saddr, + }; + struct net *net = dev_net(vrf_dev); + struct rtable *rt; +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -1617,8 +1617,13 @@ void fib_select_multipath(struct fib_res + void fib_select_path(struct net *net, struct fib_result *res, + struct flowi4 *fl4, int mp_hash) + { ++ bool oif_check; ++ ++ oif_check = (fl4->flowi4_oif == 0 || ++ fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF); ++ + #ifdef CONFIG_IP_ROUTE_MULTIPATH +- if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) { ++ if (res->fi->fib_nhs > 1 && oif_check) { + if (mp_hash < 0) + mp_hash = get_hash_from_flowi4(fl4) >> 1; + +@@ -1628,7 +1633,7 @@ void fib_select_path(struct net *net, st + #endif + if (!res->prefixlen && + res->table->tb_num_default > 1 && +- res->type == RTN_UNICAST && !fl4->flowi4_oif) ++ res->type == RTN_UNICAST && oif_check) + fib_select_default(fl4, res); + + if (!fl4->saddr) diff --git a/queue-4.9/net-mlx5-avoid-shadowing-numa_node.patch b/queue-4.9/net-mlx5-avoid-shadowing-numa_node.patch new file mode 100644 index 00000000000..2c22e26d865 --- /dev/null +++ b/queue-4.9/net-mlx5-avoid-shadowing-numa_node.patch @@ -0,0 +1,41 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Eli Cohen +Date: Wed, 28 Dec 2016 14:58:34 +0200 +Subject: net/mlx5: Avoid shadowing numa_node + +From: Eli Cohen + + +[ Upstream commit d151d73dcc99de87c63bdefebcc4cb69de1cdc40 ] + +Avoid using a local variable named numa_node to avoid shadowing a public +one. 
+ +Fixes: db058a186f98 ('net/mlx5_core: Set irq affinity hints') +Signed-off-by: Eli Cohen +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -547,7 +547,6 @@ static int mlx5_irq_set_affinity_hint(st + struct mlx5_priv *priv = &mdev->priv; + struct msix_entry *msix = priv->msix_arr; + int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; +- int numa_node = priv->numa_node; + int err; + + if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { +@@ -555,7 +554,7 @@ static int mlx5_irq_set_affinity_hint(st + return -ENOMEM; + } + +- cpumask_set_cpu(cpumask_local_spread(i, numa_node), ++ cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), + priv->irq_info[i].mask); + + err = irq_set_affinity_hint(irq, priv->irq_info[i].mask); diff --git a/queue-4.9/net-mlx5-cancel-recovery-work-in-remove-flow.patch b/queue-4.9/net-mlx5-cancel-recovery-work-in-remove-flow.patch new file mode 100644 index 00000000000..57bb72017fe --- /dev/null +++ b/queue-4.9/net-mlx5-cancel-recovery-work-in-remove-flow.patch @@ -0,0 +1,45 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Daniel Jurgens +Date: Wed, 28 Dec 2016 14:58:33 +0200 +Subject: net/mlx5: Cancel recovery work in remove flow + +From: Daniel Jurgens + + +[ Upstream commit 689a248df83b6032edc57e86267b4e5cc8d7174e ] + +If there is pending delayed work for health recovery it must be canceled +if the device is being unloaded. + +Fixes: 05ac2c0b7438 ("net/mlx5: Fix race between PCI error handlers and health work") +Signed-off-by: Daniel Jurgens +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -1159,6 +1159,8 @@ static int mlx5_unload_one(struct mlx5_c + { + int err = 0; + ++ mlx5_drain_health_wq(dev); ++ + mutex_lock(&dev->intf_state_mutex); + if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { + dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", +@@ -1319,10 +1321,9 @@ static pci_ers_result_t mlx5_pci_err_det + + mlx5_enter_error_state(dev); + mlx5_unload_one(dev, priv, false); +- /* In case of kernel call save the pci state and drain health wq */ ++ /* In case of kernel call save the pci state */ + if (state) { + pci_save_state(pdev); +- mlx5_drain_health_wq(dev); + mlx5_pci_disable_device(dev); + } + diff --git a/queue-4.9/net-mlx5-check-fw-limitations-on-log_max_qp-before-setting-it.patch b/queue-4.9/net-mlx5-check-fw-limitations-on-log_max_qp-before-setting-it.patch new file mode 100644 index 00000000000..50170cdd62e --- /dev/null +++ b/queue-4.9/net-mlx5-check-fw-limitations-on-log_max_qp-before-setting-it.patch @@ -0,0 +1,38 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Noa Osherovich +Date: Wed, 28 Dec 2016 14:58:32 +0200 +Subject: net/mlx5: Check FW limitations on log_max_qp before setting it + +From: Noa Osherovich + + +[ Upstream commit 883371c453b937f9eb581fb4915210865982736f ] + +When setting HCA capabilities, set log_max_qp to be the minimum +between the selected profile's value and the HCA limitation. + +Fixes: 938fe83c8dcb ('net/mlx5_core: New device capabilities...') +Signed-off-by: Noa Osherovich +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -468,6 +468,13 @@ static int handle_hca_cap(struct mlx5_co + MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, + to_fw_pkey_sz(dev, 128)); + ++ /* Check log_max_qp from HCA caps to set in current profile */ ++ if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) { ++ mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", ++ profile[prof_sel].log_max_qp, ++ MLX5_CAP_GEN_MAX(dev, log_max_qp)); ++ profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); ++ } + if (prof->mask & MLX5_PROF_MASK_QP_SIZE) + MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, + prof->log_max_qp); diff --git a/queue-4.9/net-mlx5-mask-destination-mac-value-in-ethtool-steering-rules.patch b/queue-4.9/net-mlx5-mask-destination-mac-value-in-ethtool-steering-rules.patch new file mode 100644 index 00000000000..0b700e255dc --- /dev/null +++ b/queue-4.9/net-mlx5-mask-destination-mac-value-in-ethtool-steering-rules.patch @@ -0,0 +1,32 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Maor Gottlieb +Date: Wed, 28 Dec 2016 14:58:35 +0200 +Subject: net/mlx5: Mask destination mac value in ethtool steering rules + +From: Maor Gottlieb + + +[ Upstream commit 077b1e8069b9b74477b01d28f6b83774dc19a142 ] + +We need to mask the destination mac value with the destination mac +mask when adding steering rule via ethtool. + +Fixes: 1174fce8d1410 ('net/mlx5e: Support l3/l4 flow type specs in ethtool flow steering') +Signed-off-by: Maor Gottlieb +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +@@ -247,6 +247,7 @@ static int set_flow_attrs(u32 *match_c, + } + if (fs->flow_type & FLOW_MAC_EXT && + !is_zero_ether_addr(fs->m_ext.h_dest)) { ++ mask_spec(fs->m_ext.h_dest, fs->h_ext.h_dest, ETH_ALEN); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_c, dmac_47_16), + fs->m_ext.h_dest); diff --git a/queue-4.9/net-mlx5-prevent-setting-multicast-macs-for-vfs.patch b/queue-4.9/net-mlx5-prevent-setting-multicast-macs-for-vfs.patch new file mode 100644 index 00000000000..ab1eea7faf4 --- /dev/null +++ b/queue-4.9/net-mlx5-prevent-setting-multicast-macs-for-vfs.patch @@ -0,0 +1,34 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Mohamad Haj Yahia +Date: Wed, 28 Dec 2016 14:58:37 +0200 +Subject: net/mlx5: Prevent setting multicast macs for VFs + +From: Mohamad Haj Yahia + + +[ Upstream commit ccce1700263d8b5b219359d04180492a726cea16 ] + +Need to check that VF mac address entered by the admin user is either +zero or unicast mac. +Multicast mac addresses are prohibited. + +Fixes: 77256579c6b4 ('net/mlx5: E-Switch, Introduce Vport administration functions') +Signed-off-by: Mohamad Haj Yahia +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +@@ -1703,7 +1703,7 @@ int mlx5_eswitch_set_vport_mac(struct ml + + if (!ESW_ALLOWED(esw)) + return -EPERM; +- if (!LEGAL_VPORT(esw, vport)) ++ if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac)) + return -EINVAL; + + mutex_lock(&esw->state_lock); diff --git a/queue-4.9/net-mlx5e-disable-netdev-after-close.patch b/queue-4.9/net-mlx5e-disable-netdev-after-close.patch new file mode 100644 index 00000000000..6abef79c5fe --- /dev/null +++ b/queue-4.9/net-mlx5e-disable-netdev-after-close.patch @@ -0,0 +1,46 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Saeed Mahameed +Date: Wed, 28 Dec 2016 14:58:42 +0200 +Subject: net/mlx5e: Disable netdev after close + +From: Saeed Mahameed + + +[ Upstream commit 37f304d10030bb425c19099e7b955d9c3ec4cba3 ] + +Disable netdev should come after it was closed, although no harm of doing it +before -hence the MLX5E_STATE_DESTROYING bit- but it is more natural this way. + +Fixes: 26e59d8077a3 ("net/mlx5e: Implement mlx5e interface attach/detach callbacks") +Signed-off-by: Saeed Mahameed +Reviewed-by: Mohamad Haj Yahia +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -3942,10 +3942,6 @@ void mlx5e_detach_netdev(struct mlx5_cor + const struct mlx5e_profile *profile = priv->profile; + + set_bit(MLX5E_STATE_DESTROYING, &priv->state); +- if (profile->disable) +- profile->disable(priv); +- +- flush_workqueue(priv->wq); + + rtnl_lock(); + if (netif_running(netdev)) +@@ -3953,6 +3949,10 @@ void mlx5e_detach_netdev(struct mlx5_cor + netif_device_detach(netdev); + rtnl_unlock(); + ++ if (profile->disable) ++ profile->disable(priv); ++ flush_workqueue(priv->wq); ++ + mlx5e_destroy_q_counter(priv); + profile->cleanup_rx(priv); + mlx5e_close_drop_rq(priv); diff --git a/queue-4.9/net-mlx5e-don-t-sync-netdev-state-when-not-registered.patch b/queue-4.9/net-mlx5e-don-t-sync-netdev-state-when-not-registered.patch new file mode 100644 index 00000000000..7c4020dba48 --- /dev/null +++ b/queue-4.9/net-mlx5e-don-t-sync-netdev-state-when-not-registered.patch @@ -0,0 +1,66 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Saeed Mahameed +Date: Wed, 28 Dec 2016 14:58:41 +0200 +Subject: net/mlx5e: Don't sync netdev state when not registered + +From: Saeed Mahameed + + +[ Upstream commit 610e89e05c3f28a7394935aa6b91f99548c4fd3c ] + +Skip setting netdev vxlan ports and netdev rx_mode on driver load +when netdev is not yet registered. + +Synchronizing with netdev state is needed only on reset flow where the +netdev remains registered for the whole reset period. + +This also fixes an access before initialization of net_device.addr_list_lock +- which for some reason initialized on register_netdev - where we queued +set_rx_mode work on driver load before netdev registration. 
+ +Fixes: 26e59d8077a3 ("net/mlx5e: Implement mlx5e interface attach/detach callbacks") +Signed-off-by: Saeed Mahameed +Reported-by: Sebastian Ott +Reviewed-by: Mohamad Haj Yahia +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -3773,14 +3773,7 @@ static void mlx5e_nic_enable(struct mlx5 + + mlx5_lag_add(mdev, netdev); + +- if (mlx5e_vxlan_allowed(mdev)) { +- rtnl_lock(); +- udp_tunnel_get_rx_info(netdev); +- rtnl_unlock(); +- } +- + mlx5e_enable_async_events(priv); +- queue_work(priv->wq, &priv->set_rx_mode_work); + + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); +@@ -3790,6 +3783,18 @@ static void mlx5e_nic_enable(struct mlx5 + rep.priv_data = priv; + mlx5_eswitch_register_vport_rep(esw, 0, &rep); + } ++ ++ if (netdev->reg_state != NETREG_REGISTERED) ++ return; ++ ++ /* Device already registered: sync netdev system state */ ++ if (mlx5e_vxlan_allowed(mdev)) { ++ rtnl_lock(); ++ udp_tunnel_get_rx_info(netdev); ++ rtnl_unlock(); ++ } ++ ++ queue_work(priv->wq, &priv->set_rx_mode_work); + } + + static void mlx5e_nic_disable(struct mlx5e_priv *priv) diff --git a/queue-4.9/net-mlx5e-remove-warn_once-from-adaptive-moderation-code.patch b/queue-4.9/net-mlx5e-remove-warn_once-from-adaptive-moderation-code.patch new file mode 100644 index 00000000000..713bc17ae60 --- /dev/null +++ b/queue-4.9/net-mlx5e-remove-warn_once-from-adaptive-moderation-code.patch @@ -0,0 +1,64 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Gil Rockah +Date: Tue, 10 Jan 2017 22:33:38 +0200 +Subject: net/mlx5e: Remove WARN_ONCE from adaptive moderation code + +From: Gil Rockah + + +[ Upstream commit 0bbcc0a8fc394d01988fe0263ccf7fddb77a12c3 ] + +When trying to do interface 
down or changing interface configuration +under heavy traffic, some of the adaptive moderation corner cases can +occur and leave a WARN_ONCE call trace in the kernel log. + +Those WARN_ONCE are meant for debug only, and should have been inserted +only under debug. We avoid such call traces by removing those WARN_ONCE. + +Fixes: cb3c7fd4f839 ("net/mlx5e: Support adaptive RX coalescing") +Signed-off-by: Gil Rockah +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c | 7 +------ + 1 file changed, 1 insertion(+), 6 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c +@@ -109,7 +109,6 @@ static bool mlx5e_am_on_top(struct mlx5e + switch (am->tune_state) { + case MLX5E_AM_PARKING_ON_TOP: + case MLX5E_AM_PARKING_TIRED: +- WARN_ONCE(true, "mlx5e_am_on_top: PARKING\n"); + return true; + case MLX5E_AM_GOING_RIGHT: + return (am->steps_left > 1) && (am->steps_right == 1); +@@ -123,7 +122,6 @@ static void mlx5e_am_turn(struct mlx5e_r + switch (am->tune_state) { + case MLX5E_AM_PARKING_ON_TOP: + case MLX5E_AM_PARKING_TIRED: +- WARN_ONCE(true, "mlx5e_am_turn: PARKING\n"); + break; + case MLX5E_AM_GOING_RIGHT: + am->tune_state = MLX5E_AM_GOING_LEFT; +@@ -144,7 +142,6 @@ static int mlx5e_am_step(struct mlx5e_rx + switch (am->tune_state) { + case MLX5E_AM_PARKING_ON_TOP: + case MLX5E_AM_PARKING_TIRED: +- WARN_ONCE(true, "mlx5e_am_step: PARKING\n"); + break; + case MLX5E_AM_GOING_RIGHT: + if (am->profile_ix == (MLX5E_PARAMS_AM_NUM_PROFILES - 1)) +@@ -282,10 +279,8 @@ static void mlx5e_am_calc_stats(struct m + u32 delta_us = ktime_us_delta(end->time, start->time); + unsigned int npkts = end->pkt_ctr - start->pkt_ctr; + +- if (!delta_us) { +- WARN_ONCE(true, "mlx5e_am_calc_stats: delta_us=0\n"); ++ if (!delta_us) + return; +- } + + curr_stats->ppms = (npkts * USEC_PER_MSEC) / delta_us; + curr_stats->epms = 
(MLX5E_AM_NEVENTS * USEC_PER_MSEC) / delta_us; diff --git a/queue-4.9/net-sched-cls_flower-fix-missing-addr_type-in-classify.patch b/queue-4.9/net-sched-cls_flower-fix-missing-addr_type-in-classify.patch new file mode 100644 index 00000000000..85949a8953d --- /dev/null +++ b/queue-4.9/net-sched-cls_flower-fix-missing-addr_type-in-classify.patch @@ -0,0 +1,42 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Paul Blakey +Date: Wed, 28 Dec 2016 14:54:47 +0200 +Subject: net/sched: cls_flower: Fix missing addr_type in classify + +From: Paul Blakey + + +[ Upstream commit 0df0f207aab4f42e5c96a807adf9a6845b69e984 ] + +Since we now use a non zero mask on addr_type, we are matching on its +value (IPV4/IPV6). So before this fix, matching on enc_src_ip/enc_dst_ip +failed in SW/classify path since its value was zero. +This patch sets the proper value of addr_type for encapsulated packets. + +Fixes: 970bfcd09791 ('net/sched: cls_flower: Use mask for addr_type') +Signed-off-by: Paul Blakey +Reviewed-by: Hadar Hen Zion +Acked-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_flower.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/sched/cls_flower.c ++++ b/net/sched/cls_flower.c +@@ -149,10 +149,14 @@ static int fl_classify(struct sk_buff *s + + switch (ip_tunnel_info_af(info)) { + case AF_INET: ++ skb_key.enc_control.addr_type = ++ FLOW_DISSECTOR_KEY_IPV4_ADDRS; + skb_key.enc_ipv4.src = key->u.ipv4.src; + skb_key.enc_ipv4.dst = key->u.ipv4.dst; + break; + case AF_INET6: ++ skb_key.enc_control.addr_type = ++ FLOW_DISSECTOR_KEY_IPV6_ADDRS; + skb_key.enc_ipv6.src = key->u.ipv6.src; + skb_key.enc_ipv6.dst = key->u.ipv6.dst; + break; diff --git a/queue-4.9/net-sched-fix-soft-lockup-in-tc_classify.patch b/queue-4.9/net-sched-fix-soft-lockup-in-tc_classify.patch new file mode 100644 index 00000000000..2e754c59b29 --- /dev/null +++ b/queue-4.9/net-sched-fix-soft-lockup-in-tc_classify.patch @@ -0,0 +1,81 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Daniel Borkmann +Date: Wed, 21 Dec 2016 18:04:11 +0100 +Subject: net, sched: fix soft lockup in tc_classify + +From: Daniel Borkmann + + +[ Upstream commit 628185cfddf1dfb701c4efe2cfd72cf5b09f5702 ] + +Shahar reported a soft lockup in tc_classify(), where we run into an +endless loop when walking the classifier chain due to tp->next == tp +which is a state we should never run into. The issue only seems to +trigger under load in the tc control path. + +What happens is that in tc_ctl_tfilter(), thread A allocates a new +tp, initializes it, sets tp_created to 1, and calls into tp->ops->change() +with it. In that classifier callback we had to unlock/lock the rtnl +mutex and returned with -EAGAIN. One reason why we need to drop there +is, for example, that we need to request an action module to be loaded. + +This happens via tcf_exts_validate() -> tcf_action_init/_1() meaning +after we loaded and found the requested action, we need to redo the +whole request so we don't race against others. 
While we had to unlock +rtnl in that time, thread B's request was processed next on that CPU. +Thread B added a new tp instance successfully to the classifier chain. +When thread A returned grabbing the rtnl mutex again, propagating -EAGAIN +and destroying its tp instance which never got linked, we goto replay +and redo A's request. + +This time when walking the classifier chain in tc_ctl_tfilter() for +checking for existing tp instances we had a priority match and found +the tp instance that was created and linked by thread B. Now calling +again into tp->ops->change() with that tp was successful and returned +without error. + +tp_created was never cleared in the second round, thus kernel thinks +that we need to link it into the classifier chain (once again). tp and +*back point to the same object due to the match we had earlier on. Thus +for thread B's already public tp, we reset tp->next to tp itself and +link it into the chain, which eventually causes the mentioned endless +loop in tc_classify() once a packet hits the data path. + +Fix is to clear tp_created at the beginning of each request, also when +we replay it. On the paths that can cause -EAGAIN we already destroy +the original tp instance we had and on replay we really need to start +from scratch. It seems that this issue was first introduced in commit +12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining +and avoid kernel panic when we use cls_cgroup"). + +Fixes: 12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining and avoid kernel panic when we use cls_cgroup") +Reported-by: Shahar Klein +Signed-off-by: Daniel Borkmann +Cc: Cong Wang +Acked-by: Eric Dumazet +Tested-by: Shahar Klein +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_api.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/sched/cls_api.c ++++ b/net/sched/cls_api.c +@@ -148,13 +148,15 @@ static int tc_ctl_tfilter(struct sk_buff + unsigned long cl; + unsigned long fh; + int err; +- int tp_created = 0; ++ int tp_created; + + if ((n->nlmsg_type != RTM_GETTFILTER) && + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + replay: ++ tp_created = 0; ++ + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); + if (err < 0) + return err; diff --git a/queue-4.9/net-stmmac-fix-race-between-stmmac_drv_probe-and-stmmac_open.patch b/queue-4.9/net-stmmac-fix-race-between-stmmac_drv_probe-and-stmmac_open.patch new file mode 100644 index 00000000000..285729b9fdb --- /dev/null +++ b/queue-4.9/net-stmmac-fix-race-between-stmmac_drv_probe-and-stmmac_open.patch @@ -0,0 +1,94 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Florian Fainelli +Date: Tue, 27 Dec 2016 18:23:06 -0800 +Subject: net: stmmac: Fix race between stmmac_drv_probe and stmmac_open + +From: Florian Fainelli + + +[ Upstream commit 5701659004d68085182d2fd4199c79172165fa65 ] + +There is currently a small window during which the network device registered by +stmmac can be made visible, yet all resources, including and clock and MDIO bus +have not had a chance to be set up, this can lead to the following error to +occur: + +[ 473.919358] stmmaceth 0000:01:00.0 (unnamed net_device) (uninitialized): + stmmac_dvr_probe: warning: cannot get CSR clock +[ 473.919382] stmmaceth 0000:01:00.0: no reset control found +[ 473.919412] stmmac - user ID: 0x10, Synopsys ID: 0x42 +[ 473.919429] stmmaceth 0000:01:00.0: DMA HW capability register supported +[ 473.919436] stmmaceth 0000:01:00.0: RX Checksum Offload Engine supported +[ 473.919443] stmmaceth 0000:01:00.0: TX Checksum insertion supported +[ 473.919451] stmmaceth 0000:01:00.0 (unnamed net_device) (uninitialized): + Enable RX 
Mitigation via HW Watchdog Timer +[ 473.921395] libphy: PHY stmmac-1:00 not found +[ 473.921417] stmmaceth 0000:01:00.0 eth0: Could not attach to PHY +[ 473.921427] stmmaceth 0000:01:00.0 eth0: stmmac_open: Cannot attach to + PHY (error: -19) +[ 473.959710] libphy: stmmac: probed +[ 473.959724] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 0 IRQ POLL + (stmmac-1:00) active +[ 473.959728] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 1 IRQ POLL + (stmmac-1:01) +[ 473.959731] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 2 IRQ POLL + (stmmac-1:02) +[ 473.959734] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 3 IRQ POLL + (stmmac-1:03) + +Fix this by making sure that register_netdev() is the last thing being done, +which guarantees that the clock and the MDIO bus are available. + +Fixes: 4bfcbd7abce2 ("stmmac: Move the mdio_register/_unregister in probe/remove") +Reported-by: Kweh, Hock Leong +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 23 ++++++++++++---------- + 1 file changed, 13 insertions(+), 10 deletions(-) + +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -3349,12 +3349,6 @@ int stmmac_dvr_probe(struct device *devi + spin_lock_init(&priv->lock); + spin_lock_init(&priv->tx_lock); + +- ret = register_netdev(ndev); +- if (ret) { +- pr_err("%s: ERROR %i registering the device\n", __func__, ret); +- goto error_netdev_register; +- } +- + /* If a specific clk_csr value is passed from the platform + * this means that the CSR Clock Range selection cannot be + * changed at run-time and it is fixed. 
Viceversa the driver'll try to +@@ -3376,15 +3370,24 @@ int stmmac_dvr_probe(struct device *devi + if (ret < 0) { + pr_debug("%s: MDIO bus (id: %d) registration failed", + __func__, priv->plat->bus_id); +- goto error_mdio_register; ++ goto error_napi_register; + } + } + +- return 0; ++ ret = register_netdev(ndev); ++ if (ret) { ++ pr_err("%s: ERROR %i registering the device\n", __func__, ret); ++ goto error_netdev_register; ++ } ++ ++ return ret; + +-error_mdio_register: +- unregister_netdev(ndev); + error_netdev_register: ++ if (priv->hw->pcs != STMMAC_PCS_RGMII && ++ priv->hw->pcs != STMMAC_PCS_TBI && ++ priv->hw->pcs != STMMAC_PCS_RTBI) ++ stmmac_mdio_unregister(ndev); ++error_napi_register: + netif_napi_del(&priv->napi); + error_hw_init: + clk_disable_unprepare(priv->pclk); diff --git a/queue-4.9/net-vrf-add-missing-rx-counters.patch b/queue-4.9/net-vrf-add-missing-rx-counters.patch new file mode 100644 index 00000000000..984446a5ff0 --- /dev/null +++ b/queue-4.9/net-vrf-add-missing-rx-counters.patch @@ -0,0 +1,41 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: David Ahern +Date: Tue, 3 Jan 2017 09:37:55 -0800 +Subject: net: vrf: Add missing Rx counters + +From: David Ahern + + +[ Upstream commit 926d93a33e59b2729afdbad357233c17184de9d2 ] + +The move from rx-handler to L3 receive handler inadvertently dropped the +rx counters. Restore them. + +Fixes: 74b20582ac38 ("net: l3mdev: Add hook in ip and ipv6") +Reported-by: Dinesh Dutt +Signed-off-by: David Ahern +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vrf.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -968,6 +968,7 @@ static struct sk_buff *vrf_ip6_rcv(struc + */ + need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); + if (!ipv6_ndisc_frame(skb) && !need_strict) { ++ vrf_rx_stats(vrf_dev, skb->len); + skb->dev = vrf_dev; + skb->skb_iif = vrf_dev->ifindex; + +@@ -1009,6 +1010,8 @@ static struct sk_buff *vrf_ip_rcv(struct + goto out; + } + ++ vrf_rx_stats(vrf_dev, skb->len); ++ + skb_push(skb, skb->mac_len); + dev_queue_xmit_nit(skb, vrf_dev); + skb_pull(skb, skb->mac_len); diff --git a/queue-4.9/net-vrf-do-not-allow-table-id-0.patch b/queue-4.9/net-vrf-do-not-allow-table-id-0.patch new file mode 100644 index 00000000000..9c12b3f85e9 --- /dev/null +++ b/queue-4.9/net-vrf-do-not-allow-table-id-0.patch @@ -0,0 +1,34 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: David Ahern +Date: Tue, 10 Jan 2017 15:22:25 -0800 +Subject: net: vrf: do not allow table id 0 + +From: David Ahern + + +[ Upstream commit 24c63bbc18e25d5d8439422aa5fd2d66390b88eb ] + +Frank reported that vrf devices can be created with a table id of 0. +This breaks many of the run time table id checks and should not be +allowed. Detect this condition at create time and fail with EINVAL. + +Fixes: 193125dbd8eb ("net: Introduce VRF device driver") +Reported-by: Frank Kellermann +Signed-off-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vrf.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -1239,6 +1239,8 @@ static int vrf_newlink(struct net *src_n + return -EINVAL; + + vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]); ++ if (vrf->tb_id == RT_TABLE_UNSPEC) ++ return -EINVAL; + + dev->priv_flags |= IFF_L3MDEV_MASTER; + diff --git a/queue-4.9/net-vrf-drop-conntrack-data-after-pass-through-vrf-device-on-tx.patch b/queue-4.9/net-vrf-drop-conntrack-data-after-pass-through-vrf-device-on-tx.patch new file mode 100644 index 00000000000..54459c7ecc2 --- /dev/null +++ b/queue-4.9/net-vrf-drop-conntrack-data-after-pass-through-vrf-device-on-tx.patch @@ -0,0 +1,57 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: David Ahern +Date: Wed, 14 Dec 2016 14:31:11 -0800 +Subject: net: vrf: Drop conntrack data after pass through VRF device on Tx + +From: David Ahern + + +[ Upstream commit eb63ecc1706b3e094d0f57438b6c2067cfc299f2 ] + +Locally originated traffic in a VRF fails in the presence of a POSTROUTING +rule. For example, + + $ iptables -t nat -A POSTROUTING -s 11.1.1.0/24 -j MASQUERADE + $ ping -I red -c1 11.1.1.3 + ping: Warning: source address might be selected on device other than red. + PING 11.1.1.3 (11.1.1.3) from 11.1.1.2 red: 56(84) bytes of data. + ping: sendmsg: Operation not permitted + +Worse, the above causes random corruption resulting in a panic in random +places (I have not seen a consistent backtrace). + +Call nf_reset to drop the conntrack info following the pass through the +VRF device. The nf_reset is needed on Tx but not Rx because of the order +in which NF_HOOK's are hit: on Rx the VRF device is after the real ingress +device and on Tx it is before the real egress device. Connection +tracking should be tied to the real egress device and not the VRF device. 
+ +Fixes: 8f58336d3f78a ("net: Add ethernet header for pass through VRF device") +Fixes: 35402e3136634 ("net: Add IPv6 support to VRF device") +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vrf.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -371,6 +371,8 @@ static int vrf_finish_output6(struct net + struct in6_addr *nexthop; + int ret; + ++ nf_reset(skb); ++ + skb->protocol = htons(ETH_P_IPV6); + skb->dev = dev; + +@@ -552,6 +554,8 @@ static int vrf_finish_output(struct net + u32 nexthop; + int ret = -EINVAL; + ++ nf_reset(skb); ++ + /* Be paranoid, rather than too clever. */ + if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { + struct sk_buff *skb2; diff --git a/queue-4.9/net-vrf-fix-nat-within-a-vrf.patch b/queue-4.9/net-vrf-fix-nat-within-a-vrf.patch new file mode 100644 index 00000000000..ae4e735251f --- /dev/null +++ b/queue-4.9/net-vrf-fix-nat-within-a-vrf.patch @@ -0,0 +1,33 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: David Ahern +Date: Wed, 14 Dec 2016 11:06:18 -0800 +Subject: net: vrf: Fix NAT within a VRF + +From: David Ahern + + +[ Upstream commit a0f37efa82253994b99623dbf41eea8dd0ba169b ] + +Connection tracking with VRF is broken because the pass through the VRF +device drops the connection tracking info. Removing the call to nf_reset +allows DNAT and MASQUERADE to work across interfaces within a VRF. + +Fixes: 73e20b761acf ("net: vrf: Add support for PREROUTING rules on vrf device") +Signed-off-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vrf.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -850,8 +850,6 @@ static struct sk_buff *vrf_rcv_nfhook(u8 + { + struct net *net = dev_net(dev); + +- nf_reset(skb); +- + if (NF_HOOK(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) < 0) + skb = NULL; /* kfree_skb(skb) handled by nf code */ + diff --git a/queue-4.9/r8152-fix-rx-issue-for-runtime-suspend.patch b/queue-4.9/r8152-fix-rx-issue-for-runtime-suspend.patch new file mode 100644 index 00000000000..4869572384e --- /dev/null +++ b/queue-4.9/r8152-fix-rx-issue-for-runtime-suspend.patch @@ -0,0 +1,74 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: hayeswang +Date: Tue, 10 Jan 2017 17:04:07 +0800 +Subject: r8152: fix rx issue for runtime suspend + +From: hayeswang + + +[ Upstream commit 75dc692eda114cb234a46cb11893a9c3ea520934 ] + +Pause the rx and make sure the rx fifo is empty when the autosuspend +occurs. + +If the rx data comes when the driver is canceling the rx urb, the host +controller would stop getting the data from the device and continue +it after next rx urb is submitted. That is, one continuing data is +split into two different urb buffers. That let the driver take the +data as a rx descriptor, and unexpected behavior happens. + +Signed-off-by: Hayes Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/r8152.c | 31 ++++++++++++++++++++++++++++--- + 1 file changed, 28 insertions(+), 3 deletions(-) + +--- a/drivers/net/usb/r8152.c ++++ b/drivers/net/usb/r8152.c +@@ -3582,17 +3582,42 @@ static int rtl8152_rumtime_suspend(struc + int ret = 0; + + if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) { ++ u32 rcr = 0; ++ + if (delay_autosuspend(tp)) { + ret = -EBUSY; + goto out1; + } + ++ if (netif_carrier_ok(netdev)) { ++ u32 ocp_data; ++ ++ rcr = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR); ++ ocp_data = rcr & ~RCR_ACPT_ALL; ++ ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); ++ rxdy_gated_en(tp, true); ++ ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, ++ PLA_OOB_CTRL); ++ if (!(ocp_data & RXFIFO_EMPTY)) { ++ rxdy_gated_en(tp, false); ++ ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, rcr); ++ ret = -EBUSY; ++ goto out1; ++ } ++ } ++ + clear_bit(WORK_ENABLE, &tp->flags); + usb_kill_urb(tp->intr_urb); +- napi_disable(&tp->napi); +- rtl_stop_rx(tp); ++ + tp->rtl_ops.autosuspend_en(tp, true); +- napi_enable(&tp->napi); ++ ++ if (netif_carrier_ok(netdev)) { ++ napi_disable(&tp->napi); ++ rtl_stop_rx(tp); ++ rxdy_gated_en(tp, false); ++ ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, rcr); ++ napi_enable(&tp->napi); ++ } + } + + set_bit(SELECTIVE_SUSPEND, &tp->flags); diff --git a/queue-4.9/r8152-split-rtl8152_suspend-function.patch b/queue-4.9/r8152-split-rtl8152_suspend-function.patch new file mode 100644 index 00000000000..0f9f88754b7 --- /dev/null +++ b/queue-4.9/r8152-split-rtl8152_suspend-function.patch @@ -0,0 +1,102 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: hayeswang +Date: Tue, 10 Jan 2017 17:04:06 +0800 +Subject: r8152: split rtl8152_suspend function + +From: hayeswang + + +[ Upstream commit 8fb280616878b81c0790a0c33acbeec59c5711f4 ] + +Split rtl8152_suspend() into rtl8152_system_suspend() and +rtl8152_rumtime_suspend(). + +Signed-off-by: Hayes Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/r8152.c | 57 +++++++++++++++++++++++++++++++++--------------- + 1 file changed, 40 insertions(+), 17 deletions(-) + +--- a/drivers/net/usb/r8152.c ++++ b/drivers/net/usb/r8152.c +@@ -3576,39 +3576,62 @@ static bool delay_autosuspend(struct r81 + return false; + } + +-static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message) ++static int rtl8152_rumtime_suspend(struct r8152 *tp) + { +- struct r8152 *tp = usb_get_intfdata(intf); + struct net_device *netdev = tp->netdev; + int ret = 0; + +- mutex_lock(&tp->control); +- +- if (PMSG_IS_AUTO(message)) { +- if (netif_running(netdev) && delay_autosuspend(tp)) { ++ if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) { ++ if (delay_autosuspend(tp)) { + ret = -EBUSY; + goto out1; + } + +- set_bit(SELECTIVE_SUSPEND, &tp->flags); +- } else { +- netif_device_detach(netdev); ++ clear_bit(WORK_ENABLE, &tp->flags); ++ usb_kill_urb(tp->intr_urb); ++ napi_disable(&tp->napi); ++ rtl_stop_rx(tp); ++ tp->rtl_ops.autosuspend_en(tp, true); ++ napi_enable(&tp->napi); + } + ++ set_bit(SELECTIVE_SUSPEND, &tp->flags); ++ ++out1: ++ return ret; ++} ++ ++static int rtl8152_system_suspend(struct r8152 *tp) ++{ ++ struct net_device *netdev = tp->netdev; ++ int ret = 0; ++ ++ netif_device_detach(netdev); ++ + if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) { + clear_bit(WORK_ENABLE, &tp->flags); + usb_kill_urb(tp->intr_urb); + napi_disable(&tp->napi); +- if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { +- rtl_stop_rx(tp); +- tp->rtl_ops.autosuspend_en(tp, true); +- } else { +- cancel_delayed_work_sync(&tp->schedule); +- tp->rtl_ops.down(tp); +- } ++ cancel_delayed_work_sync(&tp->schedule); ++ tp->rtl_ops.down(tp); + napi_enable(&tp->napi); + } +-out1: ++ ++ return ret; ++} ++ ++static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message) ++{ ++ struct r8152 *tp = usb_get_intfdata(intf); ++ int ret; ++ ++ 
mutex_lock(&tp->control); ++ ++ if (PMSG_IS_AUTO(message)) ++ ret = rtl8152_rumtime_suspend(tp); ++ else ++ ret = rtl8152_system_suspend(tp); ++ + mutex_unlock(&tp->control); + + return ret; diff --git a/queue-4.9/rtnl-stats-add-missing-netlink-message-size-checks.patch b/queue-4.9/rtnl-stats-add-missing-netlink-message-size-checks.patch new file mode 100644 index 00000000000..005f02bc179 --- /dev/null +++ b/queue-4.9/rtnl-stats-add-missing-netlink-message-size-checks.patch @@ -0,0 +1,47 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Mathias Krause +Date: Wed, 28 Dec 2016 17:52:15 +0100 +Subject: rtnl: stats - add missing netlink message size checks + +From: Mathias Krause + + +[ Upstream commit 4775cc1f2d5abca894ac32774eefc22c45347d1c ] + +We miss to check if the netlink message is actually big enough to contain +a struct if_stats_msg. + +Add a check to prevent userland from sending us short messages that would +make us access memory beyond the end of the message. + +Fixes: 10c9ead9f3c6 ("rtnetlink: add new RTM_GETSTATS message to dump...") +Signed-off-by: Mathias Krause +Cc: Roopa Prabhu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -3886,6 +3886,9 @@ static int rtnl_stats_get(struct sk_buff + u32 filter_mask; + int err; + ++ if (nlmsg_len(nlh) < sizeof(*ifsm)) ++ return -EINVAL; ++ + ifsm = nlmsg_data(nlh); + if (ifsm->ifindex > 0) + dev = __dev_get_by_index(net, ifsm->ifindex); +@@ -3935,6 +3938,9 @@ static int rtnl_stats_dump(struct sk_buf + + cb->seq = net->dev_base_seq; + ++ if (nlmsg_len(cb->nlh) < sizeof(*ifsm)) ++ return -EINVAL; ++ + ifsm = nlmsg_data(cb->nlh); + filter_mask = ifsm->filter_mask; + if (!filter_mask) diff --git a/queue-4.9/sctp-sctp_transport_lookup_process-should-rcu_read_unlock-when-transport-is-null.patch b/queue-4.9/sctp-sctp_transport_lookup_process-should-rcu_read_unlock-when-transport-is-null.patch new file mode 100644 index 00000000000..e4175da435f --- /dev/null +++ b/queue-4.9/sctp-sctp_transport_lookup_process-should-rcu_read_unlock-when-transport-is-null.patch @@ -0,0 +1,40 @@ +From foo@baz Thu Jan 12 21:37:26 CET 2017 +From: Xin Long +Date: Thu, 15 Dec 2016 23:05:52 +0800 +Subject: sctp: sctp_transport_lookup_process should rcu_read_unlock when transport is null + +From: Xin Long + + +[ Upstream commit 08abb79542c9e8c367d1d8e44fe1026868d3f0a7 ] + +Prior to this patch, sctp_transport_lookup_process didn't rcu_read_unlock +when it failed to find a transport by sctp_addrs_lookup_transport. + +This patch is to fix it by moving up rcu_read_unlock right before checking +transport and also to remove the out path. + +Fixes: 1cceda784980 ("sctp: fix the issue sctp_diag uses lock_sock in rcu_read_lock") +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -4479,9 +4479,10 @@ int sctp_transport_lookup_process(int (* + + rcu_read_lock(); + transport = sctp_addrs_lookup_transport(net, laddr, paddr); +- if (!transport || !sctp_transport_hold(transport)) ++ if (!transport || !sctp_transport_hold(transport)) { ++ rcu_read_unlock(); + goto out; +- ++ } + rcu_read_unlock(); + err = cb(transport, p); + sctp_transport_put(transport);