From: Greg Kroah-Hartman Date: Sat, 29 Apr 2017 06:23:11 +0000 (+0200) Subject: 4.10-stable patches X-Git-Tag: v4.4.65~2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6c5e2549d1db156b5d79935158f9d57d0212b7d6;p=thirdparty%2Fkernel%2Fstable-queue.git 4.10-stable patches added patches: bpf-improve-verifier-packet-range-checks.patch dp83640-don-t-recieve-time-stamps-twice.patch gso-validate-assumption-of-frag_list-segementation.patch ip6mr-fix-notification-device-destruction.patch ipv6-check-raw-payload-size-correctly-in-ioctl.patch ipv6-check-skb-protocol-before-lookup-for-nexthop.patch ipv6-fix-idev-addr_list-corruption.patch ipv6-fix-source-routing.patch ipv6-sr-fix-double-free-of-skb-after-handling-invalid-srh.patch ipv6-sr-fix-out-of-bounds-access-in-srh-validation.patch kcm-return-immediately-after-copy_from_user-failure.patch l2tp-fix-ppp-pseudo-wire-auto-loading.patch l2tp-hold-tunnel-socket-when-handling-control-frames-in-l2tp_ip-and-l2tp_ip6.patch l2tp-purge-socket-queues-in-the-.destruct-callback.patch l2tp-take-reference-on-sessions-being-dumped.patch macvlan-fix-device-ref-leak-when-purging-bc_queue.patch net-ipv4-fix-multipath-rtm_getroute-behavior-when-iif-is-given.patch net-ipv6-regenerate-host-route-if-moved-to-gc-list.patch net-ipv6-rtf_pcpu-should-not-be-settable-from-userspace.patch net-mlx5-avoid-dereferencing-uninitialized-pointer.patch net-mlx5-e-switch-correctly-deal-with-inline-mode-on-connectx-5.patch net-mlx5-fix-driver-load-bad-flow-when-having-fw-initializing-timeout.patch net-mlx5e-fix-ethtool_grxclsrlall-handling.patch net-mlx5e-fix-small-packet-threshold.patch net-neigh-guard-against-null-solicit-method.patch net-packet-fix-overflow-in-check-for-tp_frame_nr.patch net-packet-fix-overflow-in-check-for-tp_reserve.patch net-phy-fix-auto-negotiation-stall-due-to-unavailable-interrupt.patch net-phy-handle-state-correctly-in-phy_stop_machine.patch net-timestamp-avoid-use-after-free-in-ip_recv_error.patch net-vrf-fix-setting-nlm_f_excl-flag-when-adding-l3mdev-rule.patch netpoll-check-for-skb-queue_mapping.patch openvswitch-fix-ovs_flow_key_update.patch sctp-listen-on-the-sock-only-when-it-s-state-is-listening-or-closed.patch secure_seq-downgrade-to-per-host-timestamp-offsets.patch sh_eth-unmap-dma-buffers-when-freeing-rings.patch sparc64-fix-kernel-panic-due-to-erroneous-ifdef-surrounding-pmd_write.patch sparc64-kern_addr_valid-regression.patch tcp-clear-saved_syn-in-tcp_disconnect.patch tcp-fix-scm_timestamping_opt_stats-for-normal-skbs.patch tcp-mark-skbs-with-scm_timestamping_opt_stats.patch tcp-memset-ca_priv-data-to-0-properly.patch --- diff --git a/queue-4.10/bpf-improve-verifier-packet-range-checks.patch b/queue-4.10/bpf-improve-verifier-packet-range-checks.patch new file mode 100644 index 00000000000..950941e61bd --- /dev/null +++ b/queue-4.10/bpf-improve-verifier-packet-range-checks.patch @@ -0,0 +1,83 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Alexei Starovoitov +Date: Fri, 24 Mar 2017 15:57:33 -0700 +Subject: bpf: improve verifier packet range checks + +From: Alexei Starovoitov + + +[ Upstream commit b1977682a3858b5584ffea7cfb7bd863f68db18d ] + +llvm can optimize the 'if (ptr > data_end)' checks to be in the order +slightly different than the original C code which will confuse verifier. +Like: +if (ptr + 16 > data_end) + return TC_ACT_SHOT; +// may be followed by +if (ptr + 14 > data_end) + return TC_ACT_SHOT; +while llvm can see that 'ptr' is valid for all 16 bytes, +the verifier could not. +Fix verifier logic to account for such case and add a test. + +Reported-by: Huapeng Zhou +Fixes: 969bf05eb3ce ("bpf: direct packet access") +Signed-off-by: Alexei Starovoitov +Acked-by: Daniel Borkmann +Acked-by: Martin KaFai Lau +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 5 +++-- + tools/testing/selftests/bpf/test_verifier.c | 20 ++++++++++++++++++++ + 2 files changed, 23 insertions(+), 2 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1859,14 +1859,15 @@ static void find_good_pkt_pointers(struc + + for (i = 0; i < MAX_BPF_REG; i++) + if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) +- regs[i].range = dst_reg->off; ++ /* keep the maximum range already checked */ ++ regs[i].range = max(regs[i].range, dst_reg->off); + + for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { + if (state->stack_slot_type[i] != STACK_SPILL) + continue; + reg = &state->spilled_regs[i / BPF_REG_SIZE]; + if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) +- reg->range = dst_reg->off; ++ reg->range = max(reg->range, dst_reg->off); + } + } + +--- a/tools/testing/selftests/bpf/test_verifier.c ++++ b/tools/testing/selftests/bpf/test_verifier.c +@@ -2876,6 +2876,26 @@ static struct bpf_test tests[] = { + .prog_type = BPF_PROG_TYPE_LWT_XMIT, + }, + { ++ "overlapping checks for direct packet access", ++ .insns = { ++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, ++ offsetof(struct __sk_buff, data)), ++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, ++ offsetof(struct __sk_buff, data_end)), ++ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), ++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), ++ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4), ++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), ++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), ++ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), ++ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }, ++ .result = ACCEPT, ++ .prog_type = BPF_PROG_TYPE_LWT_XMIT, ++ }, ++ { + "invalid access of tc_classid for LWT_IN", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, diff --git a/queue-4.10/dp83640-don-t-recieve-time-stamps-twice.patch b/queue-4.10/dp83640-don-t-recieve-time-stamps-twice.patch new file mode 100644 index 00000000000..a7d83cb498a --- /dev/null +++ b/queue-4.10/dp83640-don-t-recieve-time-stamps-twice.patch @@ -0,0 +1,41 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Dan Carpenter +Date: Tue, 18 Apr 2017 22:14:26 +0300 +Subject: dp83640: don't recieve time stamps twice + +From: Dan Carpenter + + +[ Upstream commit 9d386cd9a755c8293e8916264d4d053878a7c9c7 ] + +This patch is prompted by a static checker warning about a potential +use after free. The concern is that netif_rx_ni() can free "skb" and we +call it twice. + +When I look at the commit that added this, it looks like some stray +lines were added accidentally. It doesn't make sense to me that we +would recieve the same data two times. I asked the author but never +recieved a response. + +I can't test this code, but I'm pretty sure my patch is correct. + +Fixes: 4b063258ab93 ("dp83640: Delay scheduled work.") +Signed-off-by: Dan Carpenter +Acked-by: Stefan Sørensen +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/dp83640.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/drivers/net/phy/dp83640.c ++++ b/drivers/net/phy/dp83640.c +@@ -1438,8 +1438,6 @@ static bool dp83640_rxtstamp(struct phy_ + skb_info->tmo = jiffies + SKB_TIMESTAMP_TIMEOUT; + skb_queue_tail(&dp83640->rx_queue, skb); + schedule_delayed_work(&dp83640->ts_work, SKB_TIMESTAMP_TIMEOUT); +- } else { +- netif_rx_ni(skb); + } + + return true; diff --git a/queue-4.10/gso-validate-assumption-of-frag_list-segementation.patch b/queue-4.10/gso-validate-assumption-of-frag_list-segementation.patch new file mode 100644 index 00000000000..4b9d962d855 --- /dev/null +++ b/queue-4.10/gso-validate-assumption-of-frag_list-segementation.patch @@ -0,0 +1,75 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Ilan Tayari +Date: Wed, 19 Apr 2017 21:26:07 +0300 +Subject: gso: Validate assumption of frag_list segementation + +From: Ilan Tayari + + +[ Upstream commit 43170c4e0ba709c79130c3fe5a41e66279950cd0 ] + +Commit 07b26c9454a2 ("gso: Support partial splitting at the frag_list +pointer") assumes that all SKBs in a frag_list (except maybe the last +one) contain the same amount of GSO payload. + +This assumption is not always correct, resulting in the following +warning message in the log: + skb_segment: too many frags + +For example, mlx5 driver in Striding RQ mode creates some RX SKBs with +one frag, and some with 2 frags. +After GRO, the frag_list SKBs end up having different amounts of payload. +If this frag_list SKB is then forwarded, the aforementioned assumption +is violated. + +Validate the assumption, and fall back to software GSO if it not true. + +Change-Id: Ia03983f4a47b6534dd987d7a2aad96d54d46d212 +Fixes: 07b26c9454a2 ("gso: Support partial splitting at the frag_list pointer") +Signed-off-by: Ilan Tayari +Signed-off-by: Ilya Lesokhin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -3078,22 +3078,32 @@ struct sk_buff *skb_segment(struct sk_bu + if (sg && csum && (mss != GSO_BY_FRAGS)) { + if (!(features & NETIF_F_GSO_PARTIAL)) { + struct sk_buff *iter; ++ unsigned int frag_len; + + if (!list_skb || + !net_gso_ok(features, skb_shinfo(head_skb)->gso_type)) + goto normal; + +- /* Split the buffer at the frag_list pointer. +- * This is based on the assumption that all +- * buffers in the chain excluding the last +- * containing the same amount of data. ++ /* If we get here then all the required ++ * GSO features except frag_list are supported. ++ * Try to split the SKB to multiple GSO SKBs ++ * with no frag_list. ++ * Currently we can do that only when the buffers don't ++ * have a linear part and all the buffers except ++ * the last are of the same length. + */ ++ frag_len = list_skb->len; + skb_walk_frags(head_skb, iter) { ++ if (frag_len != iter->len && iter->next) ++ goto normal; + if (skb_headlen(iter)) + goto normal; + + len -= iter->len; + } ++ ++ if (len != frag_len) ++ goto normal; + } + + /* GSO partial only requires that we trim off any excess that diff --git a/queue-4.10/ip6mr-fix-notification-device-destruction.patch b/queue-4.10/ip6mr-fix-notification-device-destruction.patch new file mode 100644 index 00000000000..d31b1ca1500 --- /dev/null +++ b/queue-4.10/ip6mr-fix-notification-device-destruction.patch @@ -0,0 +1,131 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Nikolay Aleksandrov +Date: Fri, 21 Apr 2017 20:42:16 +0300 +Subject: ip6mr: fix notification device destruction + +From: Nikolay Aleksandrov + + +[ Upstream commit 723b929ca0f79c0796f160c2eeda4597ee98d2b8 ] + +Andrey Konovalov reported a BUG caused by the ip6mr code which is caused +because we call unregister_netdevice_many for a device that is already +being destroyed. In IPv4's ipmr that has been resolved by two commits +long time ago by introducing the "notify" parameter to the delete +function and avoiding the unregister when called from a notifier, so +let's do the same for ip6mr. + +The trace from Andrey: +------------[ cut here ]------------ +kernel BUG at net/core/dev.c:6813! +invalid opcode: 0000 [#1] SMP KASAN +Modules linked in: +CPU: 1 PID: 1165 Comm: kworker/u4:3 Not tainted 4.11.0-rc7+ #251 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs +01/01/2011 +Workqueue: netns cleanup_net +task: ffff880069208000 task.stack: ffff8800692d8000 +RIP: 0010:rollback_registered_many+0x348/0xeb0 net/core/dev.c:6813 +RSP: 0018:ffff8800692de7f0 EFLAGS: 00010297 +RAX: ffff880069208000 RBX: 0000000000000002 RCX: 0000000000000001 +RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88006af90569 +RBP: ffff8800692de9f0 R08: ffff8800692dec60 R09: 0000000000000000 +R10: 0000000000000006 R11: 0000000000000000 R12: ffff88006af90070 +R13: ffff8800692debf0 R14: dffffc0000000000 R15: ffff88006af90000 +FS: 0000000000000000(0000) GS:ffff88006cb00000(0000) +knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007fe7e897d870 CR3: 00000000657e7000 CR4: 00000000000006e0 +Call Trace: + unregister_netdevice_many.part.105+0x87/0x440 net/core/dev.c:7881 + unregister_netdevice_many+0xc8/0x120 net/core/dev.c:7880 + ip6mr_device_event+0x362/0x3f0 net/ipv6/ip6mr.c:1346 + notifier_call_chain+0x145/0x2f0 kernel/notifier.c:93 + __raw_notifier_call_chain kernel/notifier.c:394 + raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401 + call_netdevice_notifiers_info+0x51/0x90 net/core/dev.c:1647 + call_netdevice_notifiers net/core/dev.c:1663 + rollback_registered_many+0x919/0xeb0 net/core/dev.c:6841 + unregister_netdevice_many.part.105+0x87/0x440 net/core/dev.c:7881 + unregister_netdevice_many net/core/dev.c:7880 + default_device_exit_batch+0x4fa/0x640 net/core/dev.c:8333 + ops_exit_list.isra.4+0x100/0x150 net/core/net_namespace.c:144 + cleanup_net+0x5a8/0xb40 net/core/net_namespace.c:463 + process_one_work+0xc04/0x1c10 kernel/workqueue.c:2097 + worker_thread+0x223/0x19c0 kernel/workqueue.c:2231 + kthread+0x35e/0x430 kernel/kthread.c:231 + ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430 +Code: 3c 32 00 0f 85 70 0b 00 00 48 b8 00 02 00 00 00 00 ad de 49 89 +47 78 e9 93 fe ff ff 49 8d 57 70 49 8d 5f 78 eb 9e e8 88 7a 14 fe <0f> +0b 48 8b 9d 28 fe ff ff e8 7a 7a 14 fe 48 b8 00 00 00 00 00 +RIP: rollback_registered_many+0x348/0xeb0 RSP: ffff8800692de7f0 +---[ end trace e0b29c57e9b3292c ]--- + +Reported-by: Andrey Konovalov +Signed-off-by: Nikolay Aleksandrov +Tested-by: Andrey Konovalov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6mr.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +--- a/net/ipv6/ip6mr.c ++++ b/net/ipv6/ip6mr.c +@@ -774,7 +774,8 @@ failure: + * Delete a VIF entry + */ + +-static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head) ++static int mif6_delete(struct mr6_table *mrt, int vifi, int notify, ++ struct list_head *head) + { + struct mif_device *v; + struct net_device *dev; +@@ -820,7 +821,7 @@ static int mif6_delete(struct mr6_table + dev->ifindex, &in6_dev->cnf); + } + +- if (v->flags & MIFF_REGISTER) ++ if ((v->flags & MIFF_REGISTER) && !notify) + unregister_netdevice_queue(dev, head); + + dev_put(dev); +@@ -1331,7 +1332,6 @@ static int ip6mr_device_event(struct not + struct mr6_table *mrt; + struct mif_device *v; + int ct; +- LIST_HEAD(list); + + if (event != NETDEV_UNREGISTER) + return NOTIFY_DONE; +@@ -1340,10 +1340,9 @@ static int ip6mr_device_event(struct not + v = &mrt->vif6_table[0]; + for (ct = 0; ct < mrt->maxvif; ct++, v++) { + if (v->dev == dev) +- mif6_delete(mrt, ct, &list); ++ mif6_delete(mrt, ct, 1, NULL); + } + } +- unregister_netdevice_many(&list); + + return NOTIFY_DONE; + } +@@ -1552,7 +1551,7 @@ static void mroute_clean_tables(struct m + for (i = 0; i < mrt->maxvif; i++) { + if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC)) + continue; +- mif6_delete(mrt, i, &list); ++ mif6_delete(mrt, i, 0, &list); + } + unregister_netdevice_many(&list); + +@@ -1706,7 +1705,7 @@ int ip6_mroute_setsockopt(struct sock *s + if (copy_from_user(&mifi, optval, sizeof(mifi_t))) + return -EFAULT; + rtnl_lock(); +- ret = mif6_delete(mrt, mifi, NULL); ++ ret = mif6_delete(mrt, mifi, 0, NULL); + rtnl_unlock(); + return ret; + diff --git a/queue-4.10/ipv6-check-raw-payload-size-correctly-in-ioctl.patch b/queue-4.10/ipv6-check-raw-payload-size-correctly-in-ioctl.patch new file mode 100644 index 00000000000..3b2b91cd662 --- /dev/null +++ b/queue-4.10/ipv6-check-raw-payload-size-correctly-in-ioctl.patch @@ -0,0 +1,39 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Jamie Bainbridge +Date: Wed, 26 Apr 2017 10:43:27 +1000 +Subject: ipv6: check raw payload size correctly in ioctl + +From: Jamie Bainbridge + + +[ Upstream commit 105f5528b9bbaa08b526d3405a5bcd2ff0c953c8 ] + +In situations where an skb is paged, the transport header pointer and +tail pointer can be the same because the skb contents are in frags. + +This results in ioctl(SIOCINQ/FIONREAD) incorrectly returning a +length of 0 when the length to receive is actually greater than zero. + +skb->len is already correctly set in ip6_input_finish() with +pskb_pull(), so use skb->len as it always returns the correct result +for both linear and paged data. + +Signed-off-by: Jamie Bainbridge +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/raw.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/ipv6/raw.c ++++ b/net/ipv6/raw.c +@@ -1174,8 +1174,7 @@ static int rawv6_ioctl(struct sock *sk, + spin_lock_bh(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); + if (skb) +- amount = skb_tail_pointer(skb) - +- skb_transport_header(skb); ++ amount = skb->len; + spin_unlock_bh(&sk->sk_receive_queue.lock); + return put_user(amount, (int __user *)arg); + } diff --git a/queue-4.10/ipv6-check-skb-protocol-before-lookup-for-nexthop.patch b/queue-4.10/ipv6-check-skb-protocol-before-lookup-for-nexthop.patch new file mode 100644 index 00000000000..5a5d591ee05 --- /dev/null +++ b/queue-4.10/ipv6-check-skb-protocol-before-lookup-for-nexthop.patch @@ -0,0 +1,108 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: WANG Cong +Date: Tue, 25 Apr 2017 14:37:15 -0700 +Subject: ipv6: check skb->protocol before lookup for nexthop + +From: WANG Cong + + +[ Upstream commit 199ab00f3cdb6f154ea93fa76fd80192861a821d ] + +Andrey reported a out-of-bound access in ip6_tnl_xmit(), this +is because we use an ipv4 dst in ip6_tnl_xmit() and cast an IPv4 +neigh key as an IPv6 address: + + neigh = dst_neigh_lookup(skb_dst(skb), + &ipv6_hdr(skb)->daddr); + if (!neigh) + goto tx_err_link_failure; + + addr6 = (struct in6_addr *)&neigh->primary_key; // <=== HERE + addr_type = ipv6_addr_type(addr6); + + if (addr_type == IPV6_ADDR_ANY) + addr6 = &ipv6_hdr(skb)->daddr; + + memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); + +Also the network header of the skb at this point should be still IPv4 +for 4in6 tunnels, we shold not just use it as IPv6 header. + +This patch fixes it by checking if skb->protocol is ETH_P_IPV6: if it +is, we are safe to do the nexthop lookup using skb_dst() and +ipv6_hdr(skb)->daddr; if not (aka IPv4), we have no clue about which +dest address we can pick here, we have to rely on callers to fill it +from tunnel config, so just fall to ip6_route_output() to make the +decision. + +Fixes: ea3dc9601bda ("ip6_tunnel: Add support for wildcard tunnel endpoints.") +Reported-by: Andrey Konovalov +Tested-by: Andrey Konovalov +Cc: Steffen Klassert +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_tunnel.c | 44 +++++++++++++++++++++++--------------------- + 1 file changed, 23 insertions(+), 21 deletions(-) + +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1037,7 +1037,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, st + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; + struct net_device_stats *stats = &t->dev->stats; +- struct ipv6hdr *ipv6h = ipv6_hdr(skb); ++ struct ipv6hdr *ipv6h; + struct ipv6_tel_txoption opt; + struct dst_entry *dst = NULL, *ndst = NULL; + struct net_device *tdev; +@@ -1057,26 +1057,28 @@ int ip6_tnl_xmit(struct sk_buff *skb, st + + /* NBMA tunnel */ + if (ipv6_addr_any(&t->parms.raddr)) { +- struct in6_addr *addr6; +- struct neighbour *neigh; +- int addr_type; +- +- if (!skb_dst(skb)) +- goto tx_err_link_failure; +- +- neigh = dst_neigh_lookup(skb_dst(skb), +- &ipv6_hdr(skb)->daddr); +- if (!neigh) +- goto tx_err_link_failure; +- +- addr6 = (struct in6_addr *)&neigh->primary_key; +- addr_type = ipv6_addr_type(addr6); +- +- if (addr_type == IPV6_ADDR_ANY) +- addr6 = &ipv6_hdr(skb)->daddr; +- +- memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); +- neigh_release(neigh); ++ if (skb->protocol == htons(ETH_P_IPV6)) { ++ struct in6_addr *addr6; ++ struct neighbour *neigh; ++ int addr_type; ++ ++ if (!skb_dst(skb)) ++ goto tx_err_link_failure; ++ ++ neigh = dst_neigh_lookup(skb_dst(skb), ++ &ipv6_hdr(skb)->daddr); ++ if (!neigh) ++ goto tx_err_link_failure; ++ ++ addr6 = (struct in6_addr *)&neigh->primary_key; ++ addr_type = ipv6_addr_type(addr6); ++ ++ if (addr_type == IPV6_ADDR_ANY) ++ addr6 = &ipv6_hdr(skb)->daddr; ++ ++ memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); ++ neigh_release(neigh); ++ } + } else if (!(t->parms.flags & + (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { + /* enable the cache only only if the routing decision does diff --git a/queue-4.10/ipv6-fix-idev-addr_list-corruption.patch b/queue-4.10/ipv6-fix-idev-addr_list-corruption.patch new file mode 100644 index 00000000000..e242576dffc --- /dev/null +++ b/queue-4.10/ipv6-fix-idev-addr_list-corruption.patch @@ -0,0 +1,70 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Rabin Vincent +Date: Mon, 10 Apr 2017 08:36:39 +0200 +Subject: ipv6: Fix idev->addr_list corruption + +From: Rabin Vincent + + +[ Upstream commit a2d6cbb0670d54806f18192cb0db266b4a6d285a ] + +addrconf_ifdown() removes elements from the idev->addr_list without +holding the idev->lock. + +If this happens while the loop in __ipv6_dev_get_saddr() is handling the +same element, that function ends up in an infinite loop: + + NMI watchdog: BUG: soft lockup - CPU#1 stuck for 23s! [test:1719] + Call Trace: + ipv6_get_saddr_eval+0x13c/0x3a0 + __ipv6_dev_get_saddr+0xe4/0x1f0 + ipv6_dev_get_saddr+0x1b4/0x204 + ip6_dst_lookup_tail+0xcc/0x27c + ip6_dst_lookup_flow+0x38/0x80 + udpv6_sendmsg+0x708/0xba8 + sock_sendmsg+0x18/0x30 + SyS_sendto+0xb8/0xf8 + syscall_common+0x34/0x58 + +Fixes: 6a923934c33 (Revert "ipv6: Revert optional address flusing on ifdown.") +Signed-off-by: Rabin Vincent +Acked-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -3618,14 +3618,19 @@ restart: + INIT_LIST_HEAD(&del_list); + list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { + struct rt6_info *rt = NULL; ++ bool keep; + + addrconf_del_dad_work(ifa); + ++ keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) && ++ !addr_is_local(&ifa->addr); ++ if (!keep) ++ list_move(&ifa->if_list, &del_list); ++ + write_unlock_bh(&idev->lock); + spin_lock_bh(&ifa->lock); + +- if (keep_addr && (ifa->flags & IFA_F_PERMANENT) && +- !addr_is_local(&ifa->addr)) { ++ if (keep) { + /* set state to skip the notifier below */ + state = INET6_IFADDR_STATE_DEAD; + ifa->state = 0; +@@ -3637,8 +3642,6 @@ restart: + } else { + state = ifa->state; + ifa->state = INET6_IFADDR_STATE_DEAD; +- +- list_move(&ifa->if_list, &del_list); + } + + spin_unlock_bh(&ifa->lock); diff --git a/queue-4.10/ipv6-fix-source-routing.patch b/queue-4.10/ipv6-fix-source-routing.patch new file mode 100644 index 00000000000..400e9cca3a1 --- /dev/null +++ b/queue-4.10/ipv6-fix-source-routing.patch @@ -0,0 +1,49 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Sabrina Dubroca +Date: Tue, 25 Apr 2017 15:56:50 +0200 +Subject: ipv6: fix source routing + +From: Sabrina Dubroca + + +[ Upstream commit ec9c4215fef37da6668c4105f5ad3891aaa6527a ] + +Commit a149e7c7ce81 ("ipv6: sr: add support for SRH injection through +setsockopt") introduced handling of IPV6_SRCRT_TYPE_4, but at the same +time restricted it to only IPV6_SRCRT_TYPE_0 and +IPV6_SRCRT_TYPE_4. Previously, ipv6_push_exthdr() and fl6_update_dst() +would also handle other values (ie STRICT and TYPE_2). + +Restore previous source routing behavior, by handling IPV6_SRCRT_STRICT +and IPV6_SRCRT_TYPE_2 the same way as IPV6_SRCRT_TYPE_0 in +ipv6_push_exthdr() and fl6_update_dst(). + +Fixes: a149e7c7ce81 ("ipv6: sr: add support for SRH injection through setsockopt") +Signed-off-by: Sabrina Dubroca +Reviewed-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/exthdrs.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/ipv6/exthdrs.c ++++ b/net/ipv6/exthdrs.c +@@ -909,6 +909,8 @@ static void ipv6_push_rthdr(struct sk_bu + { + switch (opt->type) { + case IPV6_SRCRT_TYPE_0: ++ case IPV6_SRCRT_STRICT: ++ case IPV6_SRCRT_TYPE_2: + ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr); + break; + case IPV6_SRCRT_TYPE_4: +@@ -1163,6 +1165,8 @@ struct in6_addr *fl6_update_dst(struct f + + switch (opt->srcrt->type) { + case IPV6_SRCRT_TYPE_0: ++ case IPV6_SRCRT_STRICT: ++ case IPV6_SRCRT_TYPE_2: + fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; + break; + case IPV6_SRCRT_TYPE_4: diff --git a/queue-4.10/ipv6-sr-fix-double-free-of-skb-after-handling-invalid-srh.patch b/queue-4.10/ipv6-sr-fix-double-free-of-skb-after-handling-invalid-srh.patch new file mode 100644 index 00000000000..972e8e72a07 --- /dev/null +++ b/queue-4.10/ipv6-sr-fix-double-free-of-skb-after-handling-invalid-srh.patch @@ -0,0 +1,33 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: David Lebrun +Date: Wed, 19 Apr 2017 16:10:19 +0200 +Subject: ipv6: sr: fix double free of skb after handling invalid SRH + +From: David Lebrun + + +[ Upstream commit 95b9b88d2da5e43e025400afcb492643933bf858 ] + +The icmpv6_param_prob() function already does a kfree_skb(), +this patch removes the duplicate one. + +Fixes: 1ababeba4a21f3dba3da3523c670b207fb2feb62 ("ipv6: implement dataplane support for rthdr type 4 (Segment Routing Header)") +Reported-by: Dan Carpenter +Cc: Dan Carpenter +Signed-off-by: David Lebrun +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/exthdrs.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/net/ipv6/exthdrs.c ++++ b/net/ipv6/exthdrs.c +@@ -388,7 +388,6 @@ looped_back: + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, + ((&hdr->segments_left) - + skb_network_header(skb))); +- kfree_skb(skb); + return -1; + } + diff --git a/queue-4.10/ipv6-sr-fix-out-of-bounds-access-in-srh-validation.patch b/queue-4.10/ipv6-sr-fix-out-of-bounds-access-in-srh-validation.patch new file mode 100644 index 00000000000..8c2dd74116a --- /dev/null +++ b/queue-4.10/ipv6-sr-fix-out-of-bounds-access-in-srh-validation.patch @@ -0,0 +1,34 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: David Lebrun +Date: Tue, 18 Apr 2017 17:59:49 +0200 +Subject: ipv6: sr: fix out-of-bounds access in SRH validation + +From: David Lebrun + + +[ Upstream commit 2f3bb64247b5b083d05ccecad9c2e139bbfdc294 ] + +This patch fixes an out-of-bounds access in seg6_validate_srh() when the +trailing data is less than sizeof(struct sr6_tlv). + +Reported-by: Andrey Konovalov +Cc: Andrey Konovalov +Signed-off-by: David Lebrun +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/seg6.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/ipv6/seg6.c ++++ b/net/ipv6/seg6.c +@@ -53,6 +53,9 @@ bool seg6_validate_srh(struct ipv6_sr_hd + struct sr6_tlv *tlv; + unsigned int tlv_len; + ++ if (trailing < sizeof(*tlv)) ++ return false; ++ + tlv = (struct sr6_tlv *)((unsigned char *)srh + tlv_offset); + tlv_len = sizeof(*tlv) + tlv->len; + diff --git a/queue-4.10/kcm-return-immediately-after-copy_from_user-failure.patch b/queue-4.10/kcm-return-immediately-after-copy_from_user-failure.patch new file mode 100644 index 00000000000..7d23acb128f --- /dev/null +++ b/queue-4.10/kcm-return-immediately-after-copy_from_user-failure.patch @@ -0,0 +1,51 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: WANG Cong +Date: Thu, 23 Mar 2017 11:03:31 -0700 +Subject: kcm: return immediately after copy_from_user() failure + +From: WANG Cong + + +[ Upstream commit a80db69e47d764bbcaf2fec54b1f308925e7c490 ] + +There is no reason to continue after a copy_from_user() +failure. + +Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") +Cc: Tom Herbert +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/kcm/kcmsock.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/net/kcm/kcmsock.c ++++ b/net/kcm/kcmsock.c +@@ -1685,7 +1685,7 @@ static int kcm_ioctl(struct socket *sock + struct kcm_attach info; + + if (copy_from_user(&info, (void __user *)arg, sizeof(info))) +- err = -EFAULT; ++ return -EFAULT; + + err = kcm_attach_ioctl(sock, &info); + +@@ -1695,7 +1695,7 @@ static int kcm_ioctl(struct socket *sock + struct kcm_unattach info; + + if (copy_from_user(&info, (void __user *)arg, sizeof(info))) +- err = -EFAULT; ++ return -EFAULT; + + err = kcm_unattach_ioctl(sock, &info); + +@@ -1706,7 +1706,7 @@ static int kcm_ioctl(struct socket *sock + struct socket *newsock = NULL; + + if (copy_from_user(&info, (void __user *)arg, sizeof(info))) +- err = -EFAULT; ++ return -EFAULT; + + err = kcm_clone(sock, &info, &newsock); + diff --git a/queue-4.10/l2tp-fix-ppp-pseudo-wire-auto-loading.patch b/queue-4.10/l2tp-fix-ppp-pseudo-wire-auto-loading.patch new file mode 100644 index 00000000000..d0ab064be05 --- /dev/null +++ b/queue-4.10/l2tp-fix-ppp-pseudo-wire-auto-loading.patch @@ -0,0 +1,28 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Guillaume Nault +Date: Mon, 3 Apr 2017 13:23:15 +0200 +Subject: l2tp: fix PPP pseudo-wire auto-loading + +From: Guillaume Nault + + +[ Upstream commit 249ee819e24c180909f43c1173c8ef6724d21faf ] + +PPP pseudo-wire type is 7 (11 is L2TP_PWTYPE_IP). + +Fixes: f1f39f911027 ("l2tp: auto load type modules") +Signed-off-by: Guillaume Nault +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_ppp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/l2tp/l2tp_ppp.c ++++ b/net/l2tp/l2tp_ppp.c +@@ -1848,4 +1848,4 @@ MODULE_DESCRIPTION("PPP over L2TP over U + MODULE_LICENSE("GPL"); + MODULE_VERSION(PPPOL2TP_DRV_VERSION); + MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OL2TP); +-MODULE_ALIAS_L2TP_PWTYPE(11); ++MODULE_ALIAS_L2TP_PWTYPE(7); diff --git a/queue-4.10/l2tp-hold-tunnel-socket-when-handling-control-frames-in-l2tp_ip-and-l2tp_ip6.patch b/queue-4.10/l2tp-hold-tunnel-socket-when-handling-control-frames-in-l2tp_ip-and-l2tp_ip6.patch new file mode 100644 index 00000000000..cd6d05a5bd9 --- /dev/null +++ b/queue-4.10/l2tp-hold-tunnel-socket-when-handling-control-frames-in-l2tp_ip-and-l2tp_ip6.patch @@ -0,0 +1,55 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Guillaume Nault +Date: Wed, 29 Mar 2017 08:44:59 +0200 +Subject: l2tp: hold tunnel socket when handling control frames in l2tp_ip and l2tp_ip6 + +From: Guillaume Nault + + +[ Upstream commit 94d7ee0baa8b764cf64ad91ed69464c1a6a0066b ] + +The code following l2tp_tunnel_find() expects that a new reference is +held on sk. Either sk_receive_skb() or the discard_put error path will +drop a reference from the tunnel's socket. + +This issue exists in both l2tp_ip and l2tp_ip6. + +Fixes: a3c18422a4b4 ("l2tp: hold socket before dropping lock in l2tp_ip{, 6}_recv()") +Signed-off-by: Guillaume Nault +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_ip.c | 5 +++-- + net/l2tp/l2tp_ip6.c | 5 +++-- + 2 files changed, 6 insertions(+), 4 deletions(-) + +--- a/net/l2tp/l2tp_ip.c ++++ b/net/l2tp/l2tp_ip.c +@@ -171,9 +171,10 @@ pass_up: + + tunnel_id = ntohl(*(__be32 *) &skb->data[4]); + tunnel = l2tp_tunnel_find(net, tunnel_id); +- if (tunnel != NULL) ++ if (tunnel) { + sk = tunnel->sock; +- else { ++ sock_hold(sk); ++ } else { + struct iphdr *iph = (struct iphdr *) skb_network_header(skb); + + read_lock_bh(&l2tp_ip_lock); +--- a/net/l2tp/l2tp_ip6.c ++++ b/net/l2tp/l2tp_ip6.c +@@ -183,9 +183,10 @@ pass_up: + + tunnel_id = ntohl(*(__be32 *) &skb->data[4]); + tunnel = l2tp_tunnel_find(net, tunnel_id); +- if (tunnel != NULL) ++ if (tunnel) { + sk = tunnel->sock; +- else { ++ sock_hold(sk); ++ } else { + struct ipv6hdr *iph = ipv6_hdr(skb); + + read_lock_bh(&l2tp_ip6_lock); diff --git a/queue-4.10/l2tp-purge-socket-queues-in-the-.destruct-callback.patch b/queue-4.10/l2tp-purge-socket-queues-in-the-.destruct-callback.patch new file mode 100644 index 00000000000..e754d13d5e9 --- /dev/null +++ b/queue-4.10/l2tp-purge-socket-queues-in-the-.destruct-callback.patch @@ -0,0 +1,49 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Guillaume Nault +Date: Wed, 29 Mar 2017 08:45:29 +0200 +Subject: l2tp: purge socket queues in the .destruct() callback + +From: Guillaume Nault + + +[ Upstream commit e91793bb615cf6cdd59c0b6749fe173687bb0947 ] + +The Rx path may grab the socket right before pppol2tp_release(), but +nothing guarantees that it will enqueue packets before +skb_queue_purge(). Therefore, the socket can be destroyed without its +queues fully purged. + +Fix this by purging queues in pppol2tp_session_destruct() where we're +guaranteed nothing is still referencing the socket. + +Fixes: 9e9cb6221aa7 ("l2tp: fix userspace reception on plain L2TP sockets") +Signed-off-by: Guillaume Nault +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_ppp.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/net/l2tp/l2tp_ppp.c ++++ b/net/l2tp/l2tp_ppp.c +@@ -450,6 +450,10 @@ static void pppol2tp_session_close(struc + static void pppol2tp_session_destruct(struct sock *sk) + { + struct l2tp_session *session = sk->sk_user_data; ++ ++ skb_queue_purge(&sk->sk_receive_queue); ++ skb_queue_purge(&sk->sk_write_queue); ++ + if (session) { + sk->sk_user_data = NULL; + BUG_ON(session->magic != L2TP_SESSION_MAGIC); +@@ -488,9 +492,6 @@ static int pppol2tp_release(struct socke + l2tp_session_queue_purge(session); + sock_put(sk); + } +- skb_queue_purge(&sk->sk_receive_queue); +- skb_queue_purge(&sk->sk_write_queue); +- + release_sock(sk); + + /* This will delete the session context via diff --git a/queue-4.10/l2tp-take-reference-on-sessions-being-dumped.patch b/queue-4.10/l2tp-take-reference-on-sessions-being-dumped.patch new file mode 100644 index 00000000000..d2414c15436 --- /dev/null +++ b/queue-4.10/l2tp-take-reference-on-sessions-being-dumped.patch @@ -0,0 +1,159 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Guillaume Nault +Date: Mon, 3 Apr 2017 12:03:13 +0200 +Subject: l2tp: take reference on sessions being dumped + +From: Guillaume Nault + + +[ Upstream commit e08293a4ccbcc993ded0fdc46f1e57926b833d63 ] + +Take a reference on the sessions returned by l2tp_session_find_nth() +(and rename it l2tp_session_get_nth() to reflect this change), so that +caller is assured that the session isn't going to disappear while +processing it. + +For procfs and debugfs handlers, the session is held in the .start() +callback and dropped in .show(). Given that pppol2tp_seq_session_show() +dereferences the associated PPPoL2TP socket and that +l2tp_dfs_seq_session_show() might call pppol2tp_show(), we also need to +call the session's .ref() callback to prevent the socket from going +away from under us. + +Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") +Fixes: 0ad6614048cf ("l2tp: Add debugfs files for dumping l2tp debug info") +Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP") +Signed-off-by: Guillaume Nault +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_core.c | 8 ++++++-- + net/l2tp/l2tp_core.h | 3 ++- + net/l2tp/l2tp_debugfs.c | 10 +++++++--- + net/l2tp/l2tp_netlink.c | 7 +++++-- + net/l2tp/l2tp_ppp.c | 10 +++++++--- + 5 files changed, 27 insertions(+), 11 deletions(-) + +--- a/net/l2tp/l2tp_core.c ++++ b/net/l2tp/l2tp_core.c +@@ -278,7 +278,8 @@ struct l2tp_session *l2tp_session_find(s + } + EXPORT_SYMBOL_GPL(l2tp_session_find); + +-struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) ++struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, ++ bool do_ref) + { + int hash; + struct l2tp_session *session; +@@ -288,6 +289,9 @@ struct l2tp_session *l2tp_session_find_n + for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { + hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { + if (++count > nth) { ++ l2tp_session_inc_refcount(session); ++ if (do_ref && session->ref) ++ session->ref(session); + read_unlock_bh(&tunnel->hlist_lock); + return session; + } +@@ -298,7 +302,7 @@ struct l2tp_session *l2tp_session_find_n + + return NULL; + } +-EXPORT_SYMBOL_GPL(l2tp_session_find_nth); ++EXPORT_SYMBOL_GPL(l2tp_session_get_nth); + + /* Lookup a session by interface name. + * This is very inefficient but is only used by management interfaces. +--- a/net/l2tp/l2tp_core.h ++++ b/net/l2tp/l2tp_core.h +@@ -233,7 +233,8 @@ out: + struct l2tp_session *l2tp_session_find(struct net *net, + struct l2tp_tunnel *tunnel, + u32 session_id); +-struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); ++struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, ++ bool do_ref); + struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); + struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); + struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); +--- a/net/l2tp/l2tp_debugfs.c ++++ b/net/l2tp/l2tp_debugfs.c +@@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct + + static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd) + { +- pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); ++ pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); + pd->session_idx++; + + if (pd->session == NULL) { +@@ -238,10 +238,14 @@ static int l2tp_dfs_seq_show(struct seq_ + } + + /* Show the tunnel or session context */ +- if (pd->session == NULL) ++ if (!pd->session) { + l2tp_dfs_seq_tunnel_show(m, pd->tunnel); +- else ++ } else { + l2tp_dfs_seq_session_show(m, pd->session); ++ if (pd->session->deref) ++ pd->session->deref(pd->session); ++ l2tp_session_dec_refcount(pd->session); ++ } + + out: + return 0; +--- a/net/l2tp/l2tp_netlink.c ++++ b/net/l2tp/l2tp_netlink.c +@@ -852,7 +852,7 @@ static int l2tp_nl_cmd_session_dump(stru + goto out; + } + +- session = l2tp_session_find_nth(tunnel, si); ++ session = l2tp_session_get_nth(tunnel, si, false); + if (session == NULL) { + ti++; + tunnel = NULL; +@@ -862,8 +862,11 @@ static int l2tp_nl_cmd_session_dump(stru + + if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, +- session, L2TP_CMD_SESSION_GET) < 0) ++ session, L2TP_CMD_SESSION_GET) < 0) { ++ l2tp_session_dec_refcount(session); + break; ++ } ++ l2tp_session_dec_refcount(session); + + si++; + } +--- a/net/l2tp/l2tp_ppp.c ++++ b/net/l2tp/l2tp_ppp.c +@@ -1555,7 +1555,7 @@ static void pppol2tp_next_tunnel(struct + + static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd) + { +- pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); ++ pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); + pd->session_idx++; + + if (pd->session == NULL) { +@@ -1682,10 +1682,14 @@ static int pppol2tp_seq_show(struct seq_ + + /* Show the tunnel or session context. + */ +- if (pd->session == NULL) ++ if (!pd->session) { + pppol2tp_seq_tunnel_show(m, pd->tunnel); +- else ++ } else { + pppol2tp_seq_session_show(m, pd->session); ++ if (pd->session->deref) ++ pd->session->deref(pd->session); ++ l2tp_session_dec_refcount(pd->session); ++ } + + out: + return 0; diff --git a/queue-4.10/macvlan-fix-device-ref-leak-when-purging-bc_queue.patch b/queue-4.10/macvlan-fix-device-ref-leak-when-purging-bc_queue.patch new file mode 100644 index 00000000000..f01f69a9342 --- /dev/null +++ b/queue-4.10/macvlan-fix-device-ref-leak-when-purging-bc_queue.patch @@ -0,0 +1,52 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Herbert Xu +Date: Thu, 20 Apr 2017 20:55:12 +0800 +Subject: macvlan: Fix device ref leak when purging bc_queue + +From: Herbert Xu + + +[ Upstream commit f6478218e6edc2a587b8f132f66373baa7b2497c ] + +When a parent macvlan device is destroyed we end up purging its +broadcast queue without dropping the device reference count on +the packet source device. This causes the source device to linger. + +This patch drops that reference count. + +Fixes: 260916dfb48c ("macvlan: Fix potential use-after free for...") +Reported-by: Joe Ghalam +Signed-off-by: Herbert Xu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvlan.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/drivers/net/macvlan.c ++++ b/drivers/net/macvlan.c +@@ -1140,6 +1140,7 @@ static int macvlan_port_create(struct ne + static void macvlan_port_destroy(struct net_device *dev) + { + struct macvlan_port *port = macvlan_port_get_rtnl(dev); ++ struct sk_buff *skb; + + dev->priv_flags &= ~IFF_MACVLAN_PORT; + netdev_rx_handler_unregister(dev); +@@ -1148,7 +1149,15 @@ static void macvlan_port_destroy(struct + * but we need to cancel it and purge left skbs if any. + */ + cancel_work_sync(&port->bc_work); +- __skb_queue_purge(&port->bc_queue); ++ ++ while ((skb = __skb_dequeue(&port->bc_queue))) { ++ const struct macvlan_dev *src = MACVLAN_SKB_CB(skb)->src; ++ ++ if (src) ++ dev_put(src->dev); ++ ++ kfree_skb(skb); ++ } + + kfree(port); + } diff --git a/queue-4.10/net-ipv4-fix-multipath-rtm_getroute-behavior-when-iif-is-given.patch b/queue-4.10/net-ipv4-fix-multipath-rtm_getroute-behavior-when-iif-is-given.patch new file mode 100644 index 00000000000..51b79e989b6 --- /dev/null +++ b/queue-4.10/net-ipv4-fix-multipath-rtm_getroute-behavior-when-iif-is-given.patch @@ -0,0 +1,37 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Florian Larysch +Date: Mon, 3 Apr 2017 16:46:09 +0200 +Subject: net: ipv4: fix multipath RTM_GETROUTE behavior when iif is given + +From: Florian Larysch + + +[ Upstream commit a8801799c6975601fd58ae62f48964caec2eb83f ] + +inet_rtm_getroute synthesizes a skeletal ICMP skb, which is passed to +ip_route_input when iif is given. If a multipath route is present for +the designated destination, ip_multipath_icmp_hash ends up being called, +which uses the source/destination addresses within the skb to calculate +a hash. However, those are not set in the synthetic skb, causing it to +return an arbitrary and incorrect result. + +Instead, use UDP, which gets no such special treatment. + +Signed-off-by: Florian Larysch +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -2608,7 +2608,7 @@ static int inet_rtm_getroute(struct sk_b + skb_reset_network_header(skb); + + /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */ +- ip_hdr(skb)->protocol = IPPROTO_ICMP; ++ ip_hdr(skb)->protocol = IPPROTO_UDP; + skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); + + src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; diff --git a/queue-4.10/net-ipv6-regenerate-host-route-if-moved-to-gc-list.patch b/queue-4.10/net-ipv6-regenerate-host-route-if-moved-to-gc-list.patch new file mode 100644 index 00000000000..c4cded49c6b --- /dev/null +++ b/queue-4.10/net-ipv6-regenerate-host-route-if-moved-to-gc-list.patch @@ -0,0 +1,81 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: David Ahern +Date: Tue, 25 Apr 2017 09:17:29 -0700 +Subject: net: ipv6: regenerate host route if moved to gc list + +From: David Ahern + + +[ Upstream commit 8048ced9beb21a52e3305f3332ae82020619f24e ] + +Taking down the loopback device wreaks havoc on IPv6 routing. By +extension, taking down a VRF device wreaks havoc on its table. + +Dmitry and Andrey both reported heap out-of-bounds reports in the IPv6 +FIB code while running syzkaller fuzzer. The root cause is a dead dst +that is on the garbage list gets reinserted into the IPv6 FIB. While on +the gc (or perhaps when it gets added to the gc list) the dst->next is +set to an IPv4 dst. A subsequent walk of the ipv6 tables causes the +out-of-bounds access. + +Andrey's reproducer was the key to getting to the bottom of this. + +With IPv6, host routes for an address have the dst->dev set to the +loopback device. When the 'lo' device is taken down, rt6_ifdown initiates +a walk of the fib evicting routes with the 'lo' device which means all +host routes are removed. That process moves the dst which is attached to +an inet6_ifaddr to the gc list and marks it as dead. + +The recent change to keep global IPv6 addresses added a new function, +fixup_permanent_addr, that is called on admin up. That function restarts +dad for an inet6_ifaddr and when it completes the host route attached +to it is inserted into the fib. Since the route was marked dead and +moved to the gc list, re-inserting the route causes the reported +out-of-bounds accesses. If the device with the address is taken down +or the address is removed, the WARN_ON in fib6_del is triggered. + +All of those faults are fixed by regenerating the host route if the +existing one has been moved to the gc list, something that can be +determined by checking if the rt6i_ref counter is 0. + +Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional") +Reported-by: Dmitry Vyukov +Reported-by: Andrey Konovalov +Signed-off-by: David Ahern +Acked-by: Martin KaFai Lau +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -3263,14 +3263,24 @@ static void addrconf_gre_config(struct n + static int fixup_permanent_addr(struct inet6_dev *idev, + struct inet6_ifaddr *ifp) + { +- if (!ifp->rt) { +- struct rt6_info *rt; ++ /* rt6i_ref == 0 means the host route was removed from the ++ * FIB, for example, if 'lo' device is taken down. In that ++ * case regenerate the host route. ++ */ ++ if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) { ++ struct rt6_info *rt, *prev; + + rt = addrconf_dst_alloc(idev, &ifp->addr, false); + if (unlikely(IS_ERR(rt))) + return PTR_ERR(rt); + ++ /* ifp->rt can be accessed outside of rtnl */ ++ spin_lock(&ifp->lock); ++ prev = ifp->rt; + ifp->rt = rt; ++ spin_unlock(&ifp->lock); ++ ++ ip6_rt_put(prev); + } + + if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) { diff --git a/queue-4.10/net-ipv6-rtf_pcpu-should-not-be-settable-from-userspace.patch b/queue-4.10/net-ipv6-rtf_pcpu-should-not-be-settable-from-userspace.patch new file mode 100644 index 00000000000..9060c3c346b --- /dev/null +++ b/queue-4.10/net-ipv6-rtf_pcpu-should-not-be-settable-from-userspace.patch @@ -0,0 +1,79 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: David Ahern +Date: Wed, 19 Apr 2017 14:19:43 -0700 +Subject: net: ipv6: RTF_PCPU should not be settable from userspace + +From: David Ahern + + +[ Upstream commit 557c44be917c322860665be3d28376afa84aa936 ] + +Andrey reported a fault in the IPv6 route code: + +kasan: GPF could be caused by NULL-ptr deref or user memory access +general protection fault: 0000 [#1] SMP KASAN +Modules linked in: +CPU: 1 PID: 4035 Comm: a.out Not tainted 4.11.0-rc7+ #250 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 +task: ffff880069809600 task.stack: ffff880062dc8000 +RIP: 0010:ip6_rt_cache_alloc+0xa6/0x560 net/ipv6/route.c:975 +RSP: 0018:ffff880062dced30 EFLAGS: 00010206 +RAX: dffffc0000000000 RBX: ffff8800670561c0 RCX: 0000000000000006 +RDX: 0000000000000003 RSI: ffff880062dcfb28 RDI: 0000000000000018 +RBP: ffff880062dced68 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 +R13: ffff880062dcfb28 R14: dffffc0000000000 R15: 0000000000000000 +FS: 00007feebe37e7c0(0000) GS:ffff88006cb00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00000000205a0fe4 CR3: 000000006b5c9000 CR4: 00000000000006e0 +Call Trace: + ip6_pol_route+0x1512/0x1f20 net/ipv6/route.c:1128 + ip6_pol_route_output+0x4c/0x60 net/ipv6/route.c:1212 +... + +Andrey's syzkaller program passes rtmsg.rtmsg_flags with the RTF_PCPU bit +set. Flags passed to the kernel are blindly copied to the allocated +rt6_info by ip6_route_info_create making a newly inserted route appear +as though it is a per-cpu route. ip6_rt_cache_alloc sees the flag set +and expects rt->dst.from to be set - which it is not since it is not +really a per-cpu copy. The subsequent call to __ip6_dst_alloc then +generates the fault. + +Fix by checking for the flag and failing with EINVAL. + +Fixes: d52d3997f843f ("ipv6: Create percpu rt6_info") +Reported-by: Andrey Konovalov +Signed-off-by: David Ahern +Acked-by: Martin KaFai Lau +Tested-by: Andrey Konovalov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/uapi/linux/ipv6_route.h | 2 +- + net/ipv6/route.c | 4 ++++ + 2 files changed, 5 insertions(+), 1 deletion(-) + +--- a/include/uapi/linux/ipv6_route.h ++++ b/include/uapi/linux/ipv6_route.h +@@ -34,7 +34,7 @@ + #define RTF_PREF(pref) ((pref) << 27) + #define RTF_PREF_MASK 0x18000000 + +-#define RTF_PCPU 0x40000000 ++#define RTF_PCPU 0x40000000 /* read-only: can not be set by user */ + #define RTF_LOCAL 0x80000000 + + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1831,6 +1831,10 @@ static struct rt6_info *ip6_route_info_c + int addr_type; + int err = -EINVAL; + ++ /* RTF_PCPU is an internal flag; can not be set by userspace */ ++ if (cfg->fc_flags & RTF_PCPU) ++ goto out; ++ + if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) + goto out; + #ifndef CONFIG_IPV6_SUBTREES diff --git a/queue-4.10/net-mlx5-avoid-dereferencing-uninitialized-pointer.patch b/queue-4.10/net-mlx5-avoid-dereferencing-uninitialized-pointer.patch new file mode 100644 index 00000000000..91460b24053 --- /dev/null +++ b/queue-4.10/net-mlx5-avoid-dereferencing-uninitialized-pointer.patch @@ -0,0 +1,45 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Talat Batheesh +Date: Tue, 28 Mar 2017 16:13:41 +0300 +Subject: net/mlx5: Avoid dereferencing uninitialized pointer + +From: Talat Batheesh + + +[ Upstream commit e497ec680c4cd51e76bfcdd49363d9ab8d32a757 ] + +In NETDEV_CHANGEUPPER event the upper_info field is valid +only when linking is true. Otherwise it should be ignored. + +Fixes: 7907f23adc18 (net/mlx5: Implement RoCE LAG feature) +Signed-off-by: Talat Batheesh +Reviewed-by: Aviv Heller +Reviewed-by: Moni Shoua +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/lag.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c +@@ -294,7 +294,7 @@ static int mlx5_handle_changeupper_event + struct netdev_notifier_changeupper_info *info) + { + struct net_device *upper = info->upper_dev, *ndev_tmp; +- struct netdev_lag_upper_info *lag_upper_info; ++ struct netdev_lag_upper_info *lag_upper_info = NULL; + bool is_bonded; + int bond_status = 0; + int num_slaves = 0; +@@ -303,7 +303,8 @@ static int mlx5_handle_changeupper_event + if (!netif_is_lag_master(upper)) + return 0; + +- lag_upper_info = info->upper_info; ++ if (info->linking) ++ lag_upper_info = info->upper_info; + + /* The event may still be of interest if the slave does not belong to + * us, but is enslaved to a master which has one or more of our netdevs diff --git a/queue-4.10/net-mlx5-e-switch-correctly-deal-with-inline-mode-on-connectx-5.patch b/queue-4.10/net-mlx5-e-switch-correctly-deal-with-inline-mode-on-connectx-5.patch new file mode 100644 index 00000000000..61bb1b4b9ee --- /dev/null +++ b/queue-4.10/net-mlx5-e-switch-correctly-deal-with-inline-mode-on-connectx-5.patch @@ -0,0 +1,122 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Or Gerlitz +Date: Thu, 30 Mar 2017 15:56:10 +0200 +Subject: net/mlx5: E-Switch, Correctly deal with inline mode on ConnectX-5 + +From: Or Gerlitz + + +[ Upstream commit c415f704c8276bc686abcb0497bf2606038ca73c ] + +On ConnectX5 the wqe inline mode is "none" and hence the FW +reports MLX5_CAP_INLINE_MODE_NOT_REQUIRED. + +Fix our devlink callbacks to deal with that on get and set. + +Also fix the tc flow parsing code not to fail anything when +inline isn't required. + +Fixes: bffaa916588e ('net/mlx5: E-Switch, Add control for inline mode') +Signed-off-by: Or Gerlitz +Reviewed-by: Roi Dayan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 - + drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 36 ++++++++----- + 2 files changed, 26 insertions(+), 13 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -611,7 +611,8 @@ static int parse_cls_flower(struct mlx5e + + if (!err && esw->mode == SRIOV_OFFLOADS && + rep->vport != FDB_UPLINK_VPORT) { +- if (min_inline > esw->offloads.inline_mode) { ++ if (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && ++ esw->offloads.inline_mode < min_inline) { + netdev_warn(priv->netdev, + "Flow is not offloaded due to min inline setting, required %d actual %d\n", + min_inline, esw->offloads.inline_mode); +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +@@ -908,8 +908,7 @@ int mlx5_devlink_eswitch_inline_mode_set + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + int num_vports = esw->enabled_vports; +- int err; +- int vport; ++ int err, vport; + u8 mlx5_mode; + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) +@@ -918,9 +917,17 @@ int mlx5_devlink_eswitch_inline_mode_set + if (esw->mode == SRIOV_NONE) + return -EOPNOTSUPP; + +- if (MLX5_CAP_ETH(dev, wqe_inline_mode) != +- MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) ++ switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { ++ case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: ++ if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) ++ return 0; ++ /* fall through */ ++ case MLX5_CAP_INLINE_MODE_L2: ++ esw_warn(dev, "Inline mode can't be set\n"); + return -EOPNOTSUPP; ++ case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: ++ break; ++ } + + if (esw->offloads.num_flows > 0) { + esw_warn(dev, "Can't set inline mode when flows are configured\n"); +@@ -963,18 +970,14 @@ int mlx5_devlink_eswitch_inline_mode_get + if (esw->mode == SRIOV_NONE) + return -EOPNOTSUPP; + +- if (MLX5_CAP_ETH(dev, wqe_inline_mode) != +- MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) +- return -EOPNOTSUPP; +- + return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); + } + + int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) + { ++ u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; + struct mlx5_core_dev *dev = esw->dev; + int vport; +- u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; +@@ -982,10 +985,18 @@ int mlx5_eswitch_inline_mode_get(struct + if (esw->mode == SRIOV_NONE) + return -EOPNOTSUPP; + +- if (MLX5_CAP_ETH(dev, wqe_inline_mode) != +- MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) +- return -EOPNOTSUPP; ++ switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { ++ case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: ++ mlx5_mode = MLX5_INLINE_MODE_NONE; ++ goto out; ++ case MLX5_CAP_INLINE_MODE_L2: ++ mlx5_mode = MLX5_INLINE_MODE_L2; ++ goto out; ++ case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: ++ goto query_vports; ++ } + ++query_vports: + for (vport = 1; vport <= nvfs; vport++) { + mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode); + if (vport > 1 && prev_mlx5_mode != mlx5_mode) +@@ -993,6 +1004,7 @@ int mlx5_eswitch_inline_mode_get(struct + prev_mlx5_mode = mlx5_mode; + } + ++out: + *mode = mlx5_mode; + return 0; + } diff --git a/queue-4.10/net-mlx5-fix-driver-load-bad-flow-when-having-fw-initializing-timeout.patch b/queue-4.10/net-mlx5-fix-driver-load-bad-flow-when-having-fw-initializing-timeout.patch new file mode 100644 index 00000000000..ac4de69d226 --- /dev/null +++ b/queue-4.10/net-mlx5-fix-driver-load-bad-flow-when-having-fw-initializing-timeout.patch @@ -0,0 +1,33 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Mohamad Haj Yahia +Date: Thu, 30 Mar 2017 17:00:25 +0300 +Subject: net/mlx5: Fix driver load bad flow when having fw initializing timeout + +From: Mohamad Haj Yahia + + +[ Upstream commit 55378a238e04b39cc82957d91d16499704ea719b ] + +If FW is stuck in initializing state we will skip the driver load, but +current error handling flow doesn't clean previously allocated command +interface resources. + +Fixes: e3297246c2c8 ('net/mlx5_core: Wait for FW readiness on startup') +Signed-off-by: Mohamad Haj Yahia +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -1001,7 +1001,7 @@ static int mlx5_load_one(struct mlx5_cor + if (err) { + dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n", + FW_INIT_TIMEOUT_MILI); +- goto out_err; ++ goto err_cmd_cleanup; + } + + err = mlx5_core_enable_hca(dev, 0); diff --git a/queue-4.10/net-mlx5e-fix-ethtool_grxclsrlall-handling.patch b/queue-4.10/net-mlx5e-fix-ethtool_grxclsrlall-handling.patch new file mode 100644 index 00000000000..ed45cff3816 --- /dev/null +++ b/queue-4.10/net-mlx5e-fix-ethtool_grxclsrlall-handling.patch @@ -0,0 +1,44 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Ilan Tayari +Date: Thu, 2 Mar 2017 15:49:45 +0200 +Subject: net/mlx5e: Fix ETHTOOL_GRXCLSRLALL handling + +From: Ilan Tayari + + +[ Upstream commit 5e82c9e4ed60beba83f46a1a5a8307b99a23e982 ] + +Handler for ETHTOOL_GRXCLSRLALL must set info->data to the size +of the table, regardless of the amount of entries in it. +Existing code does not do that, and this breaks all usage of ethtool -N +or -n without explicit location, with this error: +rmgr: Invalid RX class rules table size: Success + +Set info->data to the table size. + +Tested: +ethtool -n ens8 +ethtool -N ens8 flow-type ip4 src-ip 1.1.1.1 dst-ip 2.2.2.2 action 1 +ethtool -N ens8 flow-type ip4 src-ip 1.1.1.1 dst-ip 2.2.2.2 action 1 loc 55 +ethtool -n ens8 +ethtool -N ens8 delete 1023 +ethtool -N ens8 delete 55 + +Fixes: f913a72aa008 ("net/mlx5e: Add support to get ethtool flow rules") +Signed-off-by: Ilan Tayari +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +@@ -564,6 +564,7 @@ int mlx5e_ethtool_get_all_flows(struct m + int idx = 0; + int err = 0; + ++ info->data = MAX_NUM_OF_ETHTOOL_RULES; + while ((!err || err == -ENOENT) && idx < info->rule_cnt) { + err = mlx5e_ethtool_get_flow(priv, info, location); + if (!err) diff --git a/queue-4.10/net-mlx5e-fix-small-packet-threshold.patch b/queue-4.10/net-mlx5e-fix-small-packet-threshold.patch new file mode 100644 index 00000000000..666c01e1022 --- /dev/null +++ b/queue-4.10/net-mlx5e-fix-small-packet-threshold.patch @@ -0,0 +1,41 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Eugenia Emantayev +Date: Wed, 22 Mar 2017 11:44:14 +0200 +Subject: net/mlx5e: Fix small packet threshold + +From: Eugenia Emantayev + + +[ Upstream commit cbad8cddb6ed7ef3a5f0a9a70f1711d4d7fb9a8f ] + +RX packet headers are meant to be contained in SKB linear part, +and chose a threshold of 128. +It turns out this is not enough, i.e. for IPv6 packet over VxLAN. +In this case, UDP/IPv4 needs 42 bytes, GENEVE header is 8 bytes, +and 86 bytes for TCP/IPv6. In total 136 bytes that is more than +current 128 bytes. In this case expand header flow is reached. +The warning in skb_try_coalesce() caused by a wrong truesize +was already fixed here: +commit 158f323b9868 ("net: adjust skb->truesize in pskb_expand_head()"). +Still, we prefer to totally avoid the expand header flow for performance reasons. +Tested regular TCP_STREAM with iperf for 1 and 8 streams, no degradation was found. + +Fixes: 461017cb006a ("net/mlx5e: Support RX multi-packet WQE (Striding RQ)") +Signed-off-by: Eugenia Emantayev +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h +@@ -82,7 +82,7 @@ + #define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) - 1 <= U16_MAX) + + #define MLX5_UMR_ALIGN (2048) +-#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (128) ++#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (256) + + #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) + #define MLX5E_DEFAULT_LRO_TIMEOUT 32 diff --git a/queue-4.10/net-neigh-guard-against-null-solicit-method.patch b/queue-4.10/net-neigh-guard-against-null-solicit-method.patch new file mode 100644 index 00000000000..c8ca6155072 --- /dev/null +++ b/queue-4.10/net-neigh-guard-against-null-solicit-method.patch @@ -0,0 +1,38 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Eric Dumazet +Date: Thu, 23 Mar 2017 12:39:21 -0700 +Subject: net: neigh: guard against NULL solicit() method + +From: Eric Dumazet + + +[ Upstream commit 48481c8fa16410ffa45939b13b6c53c2ca609e5f ] + +Dmitry posted a nice reproducer of a bug triggering in neigh_probe() +when dereferencing a NULL neigh->ops->solicit method. + +This can happen for arp_direct_ops/ndisc_direct_ops and similar, +which can be used for NUD_NOARP neighbours (created when dev->header_ops +is NULL). Admin can then force changing nud_state to some other state +that would fire neigh timer. + +Signed-off-by: Eric Dumazet +Reported-by: Dmitry Vyukov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/neighbour.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/core/neighbour.c ++++ b/net/core/neighbour.c +@@ -860,7 +860,8 @@ static void neigh_probe(struct neighbour + if (skb) + skb = skb_clone(skb, GFP_ATOMIC); + write_unlock(&neigh->lock); +- neigh->ops->solicit(neigh, skb); ++ if (neigh->ops->solicit) ++ neigh->ops->solicit(neigh, skb); + atomic_inc(&neigh->probes); + kfree_skb(skb); + } diff --git a/queue-4.10/net-packet-fix-overflow-in-check-for-tp_frame_nr.patch b/queue-4.10/net-packet-fix-overflow-in-check-for-tp_frame_nr.patch new file mode 100644 index 00000000000..77c670e4d55 --- /dev/null +++ b/queue-4.10/net-packet-fix-overflow-in-check-for-tp_frame_nr.patch @@ -0,0 +1,37 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Andrey Konovalov +Date: Wed, 29 Mar 2017 16:11:21 +0200 +Subject: net/packet: fix overflow in check for tp_frame_nr + +From: Andrey Konovalov + + +[ Upstream commit 8f8d28e4d6d815a391285e121c3a53a0b6cb9e7b ] + +When calculating rb->frames_per_block * req->tp_block_nr the result +can overflow. + +Add a check that tp_block_size * tp_block_nr <= UINT_MAX. + +Since frames_per_block <= tp_block_size, the expression would +never overflow. + +Signed-off-by: Andrey Konovalov +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -4189,6 +4189,8 @@ static int packet_set_ring(struct sock * + rb->frames_per_block = req->tp_block_size / req->tp_frame_size; + if (unlikely(rb->frames_per_block == 0)) + goto out; ++ if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr)) ++ goto out; + if (unlikely((rb->frames_per_block * req->tp_block_nr) != + req->tp_frame_nr)) + goto out; diff --git a/queue-4.10/net-packet-fix-overflow-in-check-for-tp_reserve.patch b/queue-4.10/net-packet-fix-overflow-in-check-for-tp_reserve.patch new file mode 100644 index 00000000000..c47e085ea76 --- /dev/null +++ b/queue-4.10/net-packet-fix-overflow-in-check-for-tp_reserve.patch @@ -0,0 +1,33 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Andrey Konovalov +Date: Wed, 29 Mar 2017 16:11:22 +0200 +Subject: net/packet: fix overflow in check for tp_reserve + +From: Andrey Konovalov + + +[ Upstream commit bcc5364bdcfe131e6379363f089e7b4108d35b70 ] + +When calculating po->tp_hdrlen + po->tp_reserve the result can overflow. + +Fix by checking that tp_reserve <= INT_MAX on assign. + +Signed-off-by: Andrey Konovalov +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -3644,6 +3644,8 @@ packet_setsockopt(struct socket *sock, i + return -EBUSY; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; ++ if (val > INT_MAX) ++ return -EINVAL; + po->tp_reserve = val; + return 0; + } diff --git a/queue-4.10/net-phy-fix-auto-negotiation-stall-due-to-unavailable-interrupt.patch b/queue-4.10/net-phy-fix-auto-negotiation-stall-due-to-unavailable-interrupt.patch new file mode 100644 index 00000000000..206f6fa5d9b --- /dev/null +++ b/queue-4.10/net-phy-fix-auto-negotiation-stall-due-to-unavailable-interrupt.patch @@ -0,0 +1,129 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Alexander Kochetkov +Date: Thu, 20 Apr 2017 14:00:04 +0300 +Subject: net: phy: fix auto-negotiation stall due to unavailable interrupt + +From: Alexander Kochetkov + + +[ Upstream commit f555f34fdc586a56204cd16d9a7c104ec6cb6650 ] + +The Ethernet link on an interrupt driven PHY was not coming up if the Ethernet +cable was plugged before the Ethernet interface was brought up. + +The patch trigger PHY state machine to update link state if PHY was requested to +do auto-negotiation and auto-negotiation complete flag already set. + +During power-up cycle the PHY do auto-negotiation, generate interrupt and set +auto-negotiation complete flag. Interrupt is handled by PHY state machine but +doesn't update link state because PHY is in PHY_READY state. After some time +MAC bring up, start and request PHY to do auto-negotiation. If there are no new +settings to advertise genphy_config_aneg() doesn't start PHY auto-negotiation. +PHY continue to stay in auto-negotiation complete state and doesn't fire +interrupt. At the same time PHY state machine expect that PHY started +auto-negotiation and is waiting for interrupt from PHY and it won't get it. + +Fixes: 321beec5047a ("net: phy: Use interrupts when available in NOLINK state") +Signed-off-by: Alexander Kochetkov +Cc: stable # v4.9+ +Tested-by: Roger Quadros +Tested-by: Alexandre Belloni +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phy.c | 40 ++++++++++++++++++++++++++++++++++++---- + include/linux/phy.h | 1 + + 2 files changed, 37 insertions(+), 4 deletions(-) + +--- a/drivers/net/phy/phy.c ++++ b/drivers/net/phy/phy.c +@@ -591,16 +591,18 @@ int phy_mii_ioctl(struct phy_device *phy + EXPORT_SYMBOL(phy_mii_ioctl); + + /** +- * phy_start_aneg - start auto-negotiation for this PHY device ++ * phy_start_aneg_priv - start auto-negotiation for this PHY device + * @phydev: the phy_device struct ++ * @sync: indicate whether we should wait for the workqueue cancelation + * + * Description: Sanitizes the settings (if we're not autonegotiating + * them), and then calls the driver's config_aneg function. + * If the PHYCONTROL Layer is operating, we change the state to + * reflect the beginning of Auto-negotiation or forcing. + */ +-int phy_start_aneg(struct phy_device *phydev) ++static int phy_start_aneg_priv(struct phy_device *phydev, bool sync) + { ++ bool trigger = 0; + int err; + + mutex_lock(&phydev->lock); +@@ -625,10 +627,40 @@ int phy_start_aneg(struct phy_device *ph + } + } + ++ /* Re-schedule a PHY state machine to check PHY status because ++ * negotiation may already be done and aneg interrupt may not be ++ * generated. ++ */ ++ if (phy_interrupt_is_valid(phydev) && (phydev->state == PHY_AN)) { ++ err = phy_aneg_done(phydev); ++ if (err > 0) { ++ trigger = true; ++ err = 0; ++ } ++ } ++ + out_unlock: + mutex_unlock(&phydev->lock); ++ ++ if (trigger) ++ phy_trigger_machine(phydev, sync); ++ + return err; + } ++ ++/** ++ * phy_start_aneg - start auto-negotiation for this PHY device ++ * @phydev: the phy_device struct ++ * ++ * Description: Sanitizes the settings (if we're not autonegotiating ++ * them), and then calls the driver's config_aneg function. ++ * If the PHYCONTROL Layer is operating, we change the state to ++ * reflect the beginning of Auto-negotiation or forcing. ++ */ ++int phy_start_aneg(struct phy_device *phydev) ++{ ++ return phy_start_aneg_priv(phydev, true); ++} + EXPORT_SYMBOL(phy_start_aneg); + + /** +@@ -656,7 +688,7 @@ void phy_start_machine(struct phy_device + * state machine runs. + */ + +-static void phy_trigger_machine(struct phy_device *phydev, bool sync) ++void phy_trigger_machine(struct phy_device *phydev, bool sync) + { + if (sync) + cancel_delayed_work_sync(&phydev->state_queue); +@@ -1151,7 +1183,7 @@ void phy_state_machine(struct work_struc + mutex_unlock(&phydev->lock); + + if (needs_aneg) +- err = phy_start_aneg(phydev); ++ err = phy_start_aneg_priv(phydev, false); + else if (do_suspend) + phy_suspend(phydev); + +--- a/include/linux/phy.h ++++ b/include/linux/phy.h +@@ -840,6 +840,7 @@ void phy_change_work(struct work_struct + void phy_mac_interrupt(struct phy_device *phydev, int new_link); + void phy_start_machine(struct phy_device *phydev); + void phy_stop_machine(struct phy_device *phydev); ++void phy_trigger_machine(struct phy_device *phydev, bool sync); + int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); + int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); + int phy_ethtool_ksettings_get(struct phy_device *phydev, diff --git a/queue-4.10/net-phy-handle-state-correctly-in-phy_stop_machine.patch b/queue-4.10/net-phy-handle-state-correctly-in-phy_stop_machine.patch new file mode 100644 index 00000000000..845304d1a4b --- /dev/null +++ b/queue-4.10/net-phy-handle-state-correctly-in-phy_stop_machine.patch @@ -0,0 +1,37 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Nathan Sullivan +Date: Wed, 22 Mar 2017 15:27:01 -0500 +Subject: net: phy: handle state correctly in phy_stop_machine + +From: Nathan Sullivan + + +[ Upstream commit 49d52e8108a21749dc2114b924c907db43358984 ] + +If the PHY is halted on stop, then do not set the state to PHY_UP. This +ensures the phy will be restarted later in phy_start when the machine is +started again. + +Fixes: 00db8189d984 ("This patch adds a PHY Abstraction Layer to the Linux Kernel, enabling ethernet drivers to remain as ignorant as is reasonable of the connected PHY's design and operation details.") +Signed-off-by: Nathan Sullivan +Signed-off-by: Brad Mouring +Acked-by: Xander Huff +Acked-by: Kyle Roeschley +Reviewed-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phy.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/phy/phy.c ++++ b/drivers/net/phy/phy.c +@@ -678,7 +678,7 @@ void phy_stop_machine(struct phy_device + cancel_delayed_work_sync(&phydev->state_queue); + + mutex_lock(&phydev->lock); +- if (phydev->state > PHY_UP) ++ if (phydev->state > PHY_UP && phydev->state != PHY_HALTED) + phydev->state = PHY_UP; + mutex_unlock(&phydev->lock); + } diff --git a/queue-4.10/net-timestamp-avoid-use-after-free-in-ip_recv_error.patch b/queue-4.10/net-timestamp-avoid-use-after-free-in-ip_recv_error.patch new file mode 100644 index 00000000000..99d37f98c47 --- /dev/null +++ b/queue-4.10/net-timestamp-avoid-use-after-free-in-ip_recv_error.patch @@ -0,0 +1,105 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Willem de Bruijn +Date: Wed, 12 Apr 2017 19:24:35 -0400 +Subject: net-timestamp: avoid use-after-free in ip_recv_error + +From: Willem de Bruijn + + +[ Upstream commit 1862d6208db0aeca9c8ace44915b08d5ab2cd667 ] + +Syzkaller reported a use-after-free in ip_recv_error at line + + info->ipi_ifindex = skb->dev->ifindex; + +This function is called on dequeue from the error queue, at which +point the device pointer may no longer be valid. + +Save ifindex on enqueue in __skb_complete_tx_timestamp, when the +pointer is valid or NULL. Store it in temporary storage skb->cb. + +It is safe to reference skb->dev here, as called from device drivers +or dev_queue_xmit. The exception is when called from tcp_ack_tstamp; +in that case it is NULL and ifindex is set to 0 (invalid). + +Do not return a pktinfo cmsg if ifindex is 0. This maintains the +current behavior of not returning a cmsg if skb->dev was NULL. + +On dequeue, the ipv4 path will cast from sock_exterr_skb to +in_pktinfo. Both have ifindex as their first element, so no explicit +conversion is needed. This is by design, introduced in commit +0b922b7a829c ("net: original ingress device index in PKTINFO"). For +ipv6 ip6_datagram_support_cmsg converts to in6_pktinfo. + +Fixes: 829ae9d61165 ("net-timestamp: allow reading recv cmsg on errqueue with origin tstamp") +Reported-by: Andrey Konovalov +Signed-off-by: Willem de Bruijn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 1 + + net/ipv4/ip_sockglue.c | 9 ++++----- + net/ipv6/datagram.c | 10 +--------- + 3 files changed, 6 insertions(+), 14 deletions(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -3789,6 +3789,7 @@ static void __skb_complete_tx_timestamp( + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + serr->ee.ee_info = tstype; ++ serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0; + if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { + serr->ee.ee_data = skb_shinfo(skb)->tskey; + if (sk->sk_protocol == IPPROTO_TCP && +--- a/net/ipv4/ip_sockglue.c ++++ b/net/ipv4/ip_sockglue.c +@@ -488,16 +488,15 @@ static bool ipv4_datagram_support_cmsg(c + return false; + + /* Support IP_PKTINFO on tstamp packets if requested, to correlate +- * timestamp with egress dev. Not possible for packets without dev ++ * timestamp with egress dev. Not possible for packets without iif + * or without payload (SOF_TIMESTAMPING_OPT_TSONLY). + */ +- if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) || +- (!skb->dev)) ++ info = PKTINFO_SKB_CB(skb); ++ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) || ++ !info->ipi_ifindex) + return false; + +- info = PKTINFO_SKB_CB(skb); + info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr; +- info->ipi_ifindex = skb->dev->ifindex; + return true; + } + +--- a/net/ipv6/datagram.c ++++ b/net/ipv6/datagram.c +@@ -405,9 +405,6 @@ static inline bool ipv6_datagram_support + * At one point, excluding local errors was a quick test to identify icmp/icmp6 + * errors. This is no longer true, but the test remained, so the v6 stack, + * unlike v4, also honors cmsg requests on all wifi and timestamp errors. +- * +- * Timestamp code paths do not initialize the fields expected by cmsg: +- * the PKTINFO fields in skb->cb[]. Fill those in here. + */ + static bool ip6_datagram_support_cmsg(struct sk_buff *skb, + struct sock_exterr_skb *serr) +@@ -419,14 +416,9 @@ static bool ip6_datagram_support_cmsg(st + if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) + return false; + +- if (!skb->dev) ++ if (!IP6CB(skb)->iif) + return false; + +- if (skb->protocol == htons(ETH_P_IPV6)) +- IP6CB(skb)->iif = skb->dev->ifindex; +- else +- PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex; +- + return true; + } + diff --git a/queue-4.10/net-vrf-fix-setting-nlm_f_excl-flag-when-adding-l3mdev-rule.patch b/queue-4.10/net-vrf-fix-setting-nlm_f_excl-flag-when-adding-l3mdev-rule.patch new file mode 100644 index 00000000000..720f23b45cc --- /dev/null +++ b/queue-4.10/net-vrf-fix-setting-nlm_f_excl-flag-when-adding-l3mdev-rule.patch @@ -0,0 +1,31 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: David Ahern +Date: Thu, 13 Apr 2017 10:57:15 -0600 +Subject: net: vrf: Fix setting NLM_F_EXCL flag when adding l3mdev rule + +From: David Ahern + + +[ Upstream commit 426c87caa2b4578b43cd3f689f02c65b743b2559 ] + +Only need 1 l3mdev FIB rule. Fix setting NLM_F_EXCL in the nlmsghdr. + +Fixes: 1aa6c4f6b8cd8 ("net: vrf: Add l3mdev rules on first device create") +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vrf.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -1126,7 +1126,7 @@ static int vrf_fib_rule(const struct net + goto nla_put_failure; + + /* rule only needs to appear once */ +- nlh->nlmsg_flags &= NLM_F_EXCL; ++ nlh->nlmsg_flags |= NLM_F_EXCL; + + frh = nlmsg_data(nlh); + memset(frh, 0, sizeof(*frh)); diff --git a/queue-4.10/netpoll-check-for-skb-queue_mapping.patch b/queue-4.10/netpoll-check-for-skb-queue_mapping.patch new file mode 100644 index 00000000000..0dc7ee0f1b2 --- /dev/null +++ b/queue-4.10/netpoll-check-for-skb-queue_mapping.patch @@ -0,0 +1,104 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Tushar Dave +Date: Thu, 20 Apr 2017 15:57:31 -0700 +Subject: netpoll: Check for skb->queue_mapping + +From: Tushar Dave + + +[ Upstream commit c70b17b775edb21280e9de7531acf6db3b365274 ] + +Reducing real_num_tx_queues needs to be in sync with skb queue_mapping +otherwise skbs with queue_mapping greater than real_num_tx_queues +can be sent to the underlying driver and can result in kernel panic. + +One such event is running netconsole and enabling VF on the same +device. Or running netconsole and changing number of tx queues via +ethtool on same device. + +e.g. +Unable to handle kernel NULL pointer dereference +tsk->{mm,active_mm}->context = 0000000000001525 +tsk->{mm,active_mm}->pgd = fff800130ff9a000 + \|/ ____ \|/ + "@'/ .. \`@" + /_| \__/ |_\ + \__U_/ +kworker/48:1(475): Oops [#1] +CPU: 48 PID: 475 Comm: kworker/48:1 Tainted: G OE +4.11.0-rc3-davem-net+ #7 +Workqueue: events queue_process +task: fff80013113299c0 task.stack: fff800131132c000 +TSTATE: 0000004480e01600 TPC: 00000000103f9e3c TNPC: 00000000103f9e40 Y: +00000000 Tainted: G OE +TPC: +g0: 0000000000000000 g1: 0000000000003fff g2: 0000000000000000 g3: +0000000000000001 +g4: fff80013113299c0 g5: fff8001fa6808000 g6: fff800131132c000 g7: +00000000000000c0 +o0: fff8001fa760c460 o1: fff8001311329a50 o2: fff8001fa7607504 o3: +0000000000000003 +o4: fff8001f96e63a40 o5: fff8001311d77ec0 sp: fff800131132f0e1 ret_pc: +000000000049ed94 +RPC: +l0: 0000000000000000 l1: 0000000000000800 l2: 0000000000000000 l3: +0000000000000000 +l4: 000b2aa30e34b10d l5: 0000000000000000 l6: 0000000000000000 l7: +fff8001fa7605028 +i0: fff80013111a8a00 i1: fff80013155a0780 i2: 0000000000000000 i3: +0000000000000000 +i4: 0000000000000000 i5: 0000000000100000 i6: fff800131132f1a1 i7: +00000000103fa4b0 +I7: +Call Trace: + [00000000103fa4b0] ixgbe_xmit_frame+0x30/0xa0 [ixgbe] + [0000000000998c74] netpoll_start_xmit+0xf4/0x200 + [0000000000998e10] queue_process+0x90/0x160 + [0000000000485fa8] process_one_work+0x188/0x480 + [0000000000486410] worker_thread+0x170/0x4c0 + [000000000048c6b8] kthread+0xd8/0x120 + [0000000000406064] ret_from_fork+0x1c/0x2c + [0000000000000000] (null) +Disabling lock debugging due to kernel taint +Caller[00000000103fa4b0]: ixgbe_xmit_frame+0x30/0xa0 [ixgbe] +Caller[0000000000998c74]: netpoll_start_xmit+0xf4/0x200 +Caller[0000000000998e10]: queue_process+0x90/0x160 +Caller[0000000000485fa8]: process_one_work+0x188/0x480 +Caller[0000000000486410]: worker_thread+0x170/0x4c0 +Caller[000000000048c6b8]: kthread+0xd8/0x120 +Caller[0000000000406064]: ret_from_fork+0x1c/0x2c +Caller[0000000000000000]: (null) + +Signed-off-by: Tushar Dave +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/netpoll.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/net/core/netpoll.c ++++ b/net/core/netpoll.c +@@ -105,15 +105,21 @@ static void queue_process(struct work_st + while ((skb = skb_dequeue(&npinfo->txq))) { + struct net_device *dev = skb->dev; + struct netdev_queue *txq; ++ unsigned int q_index; + + if (!netif_device_present(dev) || !netif_running(dev)) { + kfree_skb(skb); + continue; + } + +- txq = skb_get_tx_queue(dev, skb); +- + local_irq_save(flags); ++ /* check if skb->queue_mapping is still valid */ ++ q_index = skb_get_queue_mapping(skb); ++ if (unlikely(q_index >= dev->real_num_tx_queues)) { ++ q_index = q_index % dev->real_num_tx_queues; ++ skb_set_queue_mapping(skb, q_index); ++ } ++ txq = netdev_get_tx_queue(dev, q_index); + HARD_TX_LOCK(dev, txq, smp_processor_id()); + if (netif_xmit_frozen_or_stopped(txq) || + netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) { diff --git a/queue-4.10/openvswitch-fix-ovs_flow_key_update.patch b/queue-4.10/openvswitch-fix-ovs_flow_key_update.patch new file mode 100644 index 00000000000..8871c2ddb7a --- /dev/null +++ b/queue-4.10/openvswitch-fix-ovs_flow_key_update.patch @@ -0,0 +1,53 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Yi-Hung Wei +Date: Thu, 30 Mar 2017 12:36:03 -0700 +Subject: openvswitch: Fix ovs_flow_key_update() + +From: Yi-Hung Wei + + +[ Upstream commit 6f56f6186c18e3fd54122b73da68e870687b8c59 ] + +ovs_flow_key_update() is called when the flow key is invalid, and it is +used to update and revalidate the flow key. Commit 329f45bc4f19 +("openvswitch: add mac_proto field to the flow key") introduces mac_proto +field to flow key and use it to determine whether the flow key is valid. +However, the commit does not update the code path in ovs_flow_key_update() +to revalidate the flow key which may cause BUG_ON() on execute_recirc(). +This patch addresses the aforementioned issue. + +Fixes: 329f45bc4f19 ("openvswitch: add mac_proto field to the flow key") +Signed-off-by: Yi-Hung Wei +Acked-by: Jiri Benc +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/flow.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/net/openvswitch/flow.c ++++ b/net/openvswitch/flow.c +@@ -527,7 +527,7 @@ static int key_extract(struct sk_buff *s + + /* Link layer. */ + clear_vlan(key); +- if (key->mac_proto == MAC_PROTO_NONE) { ++ if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { + if (unlikely(eth_type_vlan(skb->protocol))) + return -EINVAL; + +@@ -745,7 +745,13 @@ static int key_extract(struct sk_buff *s + + int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) + { +- return key_extract(skb, key); ++ int res; ++ ++ res = key_extract(skb, key); ++ if (!res) ++ key->mac_proto &= ~SW_FLOW_KEY_INVALID; ++ ++ return res; + } + + static int key_extract_mac_proto(struct sk_buff *skb) diff --git a/queue-4.10/sctp-listen-on-the-sock-only-when-it-s-state-is-listening-or-closed.patch b/queue-4.10/sctp-listen-on-the-sock-only-when-it-s-state-is-listening-or-closed.patch new file mode 100644 index 00000000000..bcb3a813248 --- /dev/null +++ b/queue-4.10/sctp-listen-on-the-sock-only-when-it-s-state-is-listening-or-closed.patch @@ -0,0 +1,39 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Xin Long +Date: Thu, 6 Apr 2017 13:10:52 +0800 +Subject: sctp: listen on the sock only when it's state is listening or closed + +From: Xin Long + + +[ Upstream commit 34b2789f1d9bf8dcca9b5cb553d076ca2cd898ee ] + +Now sctp doesn't check sock's state before listening on it. It could +even cause changing a sock with any state to become a listening sock +when doing sctp_listen. + +This patch is to fix it by checking sock's state in sctp_listen, so +that it will listen on the sock with right state. + +Reported-by: Andrey Konovalov +Tested-by: Andrey Konovalov +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -6860,6 +6860,9 @@ int sctp_inet_listen(struct socket *sock + if (sock->state != SS_UNCONNECTED) + goto out; + ++ if (!sctp_sstate(sk, LISTENING) && !sctp_sstate(sk, CLOSED)) ++ goto out; ++ + /* If backlog is zero, disable listening. */ + if (!backlog) { + if (sctp_sstate(sk, CLOSED)) diff --git a/queue-4.10/secure_seq-downgrade-to-per-host-timestamp-offsets.patch b/queue-4.10/secure_seq-downgrade-to-per-host-timestamp-offsets.patch new file mode 100644 index 00000000000..2f0296ba7c3 --- /dev/null +++ b/queue-4.10/secure_seq-downgrade-to-per-host-timestamp-offsets.patch @@ -0,0 +1,102 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Florian Westphal +Date: Sat, 25 Mar 2017 10:58:24 +0100 +Subject: secure_seq: downgrade to per-host timestamp offsets + +From: Florian Westphal + + +[ Upstream commit 28ee1b746f493b7c62347d714f58fbf4f70df4f0 ] + +Unfortunately too many devices (not under our control) use tcp_tw_recycle=1, +which depends on timestamps being identical of the same saddr. + +Although tcp_tw_recycle got removed in net-next we can't make +such end hosts disappear so downgrade to per-host timestamp offsets. + +4.10 note: original patch uses siphash (added in 4.11), since +ts_off is only used to obscure uptime (and doesn't use same secret +as isn generator) this uses jhash instead. + +Cc: Soheil Hassas Yeganeh +Cc: Eric Dumazet +Cc: Neal Cardwell +Cc: Yuchung Cheng +Reported-by: Yvan Vanrossomme +Fixes: 95a22caee396c ("tcp: randomize tcp timestamp offsets for each connection") +Signed-off-by: Florian Westphal +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/secure_seq.c | 29 +++++++++++++++++++++++++++-- + 1 file changed, 27 insertions(+), 2 deletions(-) + +--- a/net/core/secure_seq.c ++++ b/net/core/secure_seq.c +@@ -16,9 +16,11 @@ + #define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4) + + static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; ++static u32 ts_secret[2]; + + static __always_inline void net_secret_init(void) + { ++ net_get_random_once(ts_secret, sizeof(ts_secret)); + net_get_random_once(net_secret, sizeof(net_secret)); + } + #endif +@@ -41,6 +43,21 @@ static u32 seq_scale(u32 seq) + #endif + + #if IS_ENABLED(CONFIG_IPV6) ++static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) ++{ ++ u32 hash[4 + 4 + 1]; ++ ++ if (sysctl_tcp_timestamps != 1) ++ return 0; ++ ++ memcpy(hash, saddr, 16); ++ memcpy(hash + 4, daddr, 16); ++ ++ hash[8] = ts_secret[0]; ++ ++ return jhash2(hash, ARRAY_SIZE(hash), ts_secret[1]); ++} ++ + u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport, u32 *tsoff) + { +@@ -59,7 +76,7 @@ u32 secure_tcpv6_sequence_number(const _ + + md5_transform(hash, secret); + +- *tsoff = sysctl_tcp_timestamps == 1 ? hash[1] : 0; ++ *tsoff = secure_tcpv6_ts_off(saddr, daddr); + return seq_scale(hash[0]); + } + EXPORT_SYMBOL(secure_tcpv6_sequence_number); +@@ -87,6 +104,14 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral + #endif + + #ifdef CONFIG_INET ++static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) ++{ ++ if (sysctl_tcp_timestamps != 1) ++ return 0; ++ ++ return jhash_3words((__force u32)saddr, (__force u32)daddr, ++ ts_secret[0], ts_secret[1]); ++} + + u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport, u32 *tsoff) +@@ -101,7 +126,7 @@ u32 secure_tcp_sequence_number(__be32 sa + + md5_transform(hash, net_secret); + +- *tsoff = sysctl_tcp_timestamps == 1 ? hash[1] : 0; ++ *tsoff = secure_tcp_ts_off(saddr, daddr); + return seq_scale(hash[0]); + } + diff --git a/queue-4.10/series b/queue-4.10/series index 6486ed51938..b42039659b2 100644 --- a/queue-4.10/series +++ b/queue-4.10/series @@ -1 +1,43 @@ ping-implement-proper-locking.patch +sparc64-kern_addr_valid-regression.patch +sparc64-fix-kernel-panic-due-to-erroneous-ifdef-surrounding-pmd_write.patch +net-neigh-guard-against-null-solicit-method.patch +net-phy-handle-state-correctly-in-phy_stop_machine.patch +kcm-return-immediately-after-copy_from_user-failure.patch +secure_seq-downgrade-to-per-host-timestamp-offsets.patch +bpf-improve-verifier-packet-range-checks.patch +net-mlx5-avoid-dereferencing-uninitialized-pointer.patch +l2tp-hold-tunnel-socket-when-handling-control-frames-in-l2tp_ip-and-l2tp_ip6.patch +l2tp-purge-socket-queues-in-the-.destruct-callback.patch +net-packet-fix-overflow-in-check-for-tp_frame_nr.patch +net-packet-fix-overflow-in-check-for-tp_reserve.patch +openvswitch-fix-ovs_flow_key_update.patch +l2tp-take-reference-on-sessions-being-dumped.patch +l2tp-fix-ppp-pseudo-wire-auto-loading.patch +net-ipv4-fix-multipath-rtm_getroute-behavior-when-iif-is-given.patch +sctp-listen-on-the-sock-only-when-it-s-state-is-listening-or-closed.patch +tcp-clear-saved_syn-in-tcp_disconnect.patch +ipv6-fix-idev-addr_list-corruption.patch +net-timestamp-avoid-use-after-free-in-ip_recv_error.patch +net-vrf-fix-setting-nlm_f_excl-flag-when-adding-l3mdev-rule.patch +sh_eth-unmap-dma-buffers-when-freeing-rings.patch +ipv6-sr-fix-out-of-bounds-access-in-srh-validation.patch +dp83640-don-t-recieve-time-stamps-twice.patch +ipv6-sr-fix-double-free-of-skb-after-handling-invalid-srh.patch +ipv6-fix-source-routing.patch +gso-validate-assumption-of-frag_list-segementation.patch +net-ipv6-rtf_pcpu-should-not-be-settable-from-userspace.patch +netpoll-check-for-skb-queue_mapping.patch +ip6mr-fix-notification-device-destruction.patch +net-mlx5-fix-driver-load-bad-flow-when-having-fw-initializing-timeout.patch +net-mlx5-e-switch-correctly-deal-with-inline-mode-on-connectx-5.patch +net-mlx5e-fix-small-packet-threshold.patch +net-mlx5e-fix-ethtool_grxclsrlall-handling.patch +tcp-fix-scm_timestamping_opt_stats-for-normal-skbs.patch +tcp-mark-skbs-with-scm_timestamping_opt_stats.patch +macvlan-fix-device-ref-leak-when-purging-bc_queue.patch +net-ipv6-regenerate-host-route-if-moved-to-gc-list.patch +net-phy-fix-auto-negotiation-stall-due-to-unavailable-interrupt.patch +ipv6-check-skb-protocol-before-lookup-for-nexthop.patch +tcp-memset-ca_priv-data-to-0-properly.patch +ipv6-check-raw-payload-size-correctly-in-ioctl.patch diff --git a/queue-4.10/sh_eth-unmap-dma-buffers-when-freeing-rings.patch b/queue-4.10/sh_eth-unmap-dma-buffers-when-freeing-rings.patch new file mode 100644 index 00000000000..5d354e03096 --- /dev/null +++ b/queue-4.10/sh_eth-unmap-dma-buffers-when-freeing-rings.patch @@ -0,0 +1,211 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Sergei Shtylyov +Date: Mon, 17 Apr 2017 15:55:22 +0300 +Subject: sh_eth: unmap DMA buffers when freeing rings + +From: Sergei Shtylyov + + +[ Upstream commit 1debdc8f9ebd07daf140e417b3841596911e0066 ] + +The DMA API debugging (when enabled) causes: + +WARNING: CPU: 0 PID: 1445 at lib/dma-debug.c:519 add_dma_entry+0xe0/0x12c +DMA-API: exceeded 7 overlapping mappings of cacheline 0x01b2974d + +to be printed after repeated initialization of the Ether device, e.g. +suspend/resume or 'ifconfig' up/down. This is because DMA buffers mapped +using dma_map_single() in sh_eth_ring_format() and sh_eth_start_xmit() are +never unmapped. Resolve this problem by unmapping the buffers when freeing +the descriptor rings; in order to do it right, we'd have to add an extra +parameter to sh_eth_txfree() (we rename this function to sh_eth_tx_free(), +while at it). + +Based on the commit a47b70ea86bd ("ravb: unmap descriptors when freeing +rings"). + +Signed-off-by: Sergei Shtylyov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/renesas/sh_eth.c | 122 ++++++++++++++++++---------------- + 1 file changed, 67 insertions(+), 55 deletions(-) + +--- a/drivers/net/ethernet/renesas/sh_eth.c ++++ b/drivers/net/ethernet/renesas/sh_eth.c +@@ -1061,12 +1061,70 @@ static struct mdiobb_ops bb_ops = { + .get_mdio_data = sh_get_mdio, + }; + ++/* free Tx skb function */ ++static int sh_eth_tx_free(struct net_device *ndev, bool sent_only) ++{ ++ struct sh_eth_private *mdp = netdev_priv(ndev); ++ struct sh_eth_txdesc *txdesc; ++ int free_num = 0; ++ int entry; ++ bool sent; ++ ++ for (; mdp->cur_tx - mdp->dirty_tx > 0; mdp->dirty_tx++) { ++ entry = mdp->dirty_tx % mdp->num_tx_ring; ++ txdesc = &mdp->tx_ring[entry]; ++ sent = !(txdesc->status & cpu_to_le32(TD_TACT)); ++ if (sent_only && !sent) ++ break; ++ /* TACT bit must be checked before all the following reads */ ++ dma_rmb(); ++ netif_info(mdp, tx_done, ndev, ++ "tx entry %d status 0x%08x\n", ++ entry, le32_to_cpu(txdesc->status)); ++ /* Free the original skb. */ ++ if (mdp->tx_skbuff[entry]) { ++ dma_unmap_single(&ndev->dev, le32_to_cpu(txdesc->addr), ++ le32_to_cpu(txdesc->len) >> 16, ++ DMA_TO_DEVICE); ++ dev_kfree_skb_irq(mdp->tx_skbuff[entry]); ++ mdp->tx_skbuff[entry] = NULL; ++ free_num++; ++ } ++ txdesc->status = cpu_to_le32(TD_TFP); ++ if (entry >= mdp->num_tx_ring - 1) ++ txdesc->status |= cpu_to_le32(TD_TDLE); ++ ++ if (sent) { ++ ndev->stats.tx_packets++; ++ ndev->stats.tx_bytes += le32_to_cpu(txdesc->len) >> 16; ++ } ++ } ++ return free_num; ++} ++ + /* free skb and descriptor buffer */ + static void sh_eth_ring_free(struct net_device *ndev) + { + struct sh_eth_private *mdp = netdev_priv(ndev); + int ringsize, i; + ++ if (mdp->rx_ring) { ++ for (i = 0; i < mdp->num_rx_ring; i++) { ++ if (mdp->rx_skbuff[i]) { ++ struct sh_eth_rxdesc *rxdesc = &mdp->rx_ring[i]; ++ ++ dma_unmap_single(&ndev->dev, ++ le32_to_cpu(rxdesc->addr), ++ ALIGN(mdp->rx_buf_sz, 32), ++ DMA_FROM_DEVICE); ++ } ++ } ++ ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring; ++ dma_free_coherent(NULL, ringsize, mdp->rx_ring, ++ mdp->rx_desc_dma); ++ mdp->rx_ring = NULL; ++ } ++ + /* Free Rx skb ringbuffer */ + if (mdp->rx_skbuff) { + for (i = 0; i < mdp->num_rx_ring; i++) +@@ -1075,27 +1133,18 @@ static void sh_eth_ring_free(struct net_ + kfree(mdp->rx_skbuff); + mdp->rx_skbuff = NULL; + +- /* Free Tx skb ringbuffer */ +- if (mdp->tx_skbuff) { +- for (i = 0; i < mdp->num_tx_ring; i++) +- dev_kfree_skb(mdp->tx_skbuff[i]); +- } +- kfree(mdp->tx_skbuff); +- mdp->tx_skbuff = NULL; +- +- if (mdp->rx_ring) { +- ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring; +- dma_free_coherent(NULL, ringsize, mdp->rx_ring, +- mdp->rx_desc_dma); +- mdp->rx_ring = NULL; +- } +- + if (mdp->tx_ring) { ++ sh_eth_tx_free(ndev, false); ++ + ringsize = sizeof(struct sh_eth_txdesc) * mdp->num_tx_ring; + dma_free_coherent(NULL, ringsize, mdp->tx_ring, + mdp->tx_desc_dma); + mdp->tx_ring = NULL; + } ++ ++ /* Free Tx skb ringbuffer */ ++ kfree(mdp->tx_skbuff); ++ mdp->tx_skbuff = NULL; + } + + /* format skb and descriptor buffer */ +@@ -1343,43 +1392,6 @@ static void sh_eth_dev_exit(struct net_d + update_mac_address(ndev); + } + +-/* free Tx skb function */ +-static int sh_eth_txfree(struct net_device *ndev) +-{ +- struct sh_eth_private *mdp = netdev_priv(ndev); +- struct sh_eth_txdesc *txdesc; +- int free_num = 0; +- int entry; +- +- for (; mdp->cur_tx - mdp->dirty_tx > 0; mdp->dirty_tx++) { +- entry = mdp->dirty_tx % mdp->num_tx_ring; +- txdesc = &mdp->tx_ring[entry]; +- if (txdesc->status & cpu_to_le32(TD_TACT)) +- break; +- /* TACT bit must be checked before all the following reads */ +- dma_rmb(); +- netif_info(mdp, tx_done, ndev, +- "tx entry %d status 0x%08x\n", +- entry, le32_to_cpu(txdesc->status)); +- /* Free the original skb. */ +- if (mdp->tx_skbuff[entry]) { +- dma_unmap_single(&ndev->dev, le32_to_cpu(txdesc->addr), +- le32_to_cpu(txdesc->len) >> 16, +- DMA_TO_DEVICE); +- dev_kfree_skb_irq(mdp->tx_skbuff[entry]); +- mdp->tx_skbuff[entry] = NULL; +- free_num++; +- } +- txdesc->status = cpu_to_le32(TD_TFP); +- if (entry >= mdp->num_tx_ring - 1) +- txdesc->status |= cpu_to_le32(TD_TDLE); +- +- ndev->stats.tx_packets++; +- ndev->stats.tx_bytes += le32_to_cpu(txdesc->len) >> 16; +- } +- return free_num; +-} +- + /* Packet receive function */ + static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) + { +@@ -1622,7 +1634,7 @@ ignore_link: + intr_status, mdp->cur_tx, mdp->dirty_tx, + (u32)ndev->state, edtrr); + /* dirty buffer free */ +- sh_eth_txfree(ndev); ++ sh_eth_tx_free(ndev, true); + + /* SH7712 BUG */ + if (edtrr ^ sh_eth_get_edtrr_trns(mdp)) { +@@ -1681,7 +1693,7 @@ static irqreturn_t sh_eth_interrupt(int + /* Clear Tx interrupts */ + sh_eth_write(ndev, intr_status & cd->tx_check, EESR); + +- sh_eth_txfree(ndev); ++ sh_eth_tx_free(ndev, true); + netif_wake_queue(ndev); + } + +@@ -2309,7 +2321,7 @@ static int sh_eth_start_xmit(struct sk_b + + spin_lock_irqsave(&mdp->lock, flags); + if ((mdp->cur_tx - mdp->dirty_tx) >= (mdp->num_tx_ring - 4)) { +- if (!sh_eth_txfree(ndev)) { ++ if (!sh_eth_tx_free(ndev, true)) { + netif_warn(mdp, tx_queued, ndev, "TxFD exhausted.\n"); + netif_stop_queue(ndev); + spin_unlock_irqrestore(&mdp->lock, flags); diff --git a/queue-4.10/sparc64-fix-kernel-panic-due-to-erroneous-ifdef-surrounding-pmd_write.patch b/queue-4.10/sparc64-fix-kernel-panic-due-to-erroneous-ifdef-surrounding-pmd_write.patch new file mode 100644 index 00000000000..43c551840a7 --- /dev/null +++ b/queue-4.10/sparc64-fix-kernel-panic-due-to-erroneous-ifdef-surrounding-pmd_write.patch @@ -0,0 +1,101 @@ +From foo@baz Sat Apr 29 08:20:51 CEST 2017 +From: Tom Hromatka +Date: Fri, 31 Mar 2017 16:31:42 -0600 +Subject: sparc64: Fix kernel panic due to erroneous #ifdef surrounding pmd_write() + +From: Tom Hromatka + + +[ Upstream commit 9ae34dbd8afd790cb5f52467e4f816434379eafa ] + +This commit moves sparc64's prototype of pmd_write() outside +of the CONFIG_TRANSPARENT_HUGEPAGE ifdef. + +In 2013, commit a7b9403f0e6d ("sparc64: Encode huge PMDs using PTE +encoding.") exposed a path where pmd_write() could be called without +CONFIG_TRANSPARENT_HUGEPAGE defined. This can result in the panic below. + +The diff is awkward to read, but the changes are straightforward. +pmd_write() was moved outside of #ifdef CONFIG_TRANSPARENT_HUGEPAGE. +Also, __HAVE_ARCH_PMD_WRITE was defined. + +kernel BUG at include/asm-generic/pgtable.h:576! + \|/ ____ \|/ + "@'/ .. \`@" + /_| \__/ |_\ + \__U_/ +oracle_8114_cdb(8114): Kernel bad sw trap 5 [#1] +CPU: 120 PID: 8114 Comm: oracle_8114_cdb Not tainted +4.1.12-61.7.1.el6uek.rc1.sparc64 #1 +task: fff8400700a24d60 ti: fff8400700bc4000 task.ti: fff8400700bc4000 +TSTATE: 0000004411e01607 TPC: 00000000004609f8 TNPC: 00000000004609fc Y: +00000005 Not tainted +TPC: +g0: 000000000001c000 g1: 0000000000ef3954 g2: 0000000000000000 g3: 0000000000000001 +g4: fff8400700a24d60 g5: fff8001fa5c10000 g6: fff8400700bc4000 g7: 0000000000000720 +o0: 0000000000bc5058 o1: 0000000000000240 o2: 0000000000006000 o3: 0000000000001c00 +o4: 0000000000000000 o5: 0000048000080000 sp: fff8400700bc6ab1 ret_pc: 00000000004609f0 +RPC: +l0: fff8400700bc74fc l1: 0000000000020000 l2: 0000000000002000 l3: 0000000000000000 +l4: fff8001f93250950 l5: 000000000113f800 l6: 0000000000000004 l7: 0000000000000000 +i0: fff8400700ca46a0 i1: bd0000085e800453 i2: 000000026a0c4000 i3: 000000026a0c6000 +i4: 0000000000000001 i5: fff800070c958de8 i6: fff8400700bc6b61 i7: 0000000000460dd0 +I7: +Call Trace: + [0000000000460dd0] gup_pud_range+0x170/0x1a0 + [0000000000460e84] get_user_pages_fast+0x84/0x120 + [00000000006f5a18] iov_iter_get_pages+0x98/0x240 + [00000000005fa744] do_direct_IO+0xf64/0x1e00 + [00000000005fbbc0] __blockdev_direct_IO+0x360/0x15a0 + [00000000101f74fc] ext4_ind_direct_IO+0xdc/0x400 [ext4] + [00000000101af690] ext4_ext_direct_IO+0x1d0/0x2c0 [ext4] + [00000000101af86c] ext4_direct_IO+0xec/0x220 [ext4] + [0000000000553bd4] generic_file_read_iter+0x114/0x140 + [00000000005bdc2c] __vfs_read+0xac/0x100 + [00000000005bf254] vfs_read+0x54/0x100 + [00000000005bf368] SyS_pread64+0x68/0x80 + +Signed-off-by: Tom Hromatka +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/pgtable_64.h | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +--- a/arch/sparc/include/asm/pgtable_64.h ++++ b/arch/sparc/include/asm/pgtable_64.h +@@ -673,26 +673,27 @@ static inline unsigned long pmd_pfn(pmd_ + return pte_pfn(pte); + } + +-#ifdef CONFIG_TRANSPARENT_HUGEPAGE +-static inline unsigned long pmd_dirty(pmd_t pmd) ++#define __HAVE_ARCH_PMD_WRITE ++static inline unsigned long pmd_write(pmd_t pmd) + { + pte_t pte = __pte(pmd_val(pmd)); + +- return pte_dirty(pte); ++ return pte_write(pte); + } + +-static inline unsigned long pmd_young(pmd_t pmd) ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++static inline unsigned long pmd_dirty(pmd_t pmd) + { + pte_t pte = __pte(pmd_val(pmd)); + +- return pte_young(pte); ++ return pte_dirty(pte); + } + +-static inline unsigned long pmd_write(pmd_t pmd) ++static inline unsigned long pmd_young(pmd_t pmd) + { + pte_t pte = __pte(pmd_val(pmd)); + +- return pte_write(pte); ++ return pte_young(pte); + } + + static inline unsigned long pmd_trans_huge(pmd_t pmd) diff --git a/queue-4.10/sparc64-kern_addr_valid-regression.patch b/queue-4.10/sparc64-kern_addr_valid-regression.patch new file mode 100644 index 00000000000..30b6a5d0309 --- /dev/null +++ b/queue-4.10/sparc64-kern_addr_valid-regression.patch @@ -0,0 +1,38 @@ +From foo@baz Sat Apr 29 08:20:51 CEST 2017 +From: bob picco +Date: Fri, 10 Mar 2017 14:31:19 -0500 +Subject: sparc64: kern_addr_valid regression + +From: bob picco + + +[ Upstream commit adfae8a5d833fa2b46577a8081f350e408851f5b ] + +I encountered this bug when using /proc/kcore to examine the kernel. Plus a +coworker inquired about debugging tools. We computed pa but did +not use it during the maximum physical address bits test. Instead we used +the identity mapped virtual address which will always fail this test. + +I believe the defect came in here: +[bpicco@zareason linus.git]$ git describe --contains bb4e6e85daa52 +v3.18-rc1~87^2~4 +. + +Signed-off-by: Bob Picco +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/mm/init_64.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -1495,7 +1495,7 @@ bool kern_addr_valid(unsigned long addr) + if ((long)addr < 0L) { + unsigned long pa = __pa(addr); + +- if ((addr >> max_phys_bits) != 0UL) ++ if ((pa >> max_phys_bits) != 0UL) + return false; + + return pfn_valid(pa >> PAGE_SHIFT); diff --git a/queue-4.10/tcp-clear-saved_syn-in-tcp_disconnect.patch b/queue-4.10/tcp-clear-saved_syn-in-tcp_disconnect.patch new file mode 100644 index 00000000000..8b8597862c5 --- /dev/null +++ b/queue-4.10/tcp-clear-saved_syn-in-tcp_disconnect.patch @@ -0,0 +1,56 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Eric Dumazet +Date: Sat, 8 Apr 2017 08:07:33 -0700 +Subject: tcp: clear saved_syn in tcp_disconnect() + +From: Eric Dumazet + + +[ Upstream commit 17c3060b1701fc69daedb4c90be6325d3d9fca8e ] + +In the (very unlikely) case a passive socket becomes a listener, +we do not want to duplicate its saved SYN headers. + +This would lead to double frees, use after free, and please hackers and +various fuzzers + +Tested: + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, IPPROTO_TCP, TCP_SAVE_SYN, [1], 4) = 0 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 5) = 0 + + +0 < S 0:0(0) win 32972 + +0 > S. 0:0(0) ack 1 <...> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + + +0 connect(4, AF_UNSPEC, ...) = 0 + +0 close(3) = 0 + +0 bind(4, ..., ...) = 0 + +0 listen(4, 5) = 0 + + +0 < S 0:0(0) win 32972 + +0 > S. 0:0(0) ack 1 <...> + +.1 < . 1:1(0) ack 1 win 257 + +Fixes: cd8ae85299d5 ("tcp: provide SYN headers for passive connections") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -2301,6 +2301,7 @@ int tcp_disconnect(struct sock *sk, int + tcp_init_send_head(sk); + memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); + __sk_dst_reset(sk); ++ tcp_saved_syn_free(tp); + + WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); + diff --git a/queue-4.10/tcp-fix-scm_timestamping_opt_stats-for-normal-skbs.patch b/queue-4.10/tcp-fix-scm_timestamping_opt_stats-for-normal-skbs.patch new file mode 100644 index 00000000000..f6e2b7b14f6 --- /dev/null +++ b/queue-4.10/tcp-fix-scm_timestamping_opt_stats-for-normal-skbs.patch @@ -0,0 +1,94 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Soheil Hassas Yeganeh +Date: Sat, 18 Mar 2017 17:02:59 -0400 +Subject: tcp: fix SCM_TIMESTAMPING_OPT_STATS for normal skbs + +From: Soheil Hassas Yeganeh + + +[ Upstream commit 8605330aac5a5785630aec8f64378a54891937cc ] + +__sock_recv_timestamp can be called for both normal skbs (for +receive timestamps) and for skbs on the error queue (for transmit +timestamps). + +Commit 1c885808e456 +(tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING) +assumes any skb passed to __sock_recv_timestamp are from +the error queue, containing OPT_STATS in the content of the skb. +This results in accessing invalid memory or generating junk +data. + +To fix this, set skb->pkt_type to PACKET_OUTGOING for packets +on the error queue. This is safe because on the receive path +on local sockets skb->pkt_type is never set to PACKET_OUTGOING. +With that, copy OPT_STATS from a packet, only if its pkt_type +is PACKET_OUTGOING. + +Fixes: 1c885808e456 ("tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING") +Reported-by: JongHwan Kim +Signed-off-by: Soheil Hassas Yeganeh +Signed-off-by: Eric Dumazet +Signed-off-by: Willem de Bruijn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 10 ++++++++++ + net/socket.c | 13 ++++++++++++- + 2 files changed, 22 insertions(+), 1 deletion(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -3700,6 +3700,15 @@ static void sock_rmem_free(struct sk_buf + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + } + ++static void skb_set_err_queue(struct sk_buff *skb) ++{ ++ /* pkt_type of skbs received on local sockets is never PACKET_OUTGOING. ++ * So, it is safe to (mis)use it to mark skbs on the error queue. ++ */ ++ skb->pkt_type = PACKET_OUTGOING; ++ BUILD_BUG_ON(PACKET_OUTGOING == 0); ++} ++ + /* + * Note: We dont mem charge error packets (no sk_forward_alloc changes) + */ +@@ -3713,6 +3722,7 @@ int sock_queue_err_skb(struct sock *sk, + skb->sk = sk; + skb->destructor = sock_rmem_free; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); ++ skb_set_err_queue(skb); + + /* before exiting rcu section, make sure dst is refcounted */ + skb_dst_force(skb); +--- a/net/socket.c ++++ b/net/socket.c +@@ -654,6 +654,16 @@ int kernel_sendmsg(struct socket *sock, + } + EXPORT_SYMBOL(kernel_sendmsg); + ++static bool skb_is_err_queue(const struct sk_buff *skb) ++{ ++ /* pkt_type of skbs enqueued on the error queue are set to ++ * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do ++ * in recvmsg, since skbs received on a local socket will never ++ * have a pkt_type of PACKET_OUTGOING. ++ */ ++ return skb->pkt_type == PACKET_OUTGOING; ++} ++ + /* + * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) + */ +@@ -697,7 +707,8 @@ void __sock_recv_timestamp(struct msghdr + put_cmsg(msg, SOL_SOCKET, + SCM_TIMESTAMPING, sizeof(tss), &tss); + +- if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS)) ++ if (skb_is_err_queue(skb) && skb->len && ++ (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS)) + put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS, + skb->len, skb->data); + } diff --git a/queue-4.10/tcp-mark-skbs-with-scm_timestamping_opt_stats.patch b/queue-4.10/tcp-mark-skbs-with-scm_timestamping_opt_stats.patch new file mode 100644 index 00000000000..ca57cb91539 --- /dev/null +++ b/queue-4.10/tcp-mark-skbs-with-scm_timestamping_opt_stats.patch @@ -0,0 +1,118 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Soheil Hassas Yeganeh +Date: Sat, 18 Mar 2017 17:03:00 -0400 +Subject: tcp: mark skbs with SCM_TIMESTAMPING_OPT_STATS + +From: Soheil Hassas Yeganeh + + +[ Upstream commit 4ef1b2869447411ad3ef91ad7d4891a83c1a509a ] + +SOF_TIMESTAMPING_OPT_STATS can be enabled and disabled +while packets are collected on the error queue. +So, checking SOF_TIMESTAMPING_OPT_STATS in sk->sk_tsflags +is not enough to safely assume that the skb contains +OPT_STATS data. + +Add a bit in sock_exterr_skb to indicate whether the +skb contains opt_stats data. + +Fixes: 1c885808e456 ("tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING") +Reported-by: JongHwan Kim +Signed-off-by: Soheil Hassas Yeganeh +Signed-off-by: Eric Dumazet +Signed-off-by: Willem de Bruijn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/errqueue.h | 2 ++ + net/core/skbuff.c | 17 +++++++++++------ + net/socket.c | 2 +- + 3 files changed, 14 insertions(+), 7 deletions(-) + +--- a/include/linux/errqueue.h ++++ b/include/linux/errqueue.h +@@ -20,6 +20,8 @@ struct sock_exterr_skb { + struct sock_extended_err ee; + u16 addr_offset; + __be16 port; ++ u8 opt_stats:1, ++ unused:7; + }; + + #endif +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -3799,16 +3799,20 @@ EXPORT_SYMBOL(skb_clone_sk); + + static void __skb_complete_tx_timestamp(struct sk_buff *skb, + struct sock *sk, +- int tstype) ++ int tstype, ++ bool opt_stats) + { + struct sock_exterr_skb *serr; + int err; + ++ BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb)); ++ + serr = SKB_EXT_ERR(skb); + memset(serr, 0, sizeof(*serr)); + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + serr->ee.ee_info = tstype; ++ serr->opt_stats = opt_stats; + serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0; + if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { + serr->ee.ee_data = skb_shinfo(skb)->tskey; +@@ -3850,7 +3854,7 @@ void skb_complete_tx_timestamp(struct sk + */ + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + *skb_hwtstamps(skb) = *hwtstamps; +- __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); ++ __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false); + sock_put(sk); + } + } +@@ -3861,7 +3865,7 @@ void __skb_tstamp_tx(struct sk_buff *ori + struct sock *sk, int tstype) + { + struct sk_buff *skb; +- bool tsonly; ++ bool tsonly, opt_stats = false; + + if (!sk) + return; +@@ -3874,9 +3878,10 @@ void __skb_tstamp_tx(struct sk_buff *ori + #ifdef CONFIG_INET + if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && + sk->sk_protocol == IPPROTO_TCP && +- sk->sk_type == SOCK_STREAM) ++ sk->sk_type == SOCK_STREAM) { + skb = tcp_get_timestamping_opt_stats(sk); +- else ++ opt_stats = true; ++ } else + #endif + skb = alloc_skb(0, GFP_ATOMIC); + } else { +@@ -3895,7 +3900,7 @@ void __skb_tstamp_tx(struct sk_buff *ori + else + skb->tstamp = ktime_get_real(); + +- __skb_complete_tx_timestamp(skb, sk, tstype); ++ __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats); + } + EXPORT_SYMBOL_GPL(__skb_tstamp_tx); + +--- a/net/socket.c ++++ b/net/socket.c +@@ -708,7 +708,7 @@ void __sock_recv_timestamp(struct msghdr + SCM_TIMESTAMPING, sizeof(tss), &tss); + + if (skb_is_err_queue(skb) && skb->len && +- (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS)) ++ SKB_EXT_ERR(skb)->opt_stats) + put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS, + skb->len, skb->data); + } diff --git a/queue-4.10/tcp-memset-ca_priv-data-to-0-properly.patch b/queue-4.10/tcp-memset-ca_priv-data-to-0-properly.patch new file mode 100644 index 00000000000..decd8215d2f --- /dev/null +++ b/queue-4.10/tcp-memset-ca_priv-data-to-0-properly.patch @@ -0,0 +1,61 @@ +From foo@baz Sat Apr 29 08:22:40 CEST 2017 +From: Wei Wang +Date: Tue, 25 Apr 2017 17:38:02 -0700 +Subject: tcp: memset ca_priv data to 0 properly + +From: Wei Wang + + +[ Upstream commit c1201444075009507a6818de6518e2822b9a87c8 ] + +Always zero out ca_priv data in tcp_assign_congestion_control() so that +ca_priv data is cleared out during socket creation. +Also always zero out ca_priv data in tcp_reinit_congestion_control() so +that when cc algorithm is changed, ca_priv data is cleared out as well. +We should still zero out ca_priv data even in TCP_CLOSE state because +user could call connect() on AF_UNSPEC to disconnect the socket and +leave it in TCP_CLOSE state and later call setsockopt() to switch cc +algorithm on this socket. + +Fixes: 2b0a8c9ee ("tcp: add CDG congestion control") +Reported-by: Andrey Konovalov +Signed-off-by: Wei Wang +Acked-by: Eric Dumazet +Acked-by: Yuchung Cheng +Acked-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_cong.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +--- a/net/ipv4/tcp_cong.c ++++ b/net/ipv4/tcp_cong.c +@@ -168,12 +168,8 @@ void tcp_assign_congestion_control(struc + } + out: + rcu_read_unlock(); ++ memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + +- /* Clear out private data before diag gets it and +- * the ca has not been initialized. +- */ +- if (ca->get_info) +- memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + if (ca->flags & TCP_CONG_NEEDS_ECN) + INET_ECN_xmit(sk); + else +@@ -200,11 +196,10 @@ static void tcp_reinit_congestion_contro + tcp_cleanup_congestion_control(sk); + icsk->icsk_ca_ops = ca; + icsk->icsk_ca_setsockopt = 1; ++ memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + +- if (sk->sk_state != TCP_CLOSE) { +- memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); ++ if (sk->sk_state != TCP_CLOSE) + tcp_init_congestion_control(sk); +- } + } + + /* Manage refcounts on socket close. */