From: Greg Kroah-Hartman Date: Mon, 29 Feb 2016 22:44:52 +0000 (-0800) Subject: 4.4-stable patches X-Git-Tag: v3.10.99~65 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4f4c349c50b24a7c224ac2bd31eebb6e9793b67a;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: af_iucv-validate-socket-address-length-in-iucv_sock_bind.patch af_unix-don-t-set-err-in-unix_stream_read_generic-unless-there-was-an-error.patch af_unix-fix-struct-pid-memory-leak.patch af_unix-guard-against-other-sk-in-unix_dgram_sendmsg.patch bonding-fix-arp-monitor-validation.patch bpf-fix-branch-offset-adjustment-on-backjumps-after-patching-ctx-expansion.patch enic-increment-devcmd2-result-ring-in-case-of-timeout.patch flow_dissector-fix-unaligned-access-in-__skb_flow_dissector-when-used-by-eth_get_headlen.patch gro-make-gro-aware-of-lightweight-tunnels.patch iff_no_queue-fix-for-drivers-not-calling-ether_setup.patch inet-frag-always-orphan-skbs-inside-ip_defrag.patch ipv4-fix-memory-leaks-in-ip_cmsg_send-callers.patch ipv6-addrconf-fix-recursive-spin-lock-call.patch ipv6-enforce-flowi6_oif-usage-in-ip6_dst_lookup_tail.patch ipv6-fix-a-lockdep-splat.patch ipv6-udp-use-sticky-pktinfo-egress-ifindex-on-connect.patch l2tp-fix-error-creating-l2tp-tunnels.patch lwt-fix-rx-checksum-setting-for-lwt-devices-tunneling-over-ipv6.patch net-add-sysctl_max_skb_frags.patch net-copy-inner-l3-and-l4-headers-as-unaligned-on-gre-teb.patch net-dp83640-fix-tx-timestamp-overflow-handling.patch net-dsa-fix-mv88e6xxx-switches.patch net-mlx4_en-avoid-changing-dev-features-directly-in-run-time.patch net-mlx4_en-choose-time-stamping-shift-value-according-to-hw-frequency.patch net-mlx4_en-count-hw-buffer-overrun-only-once.patch net_sched-fix-reclassification-needs-to-consider-ether-protocol-changes.patch pppoe-fix-reference-counting-in-pppoe-proxy.patch pptp-fix-illegal-memory-access-caused-by-multiple-bind-s.patch qmi_wwan-add-4g-lte-usb-modem-u901.patch route-check-and-remove-route-cache-when-we-get-route.patch rtnl-rtm_getnetconf-fix-wrong-return-value.patch sctp-allow-setting-sctp_sack_immediately-by-the-application.patch sctp-fix-port-hash-table-size-computation.patch sctp-translate-network-order-to-host-order-when-users-get-a-hmacid.patch switchdev-require-rtnl-mutex-to-be-held-when-sending-fdb-notifications.patch tcp-beware-of-alignments-in-tcp_get_info.patch tcp-dccp-fix-another-race-at-listener-dismantle.patch tcp-do-not-drop-syn_recv-on-all-icmp-reports.patch tcp-fix-null-deref-in-tcp_v4_send_ack.patch tcp-md5-release-request-socket-instead-of-listener.patch tg3-fix-for-tg3-transmit-queue-0-timed-out-when-too-many-gso_segs.patch tipc-fix-connection-abort-during-subscription-cancel.patch tipc-fix-premature-addition-of-node-to-lookup-table.patch tipc-unlock-in-error-path.patch tunnels-allow-ipv6-udp-checksums-to-be-correctly-controlled.patch unix-correctly-track-in-flight-fds-in-sending-process-user_struct.patch unix_diag-fix-incorrect-sign-extension-in-unix_lookup_by_ino.patch --- diff --git a/queue-4.4/af_iucv-validate-socket-address-length-in-iucv_sock_bind.patch b/queue-4.4/af_iucv-validate-socket-address-length-in-iucv_sock_bind.patch new file mode 100644 index 00000000000..fd4325a03e5 --- /dev/null +++ b/queue-4.4/af_iucv-validate-socket-address-length-in-iucv_sock_bind.patch @@ -0,0 +1,30 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Ursula Braun +Date: Tue, 19 Jan 2016 10:41:33 +0100 +Subject: af_iucv: Validate socket address length in iucv_sock_bind() + +From: Ursula Braun + +[ Upstream commit 
52a82e23b9f2a9e1d429c5207f8575784290d008 ] + +Signed-off-by: Ursula Braun +Reported-by: Dmitry Vyukov +Reviewed-by: Evgeny Cherkashin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/iucv/af_iucv.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/iucv/af_iucv.c ++++ b/net/iucv/af_iucv.c +@@ -708,6 +708,9 @@ static int iucv_sock_bind(struct socket + if (!addr || addr->sa_family != AF_IUCV) + return -EINVAL; + ++ if (addr_len < sizeof(struct sockaddr_iucv)) ++ return -EINVAL; ++ + lock_sock(sk); + if (sk->sk_state != IUCV_OPEN) { + err = -EBADFD; diff --git a/queue-4.4/af_unix-don-t-set-err-in-unix_stream_read_generic-unless-there-was-an-error.patch b/queue-4.4/af_unix-don-t-set-err-in-unix_stream_read_generic-unless-there-was-an-error.patch new file mode 100644 index 00000000000..a3ecfc8ced2 --- /dev/null +++ b/queue-4.4/af_unix-don-t-set-err-in-unix_stream_read_generic-unless-there-was-an-error.patch @@ -0,0 +1,74 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Rainer Weikusat +Date: Mon, 8 Feb 2016 18:47:19 +0000 +Subject: af_unix: Don't set err in unix_stream_read_generic unless there was an error + +From: Rainer Weikusat + +[ Upstream commit 1b92ee3d03af6643df395300ba7748f19ecdb0c5 ] + +The present unix_stream_read_generic contains various code sequences of +the form + +err = -EDISASTER; +if () + goto out; + +This has the unfortunate side effect of possibly causing the error code +to bleed through to the final + +out: + return copied ? : err; + +and then to be wrongly returned if no data was copied because the caller +didn't supply a data buffer, as demonstrated by the program available at + +http://pad.lv/1540731 + +Change it such that err is only set if an error condition was detected. + +Fixes: 3822b5c2fc62 ("af_unix: Revert 'lock_interruptible' in stream receive code") +Reported-by: Joseph Salisbury +Signed-off-by: Rainer Weikusat +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/unix/af_unix.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -2270,13 +2270,15 @@ static int unix_stream_read_generic(stru + size_t size = state->size; + unsigned int last_len; + +- err = -EINVAL; +- if (sk->sk_state != TCP_ESTABLISHED) ++ if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { ++ err = -EINVAL; + goto out; ++ } + +- err = -EOPNOTSUPP; +- if (flags & MSG_OOB) ++ if (unlikely(flags & MSG_OOB)) { ++ err = -EOPNOTSUPP; + goto out; ++ } + + target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); + timeo = sock_rcvtimeo(sk, noblock); +@@ -2322,9 +2324,11 @@ again: + goto unlock; + + unix_state_unlock(sk); +- err = -EAGAIN; +- if (!timeo) ++ if (!timeo) { ++ err = -EAGAIN; + break; ++ } ++ + mutex_unlock(&u->readlock); + + timeo = unix_stream_data_wait(sk, timeo, last, diff --git a/queue-4.4/af_unix-fix-struct-pid-memory-leak.patch b/queue-4.4/af_unix-fix-struct-pid-memory-leak.patch new file mode 100644 index 00000000000..11c50cf0f94 --- /dev/null +++ b/queue-4.4/af_unix-fix-struct-pid-memory-leak.patch @@ -0,0 +1,34 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Eric Dumazet +Date: Sun, 24 Jan 2016 13:53:50 -0800 +Subject: af_unix: fix struct pid memory leak + +From: Eric Dumazet + +[ Upstream commit fa0dc04df259ba2df3ce1920e9690c7842f8fa4b ] + +Dmitry reported a struct pid leak detected by a syzkaller program. + +Bug happens in unix_stream_recvmsg() when we break the loop when a +signal is pending, without properly releasing scm. 
+
+Fixes: b3ca9b02b007 ("net: fix multithreaded signal handling in unix recv routines")
+Reported-by: Dmitry Vyukov
+Signed-off-by: Eric Dumazet
+Cc: Rainer Weikusat
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/unix/af_unix.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -2332,6 +2332,7 @@ again:
+
+ if (signal_pending(current)) {
+ err = sock_intr_errno(timeo);
++ scm_destroy(&scm);
+ goto out;
+ }
+
diff --git a/queue-4.4/af_unix-guard-against-other-sk-in-unix_dgram_sendmsg.patch b/queue-4.4/af_unix-guard-against-other-sk-in-unix_dgram_sendmsg.patch
new file mode 100644
index 00000000000..4fb6a6c5eaf
--- /dev/null
+++ b/queue-4.4/af_unix-guard-against-other-sk-in-unix_dgram_sendmsg.patch
@@ -0,0 +1,50 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Rainer Weikusat
+Date: Thu, 11 Feb 2016 19:37:27 +0000
+Subject: af_unix: Guard against other == sk in unix_dgram_sendmsg
+
+From: Rainer Weikusat
+
+[ Upstream commit a5527dda344fff0514b7989ef7a755729769daa1 ]
+
+The unix_dgram_sendmsg routine uses the following test
+
+if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+
+to determine if sk and other are in an n:1 association (either
+established via connect or by using sendto to send messages to an
+unrelated socket identified by address). This isn't correct as the
+specified address could have been bound to the sending socket itself or
+because this socket could have been connected to itself by the time of
+the unix_peer_get but disconnected before the unix_state_lock(other). In
+both cases, the if-block would be entered despite other == sk, which
+might either block the sender unintentionally or lead to trying to unlock
+the same spin lock twice for a non-blocking send. Add an other != sk
+check to guard against this.
+
+Fixes: 7d267278a9ec ("unix: avoid use-after-free in ep_remove_wait_queue")
+Reported-By: Philipp Hahn
+Signed-off-by: Rainer Weikusat
+Tested-by: Philipp Hahn
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/unix/af_unix.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -1781,7 +1781,12 @@ restart_locked:
+ goto out_unlock;
+ }
+
+- if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
++ /* other == sk && unix_peer(other) != sk if
++ * - unix_peer(sk) == NULL, destination address bound to sk
++ * - unix_peer(sk) == sk by time of get but disconnected before lock
++ */
++ if (other != sk &&
++ unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+ if (timeo) {
+ timeo = unix_wait_for_peer(other, timeo);
+
diff --git a/queue-4.4/bonding-fix-arp-monitor-validation.patch b/queue-4.4/bonding-fix-arp-monitor-validation.patch
new file mode 100644
index 00000000000..17874a35371
--- /dev/null
+++ b/queue-4.4/bonding-fix-arp-monitor-validation.patch
@@ -0,0 +1,122 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Jay Vosburgh
+Date: Tue, 2 Feb 2016 13:35:56 -0800
+Subject: bonding: Fix ARP monitor validation
+
+From: Jay Vosburgh
+
+[ Upstream commit 21a75f0915dde8674708b39abfcda113911c49b1 ]
+
+The current logic in bond_arp_rcv will accept an incoming ARP for
+validation if (a) the receiving slave is either "active" (which includes
+the currently active slave, or the current ARP slave) or, (b) there is a
+currently active slave, and it has received an ARP since it became active.
+For case (b), the receiving slave isn't the currently active slave, and is +receiving the original broadcast ARP request, not an ARP reply from the +target. + + This logic can fail if there is no currently active slave. In +this situation, the ARP probe logic cycles through all slaves, assigning +each in turn as the "current_arp_slave" for one arp_interval, then setting +that one as "active," and sending an ARP probe from that slave. The +current logic expects the ARP reply to arrive on the sending +current_arp_slave, however, due to switch FDB updating delays, the reply +may be directed to another slave. + + This can arise if the bonding slaves and switch are working, but +the ARP target is not responding. When the ARP target recovers, a +condition may result wherein the ARP target host replies faster than the +switch can update its forwarding table, causing each ARP reply to be sent +to the previous current_arp_slave. This will never pass the logic in +bond_arp_rcv, as neither of the above conditions (a) or (b) are met. + + Some experimentation on a LAN shows ARP reply round trips in the +200 usec range, but my available switches never update their FDB in less +than 4000 usec. + + This patch changes the logic in bond_arp_rcv to additionally +accept an ARP reply for validation on any slave if there is a current ARP +slave and it sent an ARP probe during the previous arp_interval. + +Fixes: aeea64ac717a ("bonding: don't trust arp requests unless active slave really works") +Cc: Veaceslav Falico +Cc: Andy Gospodarek +Signed-off-by: Jay Vosburgh +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 39 ++++++++++++++++++++++++++++----------- + 1 file changed, 28 insertions(+), 11 deletions(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -214,6 +214,8 @@ static void bond_uninit(struct net_devic + static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, + struct rtnl_link_stats64 *stats); + static void bond_slave_arr_handler(struct work_struct *work); ++static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, ++ int mod); + + /*---------------------------- General routines -----------------------------*/ + +@@ -2418,7 +2420,7 @@ int bond_arp_rcv(const struct sk_buff *s + struct slave *slave) + { + struct arphdr *arp = (struct arphdr *)skb->data; +- struct slave *curr_active_slave; ++ struct slave *curr_active_slave, *curr_arp_slave; + unsigned char *arp_ptr; + __be32 sip, tip; + int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); +@@ -2465,26 +2467,41 @@ int bond_arp_rcv(const struct sk_buff *s + &sip, &tip); + + curr_active_slave = rcu_dereference(bond->curr_active_slave); ++ curr_arp_slave = rcu_dereference(bond->current_arp_slave); + +- /* Backup slaves won't see the ARP reply, but do come through +- * here for each ARP probe (so we swap the sip/tip to validate +- * the probe). In a "redundant switch, common router" type of +- * configuration, the ARP probe will (hopefully) travel from +- * the active, through one switch, the router, then the other +- * switch before reaching the backup. ++ /* We 'trust' the received ARP enough to validate it if: + * +- * We 'trust' the arp requests if there is an active slave and +- * it received valid arp reply(s) after it became active. 
This
+- * is done to avoid endless looping when we can't reach the
++ * (a) the slave receiving the ARP is active (which includes the
++ * current ARP slave, if any), or
++ *
++ * (b) the receiving slave isn't active, but there is a currently
++ * active slave and it received valid arp reply(s) after it became
++ * the currently active slave, or
++ *
++ * (c) there is an ARP slave that sent an ARP during the prior ARP
++ * interval, and we receive an ARP reply on any slave. We accept
++ * these because switch FDB update delays may deliver the ARP
++ * reply to a slave other than the sender of the ARP request.
++ *
++ * Note: for (b), backup slaves are receiving the broadcast ARP
++ * request, not a reply. This request passes from the sending
++ * slave through the L2 switch(es) to the receiving slave. Since
++ * this is checking the request, sip/tip are swapped for
++ * validation.
++ *
++ * This is done to avoid endless looping when we can't reach the
+ * arp_ip_target and fool ourselves with our own arp requests.
+ */
+-
+ if (bond_is_active_slave(slave))
+ bond_validate_arp(bond, slave, sip, tip);
+ else if (curr_active_slave &&
+ time_after(slave_last_rx(bond, curr_active_slave),
+ curr_active_slave->last_link_up))
+ bond_validate_arp(bond, slave, tip, sip);
++ else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) &&
++ bond_time_in_interval(bond,
++ dev_trans_start(curr_arp_slave->dev), 1))
++ bond_validate_arp(bond, slave, sip, tip);
+
+ out_unlock:
+ if (arp != (struct arphdr *)skb->data)
diff --git a/queue-4.4/bpf-fix-branch-offset-adjustment-on-backjumps-after-patching-ctx-expansion.patch b/queue-4.4/bpf-fix-branch-offset-adjustment-on-backjumps-after-patching-ctx-expansion.patch
new file mode 100644
index 00000000000..3640f9cd597
--- /dev/null
+++ b/queue-4.4/bpf-fix-branch-offset-adjustment-on-backjumps-after-patching-ctx-expansion.patch
@@ -0,0 +1,91 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Daniel Borkmann
+Date: Wed, 10 Feb 2016 16:47:11 +0100
+Subject: bpf: fix branch offset adjustment on backjumps after patching ctx expansion
+
+From: Daniel Borkmann
+
+[ Upstream commit a1b14d27ed0965838350f1377ff97c93ee383492 ]
+
+When ctx access is used, the kernel often needs to expand/rewrite
+instructions, so after that patching, branch offsets have to be
+adjusted for both forward and backward jumps in the new eBPF program,
+but for backward jumps it fails to account for the delta. Meaning, for
+example, if the expansion happens exactly on the insn that sits at
+the jump target, it doesn't fix up the back jump offset.
+
+Analysis on what the check in adjust_branches() is currently doing:
+
+  /* adjust offset of jmps if necessary */
+  if (i < pos && i + insn->off + 1 > pos)
+    insn->off += delta;
+  else if (i > pos && i + insn->off + 1 < pos)
+    insn->off -= delta;
+
+First condition (forward jumps):
+
+  Before:                  After:
+
+  insns[0]                 insns[0]
+  insns[1] <--- i/insn     insns[1] <--- i/insn
+  insns[2] <--- pos        insns[P] <--- pos
+  insns[3]                 insns[P] `------| delta
+  insns[4] <--- target_X   insns[P]  `-----|
+  insns[5]                 insns[3]
+                           insns[4] <--- target_X
+                           insns[5]
+
+First case is if we cross pos-boundary and the jump instruction was
+before pos. This is handled correctly. I.e. if i == pos, then this
+would mean our jump that we currently check was the patchlet itself
+that we just injected. Since such patchlets are self-contained and
+have no awareness of any insns before or after the patched one, the
+delta is correctly not adjusted. Also, for the second condition, the
+case of i + insn->off + 1 == pos means we jump to that newly patched
+instruction, so no offset adjustments are needed. That part is correct.
+
+Second condition (backward jumps):
+
+  Before:                           After:
+
+  insns[0]                          insns[0]
+  insns[1] <--- target_X            insns[1] <--- target_X
+  insns[2] <--- pos <-- target_Y    insns[P] <--- pos <-- target_Y
+  insns[3]                          insns[P] `------| delta
+  insns[4] <--- i/insn              insns[P]  `-----|
+  insns[5]                          insns[3]
+                                    insns[4] <--- i/insn
+                                    insns[5]
+
+Second interesting case is where we cross pos-boundary and the jump
+instruction was after pos. Backward jump with i == pos would be
+impossible and pose a bug somewhere in the patchlet, so the first
+condition checking i > pos is okay only by itself. However, i +
+insn->off + 1 < pos does not always work as intended to trigger the
+adjustment. It works when jump targets would be far off where the
+delta wouldn't matter. But, for example, where the fixed insn->off
+before pointed to pos (target_Y), it now points to pos + delta, so
+that additional room needs to be taken into account for the check.
+This means that i) both tests here need to be adjusted into pos + delta,
+and ii) for the second condition, the test needs to be <= as pos
+itself can be a target in the backjump, too.
+
+Fixes: 9bac3d6d548e ("bpf: allow extended BPF programs access skb fields")
+Signed-off-by: Daniel Borkmann
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/bpf/verifier.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -2082,7 +2082,7 @@ static void adjust_branches(struct bpf_p
+ /* adjust offset of jmps if necessary */
+ if (i < pos && i + insn->off + 1 > pos)
+ insn->off += delta;
+- else if (i > pos && i + insn->off + 1 < pos)
++ else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
+ insn->off -= delta;
+ }
+ }
diff --git a/queue-4.4/enic-increment-devcmd2-result-ring-in-case-of-timeout.patch b/queue-4.4/enic-increment-devcmd2-result-ring-in-case-of-timeout.patch
new file mode 100644
index 00000000000..7025a95d243
--- /dev/null
+++ b/queue-4.4/enic-increment-devcmd2-result-ring-in-case-of-timeout.patch
@@ -0,0 +1,73 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Sandeep Pillai
+Date: Wed, 3 Feb 2016 14:40:44 +0530
+Subject: enic: increment devcmd2 result ring in case of timeout
+
+From: Sandeep Pillai
+
+[ Upstream commit ca7f41a4957b872577807169bd7464b36aae9b9c ]
+
+Firmware posts the devcmd result in the result ring. In case of timeout,
+the driver does not increment the current result pointer, and firmware
+could post the result after the timeout has occurred. During the next
+devcmd, the driver would be reading the result of the previous devcmd.
+
+Fix this by incrementing the result even in case of timeout.
+
+Fixes: 373fb0873d43 ("enic: add devcmd2")
+Signed-off-by: Sandeep Pillai
+Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com>
+Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/cisco/enic/enic.h | 2 +- + drivers/net/ethernet/cisco/enic/vnic_dev.c | 19 ++++++++++++------- + 2 files changed, 13 insertions(+), 8 deletions(-) + +--- a/drivers/net/ethernet/cisco/enic/enic.h ++++ b/drivers/net/ethernet/cisco/enic/enic.h +@@ -33,7 +33,7 @@ + + #define DRV_NAME "enic" + #define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver" +-#define DRV_VERSION "2.3.0.12" ++#define DRV_VERSION "2.3.0.20" + #define DRV_COPYRIGHT "Copyright 2008-2013 Cisco Systems, Inc" + + #define ENIC_BARS_MAX 6 +--- a/drivers/net/ethernet/cisco/enic/vnic_dev.c ++++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c +@@ -298,7 +298,8 @@ static int _vnic_dev_cmd2(struct vnic_de + int wait) + { + struct devcmd2_controller *dc2c = vdev->devcmd2; +- struct devcmd2_result *result = dc2c->result + dc2c->next_result; ++ struct devcmd2_result *result; ++ u8 color; + unsigned int i; + int delay, err; + u32 fetch_index, new_posted; +@@ -336,13 +337,17 @@ static int _vnic_dev_cmd2(struct vnic_de + if (dc2c->cmd_ring[posted].flags & DEVCMD2_FNORESULT) + return 0; + ++ result = dc2c->result + dc2c->next_result; ++ color = dc2c->color; ++ ++ dc2c->next_result++; ++ if (dc2c->next_result == dc2c->result_size) { ++ dc2c->next_result = 0; ++ dc2c->color = dc2c->color ? 0 : 1; ++ } ++ + for (delay = 0; delay < wait; delay++) { +- if (result->color == dc2c->color) { +- dc2c->next_result++; +- if (dc2c->next_result == dc2c->result_size) { +- dc2c->next_result = 0; +- dc2c->color = dc2c->color ? 0 : 1; +- } ++ if (result->color == color) { + if (result->error) { + err = result->error; + if (err != ERR_ECMDUNKNOWN || diff --git a/queue-4.4/flow_dissector-fix-unaligned-access-in-__skb_flow_dissector-when-used-by-eth_get_headlen.patch b/queue-4.4/flow_dissector-fix-unaligned-access-in-__skb_flow_dissector-when-used-by-eth_get_headlen.patch new file mode 100644 index 00000000000..efd42bf1d85 --- /dev/null +++ b/queue-4.4/flow_dissector-fix-unaligned-access-in-__skb_flow_dissector-when-used-by-eth_get_headlen.patch @@ -0,0 +1,52 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Alexander Duyck +Date: Tue, 9 Feb 2016 02:49:54 -0800 +Subject: flow_dissector: Fix unaligned access in __skb_flow_dissector when used by eth_get_headlen + +From: Alexander Duyck + +[ Upstream commit 461547f3158978c180d74484d58e82be9b8e7357 ] + +This patch fixes an issue with unaligned accesses when using +eth_get_headlen on a page that was DMA aligned instead of being IP aligned. +The fact is when trying to check the length we don't need to be looking at +the flow label so we can reorder the checks to first check if we are +supposed to gather the flow label and then make the call to actually get +it. + +v2: Updated path so that either STOP_AT_FLOW_LABEL or KEY_FLOW_LABEL can + cause us to check for the flow label. + +Reported-by: Sowmini Varadhan +Signed-off-by: Alexander Duyck +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/flow_dissector.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/net/core/flow_dissector.c ++++ b/net/core/flow_dissector.c +@@ -208,7 +208,6 @@ ip: + case htons(ETH_P_IPV6): { + const struct ipv6hdr *iph; + struct ipv6hdr _iph; +- __be32 flow_label; + + ipv6: + iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); +@@ -230,8 +229,12 @@ ipv6: + key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + } + +- flow_label = ip6_flowlabel(iph); +- if (flow_label) { ++ if ((dissector_uses_key(flow_dissector, ++ FLOW_DISSECTOR_KEY_FLOW_LABEL) || ++ (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) && ++ ip6_flowlabel(iph)) { ++ __be32 flow_label = ip6_flowlabel(iph); ++ + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_FLOW_LABEL)) { + key_tags = skb_flow_dissector_target(flow_dissector, diff --git a/queue-4.4/gro-make-gro-aware-of-lightweight-tunnels.patch b/queue-4.4/gro-make-gro-aware-of-lightweight-tunnels.patch new file mode 100644 index 00000000000..8d505408f09 --- /dev/null +++ b/queue-4.4/gro-make-gro-aware-of-lightweight-tunnels.patch @@ -0,0 +1,82 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Jesse Gross +Date: Wed, 20 Jan 2016 17:59:49 -0800 +Subject: gro: Make GRO aware of lightweight tunnels. + +From: Jesse Gross + +[ Upstream commit ce87fc6ce3f9f4488546187e3757cf666d9d4a2a ] + +GRO is currently not aware of tunnel metadata generated by lightweight +tunnels and stored in the dst. This leads to two possible problems: + * Incorrectly merging two frames that have different metadata. + * Leaking of allocated metadata from merged frames. + +This avoids those problems by comparing the tunnel information before +merging, similar to how we handle other metadata (such as vlan tags), +and releasing any state when we are done. + +Reported-by: John +Fixes: 2e15ea39 ("ip_gre: Add support to collect tunnel metadata.") +Signed-off-by: Jesse Gross +Acked-by: Eric Dumazet +Acked-by: Thomas Graf +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/dst_metadata.h | 18 ++++++++++++++++++ + net/core/dev.c | 7 +++++-- + 2 files changed, 23 insertions(+), 2 deletions(-) + +--- a/include/net/dst_metadata.h ++++ b/include/net/dst_metadata.h +@@ -44,6 +44,24 @@ static inline bool skb_valid_dst(const s + return dst && !(dst->flags & DST_METADATA); + } + ++static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, ++ const struct sk_buff *skb_b) ++{ ++ const struct metadata_dst *a, *b; ++ ++ if (!(skb_a->_skb_refdst | skb_b->_skb_refdst)) ++ return 0; ++ ++ a = (const struct metadata_dst *) skb_dst(skb_a); ++ b = (const struct metadata_dst *) skb_dst(skb_b); ++ ++ if (!a != !b || a->u.tun_info.options_len != b->u.tun_info.options_len) ++ return 1; ++ ++ return memcmp(&a->u.tun_info, &b->u.tun_info, ++ sizeof(a->u.tun_info) + a->u.tun_info.options_len); ++} ++ + struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags); + struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags); + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4145,6 +4145,7 @@ static void gro_list_prepare(struct napi + + diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; + diffs |= p->vlan_tci ^ skb->vlan_tci; ++ diffs |= skb_metadata_dst_cmp(p, skb); + if (maclen == ETH_HLEN) + diffs |= compare_ether_header(skb_mac_header(p), + skb_mac_header(skb)); +@@ -4342,10 +4343,12 @@ static gro_result_t napi_skb_finish(gro_ + break; + + case GRO_MERGED_FREE: +- if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) ++ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) { ++ skb_dst_drop(skb); + kmem_cache_free(skbuff_head_cache, skb); +- else ++ } else { + __kfree_skb(skb); ++ } + break; + + case GRO_HELD: diff --git a/queue-4.4/iff_no_queue-fix-for-drivers-not-calling-ether_setup.patch b/queue-4.4/iff_no_queue-fix-for-drivers-not-calling-ether_setup.patch new file mode 100644 index 00000000000..950386c9ec6 --- /dev/null +++ b/queue-4.4/iff_no_queue-fix-for-drivers-not-calling-ether_setup.patch @@ -0,0 +1,47 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Phil Sutter +Date: Wed, 17 Feb 2016 15:37:43 +0100 +Subject: IFF_NO_QUEUE: Fix for drivers not calling ether_setup() + +From: Phil Sutter + +[ Upstream commit a813104d923339144078939175faf4e66aca19b4 ] + +My implementation around IFF_NO_QUEUE driver flag assumed that leaving +tx_queue_len untouched (specifically: not setting it to zero) by drivers +would make it possible to assign a regular qdisc to them without having +to worry about setting tx_queue_len to a useful value. This was only +partially true: I overlooked that some drivers don't call ether_setup() +and therefore not initialize tx_queue_len to the default value of 1000. +Consequently, removing the workarounds in place for that case in qdisc +implementations which cared about it (namely, pfifo, bfifo, gred, htb, +plug and sfb) leads to problems with these specific interface types and +qdiscs. + +Luckily, there's already a sanitization point for drivers setting +tx_queue_len to zero, which can be reused to assign the fallback value +most qdisc implementations used, which is 1. + +Fixes: 348e3435cbefa ("net: sched: drop all special handling of tx_queue_len == 0") +Tested-by: Mathieu Desnoyers +Signed-off-by: Phil Sutter +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -7128,8 +7128,10 @@ struct net_device *alloc_netdev_mqs(int + dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; + setup(dev); + +- if (!dev->tx_queue_len) ++ if (!dev->tx_queue_len) { + dev->priv_flags |= IFF_NO_QUEUE; ++ dev->tx_queue_len = 1; ++ } + + dev->num_tx_queues = txqs; + dev->real_num_tx_queues = txqs; diff --git a/queue-4.4/inet-frag-always-orphan-skbs-inside-ip_defrag.patch b/queue-4.4/inet-frag-always-orphan-skbs-inside-ip_defrag.patch new file mode 100644 index 00000000000..087c594593e --- /dev/null +++ b/queue-4.4/inet-frag-always-orphan-skbs-inside-ip_defrag.patch @@ -0,0 +1,160 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Joe Stringer +Date: Fri, 22 Jan 2016 15:49:12 -0800 +Subject: inet: frag: Always orphan skbs inside ip_defrag() + +From: Joe Stringer + +[ Upstream commit 8282f27449bf15548cb82c77b6e04ee0ab827bdc ] + +Later parts of the stack (including fragmentation) expect that there is +never a socket attached to frag in a frag_list, however this invariant +was not enforced on all defrag paths. This could lead to the +BUG_ON(skb->sk) during ip_do_fragment(), as per the call stack at the +end of this commit message. + +While the call could be added to openvswitch to fix this particular +error, the head and tail of the frags list are already orphaned +indirectly inside ip_defrag(), so it seems like the remaining fragments +should all be orphaned in all circumstances. + +kernel BUG at net/ipv4/ip_output.c:586! +[...] +Call Trace: + + [] ? do_output.isra.29+0x1b0/0x1b0 [openvswitch] + [] ovs_fragment+0xcc/0x214 [openvswitch] + [] ? dst_discard_out+0x20/0x20 + [] ? dst_ifdown+0x80/0x80 + [] ? find_bucket.isra.2+0x62/0x70 [openvswitch] + [] ? mod_timer_pending+0x65/0x210 + [] ? __lock_acquire+0x3db/0x1b90 + [] ? nf_conntrack_in+0x252/0x500 [nf_conntrack] + [] ? __lock_is_held+0x54/0x70 + [] do_output.isra.29+0xe3/0x1b0 [openvswitch] + [] do_execute_actions+0xe11/0x11f0 [openvswitch] + [] ? __lock_is_held+0x54/0x70 + [] ovs_execute_actions+0x32/0xd0 [openvswitch] + [] ovs_dp_process_packet+0x85/0x140 [openvswitch] + [] ? __lock_is_held+0x54/0x70 + [] ovs_execute_actions+0xb2/0xd0 [openvswitch] + [] ovs_dp_process_packet+0x85/0x140 [openvswitch] + [] ? ovs_ct_get_labels+0x49/0x80 [openvswitch] + [] ovs_vport_receive+0x5d/0xa0 [openvswitch] + [] ? __lock_acquire+0x3db/0x1b90 + [] ? __lock_acquire+0x3db/0x1b90 + [] ? __lock_acquire+0x3db/0x1b90 + [] ? internal_dev_xmit+0x5/0x140 [openvswitch] + [] internal_dev_xmit+0x6c/0x140 [openvswitch] + [] ? internal_dev_xmit+0x5/0x140 [openvswitch] + [] dev_hard_start_xmit+0x2b9/0x5e0 + [] ? netif_skb_features+0xd1/0x1f0 + [] __dev_queue_xmit+0x800/0x930 + [] ? __dev_queue_xmit+0x50/0x930 + [] ? mark_held_locks+0x71/0x90 + [] ? neigh_resolve_output+0x106/0x220 + [] dev_queue_xmit+0x10/0x20 + [] neigh_resolve_output+0x178/0x220 + [] ? ip_finish_output2+0x1ff/0x590 + [] ip_finish_output2+0x1ff/0x590 + [] ? ip_finish_output2+0x7e/0x590 + [] ip_do_fragment+0x831/0x8a0 + [] ? ip_copy_metadata+0x1b0/0x1b0 + [] ip_fragment.constprop.49+0x43/0x80 + [] ip_finish_output+0x17c/0x340 + [] ? nf_hook_slow+0xe4/0x190 + [] ip_output+0x70/0x110 + [] ? 
ip_fragment.constprop.49+0x80/0x80 + [] ip_local_out+0x39/0x70 + [] ip_send_skb+0x19/0x40 + [] ip_push_pending_frames+0x33/0x40 + [] icmp_push_reply+0xea/0x120 + [] icmp_reply.constprop.23+0x1ed/0x230 + [] icmp_echo.part.21+0x4e/0x50 + [] ? __lock_is_held+0x54/0x70 + [] ? rcu_read_lock_held+0x5e/0x70 + [] icmp_echo+0x36/0x70 + [] icmp_rcv+0x271/0x450 + [] ip_local_deliver_finish+0x127/0x3a0 + [] ? ip_local_deliver_finish+0x41/0x3a0 + [] ip_local_deliver+0x60/0xd0 + [] ? ip_rcv_finish+0x560/0x560 + [] ip_rcv_finish+0xdd/0x560 + [] ip_rcv+0x283/0x3e0 + [] ? match_held_lock+0x192/0x200 + [] ? inet_del_offload+0x40/0x40 + [] __netif_receive_skb_core+0x392/0xae0 + [] ? process_backlog+0x8e/0x230 + [] ? mark_held_locks+0x71/0x90 + [] __netif_receive_skb+0x18/0x60 + [] process_backlog+0x78/0x230 + [] ? process_backlog+0xdd/0x230 + [] net_rx_action+0x155/0x400 + [] __do_softirq+0xcc/0x420 + [] ? ip_finish_output2+0x217/0x590 + [] do_softirq_own_stack+0x1c/0x30 + + [] do_softirq+0x4e/0x60 + [] __local_bh_enable_ip+0xa8/0xb0 + [] ip_finish_output2+0x240/0x590 + [] ? ip_do_fragment+0x831/0x8a0 + [] ip_do_fragment+0x831/0x8a0 + [] ? ip_copy_metadata+0x1b0/0x1b0 + [] ip_fragment.constprop.49+0x43/0x80 + [] ip_finish_output+0x17c/0x340 + [] ? nf_hook_slow+0xe4/0x190 + [] ip_output+0x70/0x110 + [] ? ip_fragment.constprop.49+0x80/0x80 + [] ip_local_out+0x39/0x70 + [] ip_send_skb+0x19/0x40 + [] ip_push_pending_frames+0x33/0x40 + [] raw_sendmsg+0x7d3/0xc30 + [] ? __lock_acquire+0x3db/0x1b90 + [] ? inet_sendmsg+0xc7/0x1d0 + [] ? __lock_is_held+0x54/0x70 + [] inet_sendmsg+0x10a/0x1d0 + [] ? inet_sendmsg+0x5/0x1d0 + [] sock_sendmsg+0x38/0x50 + [] ___sys_sendmsg+0x25f/0x270 + [] ? handle_mm_fault+0x8dd/0x1320 + [] ? _raw_spin_unlock+0x27/0x40 + [] ? __do_page_fault+0x1e2/0x460 + [] ? __fget_light+0x66/0x90 + [] __sys_sendmsg+0x42/0x80 + [] SyS_sendmsg+0x12/0x20 + [] entry_SYSCALL_64_fastpath+0x12/0x6f +Code: 00 00 44 89 e0 e9 7c fb ff ff 4c 89 ff e8 e7 e7 ff ff 41 8b 9d 80 00 00 00 2b 5d d4 89 d8 c1 f8 03 0f b7 c0 e9 33 ff ff f + 66 66 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 55 48 +RIP [] ip_do_fragment+0x892/0x8a0 + RSP + +Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") +Signed-off-by: Joe Stringer +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_fragment.c | 1 + + net/ipv4/netfilter/nf_defrag_ipv4.c | 2 -- + 2 files changed, 1 insertion(+), 2 deletions(-) + +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -661,6 +661,7 @@ int ip_defrag(struct net *net, struct sk + struct ipq *qp; + + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); ++ skb_orphan(skb); + + /* Lookup (or create) queue header */ + qp = ip_find(net, ip_hdr(skb), user, vif); +--- a/net/ipv4/netfilter/nf_defrag_ipv4.c ++++ b/net/ipv4/netfilter/nf_defrag_ipv4.c +@@ -27,8 +27,6 @@ static int nf_ct_ipv4_gather_frags(struc + { + int err; + +- skb_orphan(skb); +- + local_bh_disable(); + err = ip_defrag(net, skb, user); + local_bh_enable(); diff --git a/queue-4.4/ipv4-fix-memory-leaks-in-ip_cmsg_send-callers.patch b/queue-4.4/ipv4-fix-memory-leaks-in-ip_cmsg_send-callers.patch new file mode 100644 index 00000000000..19bc874d42e --- /dev/null +++ b/queue-4.4/ipv4-fix-memory-leaks-in-ip_cmsg_send-callers.patch @@ -0,0 +1,80 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Eric Dumazet +Date: Thu, 4 Feb 2016 06:23:28 -0800 +Subject: ipv4: fix memory leaks in ip_cmsg_send() callers + +From: Eric Dumazet + +[ Upstream commit 919483096bfe75dda338e98d56da91a263746a0a ] + +Dmitry reported memory leaks of IP options allocated in +ip_cmsg_send() when/if this function returns an error. + +Callers are responsible for the freeing. + +Many thanks to Dmitry for the report and diagnostic. + +Reported-by: Dmitry Vyukov +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_sockglue.c | 2 ++ + net/ipv4/ping.c | 4 +++- + net/ipv4/raw.c | 4 +++- + net/ipv4/udp.c | 4 +++- + 4 files changed, 11 insertions(+), 3 deletions(-) + +--- a/net/ipv4/ip_sockglue.c ++++ b/net/ipv4/ip_sockglue.c +@@ -249,6 +249,8 @@ int ip_cmsg_send(struct net *net, struct + switch (cmsg->cmsg_type) { + case IP_RETOPTS: + err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); ++ ++ /* Our caller is responsible for freeing ipc->opt */ + err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), + err < 40 ? 
err : 40); + if (err) +--- a/net/ipv4/ping.c ++++ b/net/ipv4/ping.c +@@ -746,8 +746,10 @@ static int ping_v4_sendmsg(struct sock * + + if (msg->msg_controllen) { + err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); +- if (err) ++ if (unlikely(err)) { ++ kfree(ipc.opt); + return err; ++ } + if (ipc.opt) + free = 1; + } +--- a/net/ipv4/raw.c ++++ b/net/ipv4/raw.c +@@ -547,8 +547,10 @@ static int raw_sendmsg(struct sock *sk, + + if (msg->msg_controllen) { + err = ip_cmsg_send(net, msg, &ipc, false); +- if (err) ++ if (unlikely(err)) { ++ kfree(ipc.opt); + goto out; ++ } + if (ipc.opt) + free = 1; + } +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -966,8 +966,10 @@ int udp_sendmsg(struct sock *sk, struct + if (msg->msg_controllen) { + err = ip_cmsg_send(sock_net(sk), msg, &ipc, + sk->sk_family == AF_INET6); +- if (err) ++ if (unlikely(err)) { ++ kfree(ipc.opt); + return err; ++ } + if (ipc.opt) + free = 1; + connected = 0; diff --git a/queue-4.4/ipv6-addrconf-fix-recursive-spin-lock-call.patch b/queue-4.4/ipv6-addrconf-fix-recursive-spin-lock-call.patch new file mode 100644 index 00000000000..60be45131fb --- /dev/null +++ b/queue-4.4/ipv6-addrconf-fix-recursive-spin-lock-call.patch @@ -0,0 +1,79 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: "subashab@codeaurora.org" +Date: Tue, 2 Feb 2016 02:11:10 +0000 +Subject: ipv6: addrconf: Fix recursive spin lock call + +From: "subashab@codeaurora.org" + +[ Upstream commit 16186a82de1fdd868255448274e64ae2616e2640 ] + +A rcu stall with the following backtrace was seen on a system with +forwarding, optimistic_dad and use_optimistic set. To reproduce, +set these flags and allow ipv6 autoconf. + +This occurs because the device write_lock is acquired while already +holding the read_lock. Back trace below - + +INFO: rcu_preempt self-detected stall on CPU { 1} (t=2100 jiffies + g=3992 c=3991 q=4471) +<6> Task dump for CPU 1: +<2> kworker/1:0 R running task 12168 15 2 0x00000002 +<2> Workqueue: ipv6_addrconf addrconf_dad_work +<6> Call trace: +<2> [] el1_irq+0x68/0xdc +<2> [] _raw_write_lock_bh+0x20/0x30 +<2> [] __ipv6_dev_ac_inc+0x64/0x1b4 +<2> [] addrconf_join_anycast+0x9c/0xc4 +<2> [] __ipv6_ifa_notify+0x160/0x29c +<2> [] ipv6_ifa_notify+0x50/0x70 +<2> [] addrconf_dad_work+0x314/0x334 +<2> [] process_one_work+0x244/0x3fc +<2> [] worker_thread+0x2f8/0x418 +<2> [] kthread+0xe0/0xec + +v2: do addrconf_dad_kick inside read lock and then acquire write +lock for ipv6_ifa_notify as suggested by Eric + +Fixes: 7fd2561e4ebdd ("net: ipv6: Add a sysctl to make optimistic +addresses useful candidates") + +Cc: Eric Dumazet +Cc: Erik Kline +Cc: Hannes Frederic Sowa +Signed-off-by: Subash Abhinov Kasiviswanathan +Acked-by: Hannes Frederic Sowa +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -3506,6 +3506,7 @@ static void addrconf_dad_begin(struct in + { + struct inet6_dev *idev = ifp->idev; + struct net_device *dev = idev->dev; ++ bool notify = false; + + addrconf_join_solict(dev, &ifp->addr); + +@@ -3551,7 +3552,7 @@ static void addrconf_dad_begin(struct in + /* Because optimistic nodes can use this address, + * notify listeners. If DAD fails, RTM_DELADDR is sent. 
+ */
+- ipv6_ifa_notify(RTM_NEWADDR, ifp);
++ notify = true;
+ }
+ }
+
+@@ -3559,6 +3560,8 @@ static void addrconf_dad_begin(struct in
+ out:
+ spin_unlock(&ifp->lock);
+ read_unlock_bh(&idev->lock);
++ if (notify)
++ ipv6_ifa_notify(RTM_NEWADDR, ifp);
+ }
+
+ static void addrconf_dad_start(struct inet6_ifaddr *ifp)
diff --git a/queue-4.4/ipv6-enforce-flowi6_oif-usage-in-ip6_dst_lookup_tail.patch b/queue-4.4/ipv6-enforce-flowi6_oif-usage-in-ip6_dst_lookup_tail.patch
new file mode 100644
index 00000000000..317912a64b4
--- /dev/null
+++ b/queue-4.4/ipv6-enforce-flowi6_oif-usage-in-ip6_dst_lookup_tail.patch
@@ -0,0 +1,109 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Paolo Abeni
+Date: Fri, 29 Jan 2016 12:30:19 +0100
+Subject: ipv6: enforce flowi6_oif usage in ip6_dst_lookup_tail()
+
+From: Paolo Abeni
+
+[ Upstream commit 6f21c96a78b835259546d8f3fb4edff0f651d478 ]
+
+The current implementation of ip6_dst_lookup_tail basically
+ignores the egress ifindex match: if the saddr is set,
+ip6_route_output() purposefully ignores flowi6_oif, due
+to the commit d46a9d678e4c ("net: ipv6: Dont add RT6_LOOKUP_F_IFACE
+flag if saddr set"); if the saddr is 'any', the first route lookup
+in ip6_dst_lookup_tail fails, but upon failure a second lookup will
+be performed with saddr set, thus ignoring the ifindex constraint.
+
+This commit adds an output route lookup function variant, which
+allows the caller to specify lookup flags, and modifies
+ip6_dst_lookup_tail() to enforce the ifindex match on the second
+lookup via said helper.
+
+ip6_route_output() becomes a static inline function built on
+top of ip6_route_output_flags(); as a side effect, out-of-tree
+modules now need a GPL license to access the output route lookup
+functionality.
+
+Signed-off-by: Paolo Abeni
+Acked-by: Hannes Frederic Sowa
+Acked-by: David Ahern
+Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip6_route.h | 12 ++++++++++-- + net/ipv6/ip6_output.c | 6 +++++- + net/ipv6/route.c | 7 +++---- + 3 files changed, 18 insertions(+), 7 deletions(-) + +--- a/include/net/ip6_route.h ++++ b/include/net/ip6_route.h +@@ -64,8 +64,16 @@ static inline bool rt6_need_strict(const + + void ip6_route_input(struct sk_buff *skb); + +-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, +- struct flowi6 *fl6); ++struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, ++ struct flowi6 *fl6, int flags); ++ ++static inline struct dst_entry *ip6_route_output(struct net *net, ++ const struct sock *sk, ++ struct flowi6 *fl6) ++{ ++ return ip6_route_output_flags(net, sk, fl6, 0); ++} ++ + struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, + int flags); + +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -909,6 +909,7 @@ static int ip6_dst_lookup_tail(struct ne + struct rt6_info *rt; + #endif + int err; ++ int flags = 0; + + /* The correct way to handle this would be to do + * ip6_route_get_saddr, and then ip6_route_output; however, +@@ -940,10 +941,13 @@ static int ip6_dst_lookup_tail(struct ne + dst_release(*dst); + *dst = NULL; + } ++ ++ if (fl6->flowi6_oif) ++ flags |= RT6_LOOKUP_F_IFACE; + } + + if (!*dst) +- *dst = ip6_route_output(net, sk, fl6); ++ *dst = ip6_route_output_flags(net, sk, fl6, flags); + + err = (*dst)->error; + if (err) +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1174,11 +1174,10 @@ static struct rt6_info *ip6_pol_route_ou + return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); + } + +-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, +- struct flowi6 *fl6) ++struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, ++ struct flowi6 *fl6, int flags) + { + struct dst_entry *dst; +- int flags = 0; + bool any_src; + + dst = l3mdev_rt6_dst_by_oif(net, fl6); +@@ -1199,7 +1198,7 @@ struct dst_entry *ip6_route_output(struc + + return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); + } +-EXPORT_SYMBOL(ip6_route_output); ++EXPORT_SYMBOL_GPL(ip6_route_output_flags); + + struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) + { diff --git a/queue-4.4/ipv6-fix-a-lockdep-splat.patch b/queue-4.4/ipv6-fix-a-lockdep-splat.patch new file mode 100644 index 00000000000..f85e130433a --- /dev/null +++ b/queue-4.4/ipv6-fix-a-lockdep-splat.patch @@ -0,0 +1,44 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Eric Dumazet +Date: Tue, 2 Feb 2016 17:55:01 -0800 +Subject: ipv6: fix a lockdep splat + +From: Eric Dumazet + +[ Upstream commit 44c3d0c1c0a880354e9de5d94175742e2c7c9683 ] + +Silence lockdep false positive about rcu_dereference() being +used in the wrong context. + +First one should use rcu_dereference_protected() as we own the spinlock. + +Second one should be a normal assignation, as no barrier is needed. + +Fixes: 18367681a10bd ("ipv6 flowlabel: Convert np->ipv6_fl_list to RCU.") +Reported-by: Dave Jones +Signed-off-by: Eric Dumazet +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_flowlabel.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_flowlabel.c ++++ b/net/ipv6/ip6_flowlabel.c +@@ -540,12 +540,13 @@ int ipv6_flowlabel_opt(struct sock *sk, + } + spin_lock_bh(&ip6_sk_fl_lock); + for (sflp = &np->ipv6_fl_list; +- (sfl = rcu_dereference(*sflp)) != NULL; ++ (sfl = rcu_dereference_protected(*sflp, ++ lockdep_is_held(&ip6_sk_fl_lock))) != NULL; + sflp = &sfl->next) { + if (sfl->fl->label == freq.flr_label) { + if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) + np->flow_label &= ~IPV6_FLOWLABEL_MASK; +- *sflp = rcu_dereference(sfl->next); ++ *sflp = sfl->next; + spin_unlock_bh(&ip6_sk_fl_lock); + fl_release(sfl->fl); + kfree_rcu(sfl, rcu); diff --git a/queue-4.4/ipv6-udp-use-sticky-pktinfo-egress-ifindex-on-connect.patch b/queue-4.4/ipv6-udp-use-sticky-pktinfo-egress-ifindex-on-connect.patch new file mode 100644 index 00000000000..a99eeb30e24 --- /dev/null +++ b/queue-4.4/ipv6-udp-use-sticky-pktinfo-egress-ifindex-on-connect.patch @@ -0,0 +1,36 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Paolo Abeni +Date: Fri, 29 Jan 2016 12:30:20 +0100 +Subject: ipv6/udp: use sticky pktinfo egress ifindex on connect() + +From: Paolo Abeni + +[ Upstream commit 1cdda91871470f15e79375991bd2eddc6e86ddb1 ] + +Currently, the egress interface index specified via IPV6_PKTINFO +is ignored by __ip6_datagram_connect(), so that RFC 3542 section 6.7 +can be subverted when the user space application calls connect() +before sendmsg(). +Fix it by initializing properly flowi6_oif in connect() before +performing the route lookup. + +Signed-off-by: Paolo Abeni +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/datagram.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/ipv6/datagram.c ++++ b/net/ipv6/datagram.c +@@ -162,6 +162,9 @@ ipv4_connected: + fl6.fl6_dport = inet->inet_dport; + fl6.fl6_sport = inet->inet_sport; + ++ if (!fl6.flowi6_oif) ++ fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; ++ + if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) + fl6.flowi6_oif = np->mcast_oif; + diff --git a/queue-4.4/l2tp-fix-error-creating-l2tp-tunnels.patch b/queue-4.4/l2tp-fix-error-creating-l2tp-tunnels.patch new file mode 100644 index 00000000000..40f859a4630 --- /dev/null +++ b/queue-4.4/l2tp-fix-error-creating-l2tp-tunnels.patch @@ -0,0 +1,59 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Mark Tomlinson +Date: Mon, 15 Feb 2016 16:24:44 +1300 +Subject: l2tp: Fix error creating L2TP tunnels + +From: Mark Tomlinson + +[ Upstream commit 853effc55b0f975abd6d318cca486a9c1b67e10f ] + +A previous commit (33f72e6) added notification via netlink for tunnels +when created/modified/deleted. If the notification returned an error, +this error was returned from the tunnel function. If there were no +listeners, the error code ESRCH was returned, even though having no +listeners is not an error. Other calls to this and other similar +notification functions either ignore the error code, or filter ESRCH. +This patch checks for ESRCH and does not flag this as an error. + +Reviewed-by: Hamish Martin +Signed-off-by: Mark Tomlinson +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_netlink.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +--- a/net/l2tp/l2tp_netlink.c ++++ b/net/l2tp/l2tp_netlink.c +@@ -124,8 +124,13 @@ static int l2tp_tunnel_notify(struct gen + ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, + NLM_F_ACK, tunnel, cmd); + +- if (ret >= 0) +- return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); ++ if (ret >= 0) { ++ ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); ++ /* We don't care if no one is listening */ ++ if (ret == -ESRCH) ++ ret = 0; ++ return ret; ++ } + + nlmsg_free(msg); + +@@ -147,8 +152,13 @@ static int l2tp_session_notify(struct ge + ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, + NLM_F_ACK, session, cmd); + +- if (ret >= 0) +- return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); ++ if (ret >= 0) { ++ ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); ++ /* We don't care if no one is listening */ ++ if (ret == -ESRCH) ++ ret = 0; ++ return ret; ++ } + + nlmsg_free(msg); + diff --git a/queue-4.4/lwt-fix-rx-checksum-setting-for-lwt-devices-tunneling-over-ipv6.patch b/queue-4.4/lwt-fix-rx-checksum-setting-for-lwt-devices-tunneling-over-ipv6.patch new file mode 100644 index 00000000000..701bd7db41d --- /dev/null +++ b/queue-4.4/lwt-fix-rx-checksum-setting-for-lwt-devices-tunneling-over-ipv6.patch @@ -0,0 +1,37 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Paolo Abeni +Date: Wed, 17 Feb 2016 19:30:01 +0100 +Subject: lwt: fix rx checksum setting for lwt devices tunneling over ipv6 + +From: Paolo Abeni + +[ Upstream commit c868ee7063bdb53f3ef9eac7bcec84960980b471 ] + +the commit 35e2d1152b22 ("tunnels: Allow IPv6 UDP checksums to be +correctly controlled.") changed the default xmit checksum setting +for lwt vxlan/geneve ipv6 tunnels, so that now the checksum is not +set into external UDP header. +This commit changes the rx checksum setting for both lwt vxlan/geneve +devices created by openvswitch accordingly, so that lwt over ipv6 +tunnel pairs are again able to communicate with default values. + +Signed-off-by: Paolo Abeni +Acked-by: Jiri Benc +Acked-by: Jesse Gross +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/vport-vxlan.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/openvswitch/vport-vxlan.c ++++ b/net/openvswitch/vport-vxlan.c +@@ -90,7 +90,7 @@ static struct vport *vxlan_tnl_create(co + int err; + struct vxlan_config conf = { + .no_share = true, +- .flags = VXLAN_F_COLLECT_METADATA, ++ .flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX, + }; + + if (!options) { diff --git a/queue-4.4/net-add-sysctl_max_skb_frags.patch b/queue-4.4/net-add-sysctl_max_skb_frags.patch new file mode 100644 index 00000000000..c2e30c9bc0c --- /dev/null +++ b/queue-4.4/net-add-sysctl_max_skb_frags.patch @@ -0,0 +1,99 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Hans Westgaard Ry +Date: Wed, 3 Feb 2016 09:26:57 +0100 +Subject: net:Add sysctl_max_skb_frags +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Hans Westgaard Ry + +[ Upstream commit 5f74f82ea34c0da80ea0b49192bb5ea06e063593 ] + +Devices may have limits on the number of fragments in an skb they support. +Current codebase uses a constant as maximum for number of fragments one +skb can hold and use. 
+When enabling scatter/gather and running traffic with many small messages +the codebase uses the maximum number of fragments and may thereby violate +the max for certain devices. +The patch introduces a global variable as max number of fragments. + +Signed-off-by: Hans Westgaard Ry +Reviewed-by: Håkon Bugge +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 1 + + net/core/skbuff.c | 2 ++ + net/core/sysctl_net_core.c | 10 ++++++++++ + net/ipv4/tcp.c | 4 ++-- + 4 files changed, 15 insertions(+), 2 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -219,6 +219,7 @@ struct sk_buff; + #else + #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1) + #endif ++extern int sysctl_max_skb_frags; + + typedef struct skb_frag_struct skb_frag_t; + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -79,6 +79,8 @@ + + struct kmem_cache *skbuff_head_cache __read_mostly; + static struct kmem_cache *skbuff_fclone_cache __read_mostly; ++int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; ++EXPORT_SYMBOL(sysctl_max_skb_frags); + + /** + * skb_panic - private function for out-of-line support +--- a/net/core/sysctl_net_core.c ++++ b/net/core/sysctl_net_core.c +@@ -26,6 +26,7 @@ static int zero = 0; + static int one = 1; + static int min_sndbuf = SOCK_MIN_SNDBUF; + static int min_rcvbuf = SOCK_MIN_RCVBUF; ++static int max_skb_frags = MAX_SKB_FRAGS; + + static int net_msg_warn; /* Unused, but still a sysctl */ + +@@ -392,6 +393,15 @@ static struct ctl_table net_core_table[] + .mode = 0644, + .proc_handler = proc_dointvec + }, ++ { ++ .procname = "max_skb_frags", ++ .data = &sysctl_max_skb_frags, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = &one, ++ .extra2 = &max_skb_frags, ++ }, + { } + }; + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -939,7 +939,7 @@ new_segment: + + i = skb_shinfo(skb)->nr_frags; + can_coalesce = skb_can_coalesce(skb, i, page, offset); +- if (!can_coalesce && i >= MAX_SKB_FRAGS) { ++ if (!can_coalesce && i >= sysctl_max_skb_frags) { + tcp_mark_push(tp, skb); + goto new_segment; + } +@@ -1212,7 +1212,7 @@ new_segment: + + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { +- if (i == MAX_SKB_FRAGS || !sg) { ++ if (i == sysctl_max_skb_frags || !sg) { + tcp_mark_push(tp, skb); + goto new_segment; + } diff --git a/queue-4.4/net-copy-inner-l3-and-l4-headers-as-unaligned-on-gre-teb.patch b/queue-4.4/net-copy-inner-l3-and-l4-headers-as-unaligned-on-gre-teb.patch new file mode 100644 index 00000000000..eeb02ceb348 --- /dev/null +++ b/queue-4.4/net-copy-inner-l3-and-l4-headers-as-unaligned-on-gre-teb.patch @@ -0,0 +1,38 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Alexander Duyck +Date: Tue, 9 Feb 2016 06:14:43 -0800 +Subject: net: Copy inner L3 and L4 headers as unaligned on GRE TEB + +From: Alexander Duyck + +[ Upstream commit 78565208d73ca9b654fb9a6b142214d52eeedfd1 ] + +This patch corrects the unaligned accesses seen on GRE TEB tunnels when +generating hash keys. Specifically what this patch does is make it so that +we force the use of skb_copy_bits when the GRE inner headers will be +unaligned due to NET_IP_ALIGNED being a non-zero value. + +Signed-off-by: Alexander Duyck +Acked-by: Tom Herbert +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/flow_dissector.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/net/core/flow_dissector.c ++++ b/net/core/flow_dissector.c +@@ -396,6 +396,13 @@ ip_proto_again: + goto out_bad; + proto = eth->h_proto; + nhoff += sizeof(*eth); ++ ++ /* Cap headers that we access via pointers at the ++ * end of the Ethernet header as our maximum alignment ++ * at that point is only 2 bytes. ++ */ ++ if (NET_IP_ALIGN) ++ hlen = nhoff; + } + + key_control->flags |= FLOW_DIS_ENCAPSULATION; diff --git a/queue-4.4/net-dp83640-fix-tx-timestamp-overflow-handling.patch b/queue-4.4/net-dp83640-fix-tx-timestamp-overflow-handling.patch new file mode 100644 index 00000000000..326615e86ac --- /dev/null +++ b/queue-4.4/net-dp83640-fix-tx-timestamp-overflow-handling.patch @@ -0,0 +1,68 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Manfred Rudigier +Date: Wed, 20 Jan 2016 11:22:28 +0100 +Subject: net: dp83640: Fix tx timestamp overflow handling. + +From: Manfred Rudigier + +[ Upstream commit 81e8f2e930fe76b9814c71b9d87c30760b5eb705 ] + +PHY status frames are not reliable, the PHY may not be able to send them +during heavy receive traffic. This overflow condition is signaled by the +PHY in the next status frame, but the driver did not make use of it. +Instead it always reported wrong tx timestamps to user space after an +overflow happened because it assigned newly received tx timestamps to old +packets in the queue. + +This commit fixes this issue by clearing the tx timestamp queue every time +an overflow happens, so that no timestamps are delivered for overflow +packets. This way time stamping will continue correctly after an overflow. + +Signed-off-by: Manfred Rudigier +Acked-by: Richard Cochran +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/dp83640.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +--- a/drivers/net/phy/dp83640.c ++++ b/drivers/net/phy/dp83640.c +@@ -845,6 +845,11 @@ static void decode_rxts(struct dp83640_p + struct skb_shared_hwtstamps *shhwtstamps = NULL; + struct sk_buff *skb; + unsigned long flags; ++ u8 overflow; ++ ++ overflow = (phy_rxts->ns_hi >> 14) & 0x3; ++ if (overflow) ++ pr_debug("rx timestamp queue overflow, count %d\n", overflow); + + spin_lock_irqsave(&dp83640->rx_lock, flags); + +@@ -887,6 +892,7 @@ static void decode_txts(struct dp83640_p + struct skb_shared_hwtstamps shhwtstamps; + struct sk_buff *skb; + u64 ns; ++ u8 overflow; + + /* We must already have the skb that triggered this. 
*/ + +@@ -896,6 +902,17 @@ static void decode_txts(struct dp83640_p + pr_debug("have timestamp but tx_queue empty\n"); + return; + } ++ ++ overflow = (phy_txts->ns_hi >> 14) & 0x3; ++ if (overflow) { ++ pr_debug("tx timestamp queue overflow, count %d\n", overflow); ++ while (skb) { ++ skb_complete_tx_timestamp(skb, NULL); ++ skb = skb_dequeue(&dp83640->tx_queue); ++ } ++ return; ++ } ++ + ns = phy2txts(phy_txts); + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + shhwtstamps.hwtstamp = ns_to_ktime(ns); diff --git a/queue-4.4/net-dsa-fix-mv88e6xxx-switches.patch b/queue-4.4/net-dsa-fix-mv88e6xxx-switches.patch new file mode 100644 index 00000000000..3b849c41044 --- /dev/null +++ b/queue-4.4/net-dsa-fix-mv88e6xxx-switches.patch @@ -0,0 +1,71 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Russell King +Date: Sun, 24 Jan 2016 09:22:05 +0000 +Subject: net: dsa: fix mv88e6xxx switches + +From: Russell King + +[ Upstream commit db0e51afa481088e6396f11e02018d64113a6578 ] + +Since commit 76e398a62712 ("net: dsa: use switchdev obj for VLAN add/del +ops"), the Marvell 88E6xxx switch has been unable to pass traffic +between ports - any received traffic is discarded by the switch. +Taking a port out of bridge mode and configuring a vlan on it also the +port to start passing traffic. + +With the debugfs files re-instated to allow debug of this issue by +comparing the register settings between the working and non-working +case, the reason becomes clear: + + GLOBAL GLOBAL2 SERDES 0 1 2 3 4 5 6 +- 7: 1111 707f 2001 2 2 2 2 2 0 2 ++ 7: 1111 707f 2001 1 1 1 1 1 0 1 + +Register 7 for the ports is the default vlan tag register, and in the +non-working setup, it has been set to 2, despite vlan 2 not being +configured. This causes the switch to drop all packets coming in to +these ports. The working setup has the default vlan tag register set +to 1, which is the default vlan when none is configured. + +Inspection of the code reveals why. The code prior to this commit +was: + +- for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { +... +- if (!err && vlan->flags & BRIDGE_VLAN_INFO_PVID) +- err = ds->drv->port_pvid_set(ds, p->port, vid); + +but the new code is: + ++ for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { +... ++ } +... ++ if (pvid) ++ err = _mv88e6xxx_port_pvid_set(ds, port, vid); + +This causes the new code to always set the default vlan to one higher +than the old code. + +Fix this. + +Fixes: 76e398a62712 ("net: dsa: use switchdev obj for VLAN add/del ops") +Cc: +Signed-off-by: Russell King +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/mv88e6xxx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/dsa/mv88e6xxx.c ++++ b/drivers/net/dsa/mv88e6xxx.c +@@ -1519,7 +1519,7 @@ int mv88e6xxx_port_vlan_add(struct dsa_s + + /* no PVID with ranges, otherwise it's a bug */ + if (pvid) +- err = _mv88e6xxx_port_pvid_set(ds, port, vid); ++ err = _mv88e6xxx_port_pvid_set(ds, port, vlan->vid_end); + unlock: + mutex_unlock(&ps->smi_mutex); + diff --git a/queue-4.4/net-mlx4_en-avoid-changing-dev-features-directly-in-run-time.patch b/queue-4.4/net-mlx4_en-avoid-changing-dev-features-directly-in-run-time.patch new file mode 100644 index 00000000000..617d0fb029d --- /dev/null +++ b/queue-4.4/net-mlx4_en-avoid-changing-dev-features-directly-in-run-time.patch @@ -0,0 +1,56 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Eugenia Emantayev +Date: Wed, 17 Feb 2016 17:24:27 +0200 +Subject: net/mlx4_en: Avoid changing dev->features directly in run-time + +From: Eugenia Emantayev + +[ Upstream commit 925ab1aa9394bbaeac47ee5b65d3fdf0fb8135cf ] + +It's forbidden to manually change dev->features in run-time. Currently, this is +done in the driver to make sure that GSO_UDP_TUNNEL is advertized only when +VXLAN tunnel is set. However, since the stack actually does features intersection +with hw_enc_features, we can safely revert to advertizing features early when +registering the netdevice. + +Fixes: f4a1edd56120 ('net/mlx4_en: Advertize encapsulation offloads [...]') +Signed-off-by: Eugenia Emantayev +Signed-off-by: Or Gerlitz +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +@@ -2381,8 +2381,6 @@ out: + /* set offloads */ + priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM | + NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL; +- priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; +- priv->dev->features |= NETIF_F_GSO_UDP_TUNNEL; + } + + static void mlx4_en_del_vxlan_offloads(struct work_struct *work) +@@ -2393,8 +2391,6 @@ static void mlx4_en_del_vxlan_offloads(s + /* unset offloads */ + priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM | + NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL); +- priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL; +- priv->dev->features &= ~NETIF_F_GSO_UDP_TUNNEL; + + ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, + VXLAN_STEER_BY_OUTER_MAC, 0); +@@ -3020,6 +3016,11 @@ int mlx4_en_init_netdev(struct mlx4_en_d + priv->rss_hash_fn = ETH_RSS_HASH_TOP; + } + ++ if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { ++ dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; ++ dev->features |= NETIF_F_GSO_UDP_TUNNEL; ++ } ++ + mdev->pndev[port] = dev; + mdev->upper[port] = NULL; + diff --git a/queue-4.4/net-mlx4_en-choose-time-stamping-shift-value-according-to-hw-frequency.patch b/queue-4.4/net-mlx4_en-choose-time-stamping-shift-value-according-to-hw-frequency.patch new file mode 100644 index 00000000000..ebb251048db --- /dev/null +++ b/queue-4.4/net-mlx4_en-choose-time-stamping-shift-value-according-to-hw-frequency.patch @@ -0,0 +1,74 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Eugenia Emantayev +Date: Wed, 17 Feb 2016 17:24:23 +0200 +Subject: net/mlx4_en: Choose time-stamping shift value according to HW frequency + +From: Eugenia Emantayev + +[ Upstream commit 
31c128b66e5b28f468076e4f3ca3025c35342041 ] + +Previously, the shift value used for time-stamping was constant and didn't +depend on the HW chip frequency. Change that to take the frequency into account +and calculate the maximal value in cycles per wraparound of ten seconds. This +time slot was chosen since it gives a good accuracy in time synchronization. + +Algorithm for shift value calculation: + * Round up the maximal value in cycles to nearest power of two + + * Calculate maximal multiplier by division of all 64 bits set + to above result + + * Then, invert the function clocksource_khz2mult() to get the shift from + maximal mult value + +Fixes: ec693d47010e ('net/mlx4_en: Add HW timestamping (TS) support') +Signed-off-by: Eugenia Emantayev +Reviewed-by: Matan Barak +Signed-off-by: Or Gerlitz +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/en_clock.c | 25 +++++++++++++++++++------ + 1 file changed, 19 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c +@@ -236,6 +236,24 @@ static const struct ptp_clock_info mlx4_ + .enable = mlx4_en_phc_enable, + }; + ++#define MLX4_EN_WRAP_AROUND_SEC 10ULL ++ ++/* This function calculates the max shift that enables the user range ++ * of MLX4_EN_WRAP_AROUND_SEC values in the cycles register. ++ */ ++static u32 freq_to_shift(u16 freq) ++{ ++ u32 freq_khz = freq * 1000; ++ u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; ++ u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? ++ max_val_cycles : roundup_pow_of_two(max_val_cycles) - 1; ++ /* calculate max possible multiplier in order to fit in 64bit */ ++ u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); ++ ++ /* This comes from the reverse of clocksource_khz2mult */ ++ return ilog2(div_u64(max_mul * freq_khz, 1000000)); ++} ++ + void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) + { + struct mlx4_dev *dev = mdev->dev; +@@ -254,12 +272,7 @@ void mlx4_en_init_timestamp(struct mlx4_ + memset(&mdev->cycles, 0, sizeof(mdev->cycles)); + mdev->cycles.read = mlx4_en_read_clock; + mdev->cycles.mask = CLOCKSOURCE_MASK(48); +- /* Using shift to make calculation more accurate. Since current HW +- * clock frequency is 427 MHz, and cycles are given using a 48 bits +- * register, the biggest shift when calculating using u64, is 14 +- * (max_cycles * multiplier < 2^64) +- */ +- mdev->cycles.shift = 14; ++ mdev->cycles.shift = freq_to_shift(dev->caps.hca_core_clock); + mdev->cycles.mult = + clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift); + mdev->nominal_c_mult = mdev->cycles.mult; diff --git a/queue-4.4/net-mlx4_en-count-hw-buffer-overrun-only-once.patch b/queue-4.4/net-mlx4_en-count-hw-buffer-overrun-only-once.patch new file mode 100644 index 00000000000..b208a18839a --- /dev/null +++ b/queue-4.4/net-mlx4_en-count-hw-buffer-overrun-only-once.patch @@ -0,0 +1,44 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Amir Vadai +Date: Wed, 17 Feb 2016 17:24:22 +0200 +Subject: net/mlx4_en: Count HW buffer overrun only once + +From: Amir Vadai + +[ Upstream commit 281e8b2fdf8e4ef366b899453cae50e09b577ada ] + +RdropOvflw counts overrun of HW buffer, therefore should +be used for rx_fifo_errors only. + +Currently RdropOvflw counter is mistakenly also set into +rx_missed_errors and rx_over_errors too, which makes the +device total dropped packets accounting to show wrong results. + +Fix that. 
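
Editor's note on the preceding net/mlx4_en time-stamping patch (not part of either upstream commit message): the freq_to_shift() helper above can be checked with a small worked example. The sketch below is a hedged userspace recast of that logic, not the kernel code itself; the kernel helpers is_power_of_2(), roundup_pow_of_two(), div_u64() and ilog2() are replaced with plain-C stand-ins, and WRAP_AROUND_SEC mirrors MLX4_EN_WRAP_AROUND_SEC.

    #include <stdio.h>
    #include <stdint.h>

    #define WRAP_AROUND_SEC 10ULL

    static unsigned int ilog2_u64(uint64_t v)
    {
            unsigned int l = 0;

            while (v >>= 1)
                    l++;
            return l;        /* floor(log2(v)), like the kernel's ilog2() */
    }

    static uint64_t roundup_pow_of_two_u64(uint64_t v)
    {
            uint64_t r = 1;

            while (r < v)
                    r <<= 1;
            return r;
    }

    static unsigned int freq_to_shift(uint16_t freq_mhz)
    {
            uint64_t freq_khz = (uint64_t)freq_mhz * 1000;
            /* cycles elapsed in one wraparound period of ten seconds */
            uint64_t max_val_cycles = freq_khz * 1000 * WRAP_AROUND_SEC;
            /* same "round up to (2^n - 1)" step as the kernel helper */
            uint64_t rounded = ((max_val_cycles + 1) & max_val_cycles) == 0 ?
                    max_val_cycles : roundup_pow_of_two_u64(max_val_cycles) - 1;
            /* largest multiplier whose product still fits in 64 bits */
            uint64_t max_mul = UINT64_MAX / rounded;

            /* invert clocksource_khz2mult() for that maximal multiplier */
            return ilog2_u64(max_mul * freq_khz / 1000000);
    }

    int main(void)
    {
            /* 427 MHz is the frequency the removed hard-coded comment assumed */
            printf("shift(%u MHz) = %u\n", 427, freq_to_shift(427));
            return 0;
    }

Compiled and run, this prints shift(427 MHz) = 30, against the old hard-coded shift of 14: because only ten seconds of cycles must now fit under the 64-bit product instead of the full 48-bit counter range, the frequency-aware computation can afford a much larger shift and therefore better resolution.
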
Use it for rx_fifo_errors only. + +Fixes: c27a02cd94d6 ('mlx4_en: Add driver for Mellanox ConnectX 10GbE NIC') +Signed-off-by: Amir Vadai +Signed-off-by: Eugenia Emantayev +Signed-off-by: Or Gerlitz +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/en_port.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c +@@ -238,11 +238,11 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_e + stats->collisions = 0; + stats->rx_dropped = be32_to_cpu(mlx4_en_stats->RDROP); + stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength); +- stats->rx_over_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); ++ stats->rx_over_errors = 0; + stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC); + stats->rx_frame_errors = 0; + stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); +- stats->rx_missed_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); ++ stats->rx_missed_errors = 0; + stats->tx_aborted_errors = 0; + stats->tx_carrier_errors = 0; + stats->tx_fifo_errors = 0; diff --git a/queue-4.4/net_sched-fix-reclassification-needs-to-consider-ether-protocol-changes.patch b/queue-4.4/net_sched-fix-reclassification-needs-to-consider-ether-protocol-changes.patch new file mode 100644 index 00000000000..47733dc96b3 --- /dev/null +++ b/queue-4.4/net_sched-fix-reclassification-needs-to-consider-ether-protocol-changes.patch @@ -0,0 +1,39 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Jamal Hadi Salim +Date: Thu, 18 Feb 2016 07:38:04 -0500 +Subject: net_sched fix: reclassification needs to consider ether protocol changes + +From: Jamal Hadi Salim + +[ Upstream commit 619fe32640b4b01f370574d50344ae0f62689816 ] + +actions could change the etherproto in particular with ethernet +tunnelled data. Typically such actions, after peeling the outer header, +will ask for the packet to be reclassified. We then need to restart +the classification with the new proto header. + +Example setup used to catch this: +sudo tc qdisc add dev $ETH ingress +sudo $TC filter add dev $ETH parent ffff: pref 1 protocol 802.1Q \ +u32 match u32 0 0 flowid 1:1 \ +action vlan pop reclassify + +Fixes: 3b3ae880266d ("net: sched: consolidate tc_classify{,_compat}") +Signed-off-by: Jamal Hadi Salim +Acked-by: Daniel Borkmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_api.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -1852,6 +1852,7 @@ reset: + } + + tp = old_tp; ++ protocol = tc_skb_protocol(skb); + goto reclassify; + #endif + } diff --git a/queue-4.4/pppoe-fix-reference-counting-in-pppoe-proxy.patch b/queue-4.4/pppoe-fix-reference-counting-in-pppoe-proxy.patch new file mode 100644 index 00000000000..34e4e3a9b7e --- /dev/null +++ b/queue-4.4/pppoe-fix-reference-counting-in-pppoe-proxy.patch @@ -0,0 +1,30 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Guillaume Nault +Date: Mon, 15 Feb 2016 17:01:10 +0100 +Subject: pppoe: fix reference counting in PPPoE proxy + +From: Guillaume Nault + +[ Upstream commit 29e73269aa4d36f92b35610c25f8b01c789b0dc8 ] + +Drop reference on the relay_po socket when __pppoe_xmit() succeeds. +This is already handled correctly in the error path. + +Signed-off-by: Guillaume Nault +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/pppoe.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/ppp/pppoe.c ++++ b/drivers/net/ppp/pppoe.c +@@ -395,6 +395,8 @@ static int pppoe_rcv_core(struct sock *s + + if (!__pppoe_xmit(sk_pppox(relay_po), skb)) + goto abort_put; ++ ++ sock_put(sk_pppox(relay_po)); + } else { + if (sock_queue_rcv_skb(sk, skb)) + goto abort_kfree; diff --git a/queue-4.4/pptp-fix-illegal-memory-access-caused-by-multiple-bind-s.patch b/queue-4.4/pptp-fix-illegal-memory-access-caused-by-multiple-bind-s.patch new file mode 100644 index 00000000000..a4d6ca0ecdd --- /dev/null +++ b/queue-4.4/pptp-fix-illegal-memory-access-caused-by-multiple-bind-s.patch @@ -0,0 +1,114 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Hannes Frederic Sowa +Date: Fri, 22 Jan 2016 01:39:43 +0100 +Subject: pptp: fix illegal memory access caused by multiple bind()s + +From: Hannes Frederic Sowa + +[ Upstream commit 9a368aff9cb370298fa02feeffa861f2db497c18 ] + +Several times already this has been reported as kasan reports caused by +syzkaller and trinity and people always looked at RCU races, but it is +much more simple. :) + +In case we bind a pptp socket multiple times, we simply add it to +the callid_sock list but don't remove the old binding. Thus the old +socket stays in the bucket with unused call_id indexes and doesn't get +cleaned up. This causes various forms of kasan reports which were hard +to pinpoint. + +Simply don't allow multiple binds and correct error handling in +pptp_bind. Also keep sk_state bits in place in pptp_connect. + +Fixes: 00959ade36acad ("PPTP: PPP over IPv4 (Point-to-Point Tunneling Protocol)") +Cc: Dmitry Kozlov +Cc: Sasha Levin +Cc: Dmitry Vyukov +Reported-by: Dmitry Vyukov +Cc: Dave Jones +Reported-by: Dave Jones +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/pptp.c | 34 ++++++++++++++++++++++++---------- + 1 file changed, 24 insertions(+), 10 deletions(-) + +--- a/drivers/net/ppp/pptp.c ++++ b/drivers/net/ppp/pptp.c +@@ -129,24 +129,27 @@ static int lookup_chan_dst(u16 call_id, + return i < MAX_CALLID; + } + +-static int add_chan(struct pppox_sock *sock) ++static int add_chan(struct pppox_sock *sock, ++ struct pptp_addr *sa) + { + static int call_id; + + spin_lock(&chan_lock); +- if (!sock->proto.pptp.src_addr.call_id) { ++ if (!sa->call_id) { + call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, call_id + 1); + if (call_id == MAX_CALLID) { + call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, 1); + if (call_id == MAX_CALLID) + goto out_err; + } +- sock->proto.pptp.src_addr.call_id = call_id; +- } else if (test_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap)) ++ sa->call_id = call_id; ++ } else if (test_bit(sa->call_id, callid_bitmap)) { + goto out_err; ++ } + +- set_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap); +- rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], sock); ++ sock->proto.pptp.src_addr = *sa; ++ set_bit(sa->call_id, callid_bitmap); ++ rcu_assign_pointer(callid_sock[sa->call_id], sock); + spin_unlock(&chan_lock); + + return 0; +@@ -416,7 +419,6 @@ static int pptp_bind(struct socket *sock + struct sock *sk = sock->sk; + struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr; + struct pppox_sock *po = pppox_sk(sk); +- struct pptp_opt *opt = &po->proto.pptp; + int error = 0; + + if (sockaddr_len < sizeof(struct sockaddr_pppox)) +@@ -424,10 +426,22 @@ static int pptp_bind(struct socket *sock + + lock_sock(sk); + +- opt->src_addr = sp->sa_addr.pptp; +- if (add_chan(po)) ++ if (sk->sk_state & PPPOX_DEAD) { ++ error = -EALREADY; ++ goto out; ++ } ++ ++ if (sk->sk_state & PPPOX_BOUND) { + error = -EBUSY; ++ goto out; ++ } ++ ++ if (add_chan(po, &sp->sa_addr.pptp)) ++ error = -EBUSY; ++ else ++ sk->sk_state |= PPPOX_BOUND; + ++out: + release_sock(sk); + return error; + } +@@ -498,7 +512,7 @@ static int pptp_connect(struct socket *s + } + + opt->dst_addr = sp->sa_addr.pptp; +- sk->sk_state = PPPOX_CONNECTED; ++ sk->sk_state |= PPPOX_CONNECTED; + + end: + release_sock(sk); diff --git a/queue-4.4/qmi_wwan-add-4g-lte-usb-modem-u901.patch b/queue-4.4/qmi_wwan-add-4g-lte-usb-modem-u901.patch new file mode 100644 index 00000000000..8ca239b3ed6 --- /dev/null +++ b/queue-4.4/qmi_wwan-add-4g-lte-usb-modem-u901.patch @@ -0,0 +1,45 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= +Date: Fri, 12 Feb 2016 16:42:14 +0100 +Subject: qmi_wwan: add "4G LTE usb-modem U901" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= + +[ Upstream commit aac8d3c282e024c344c5b86dc1eab7af88bb9716 ] + +Thomas reports: + +T: Bus=01 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 +D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 +P: Vendor=05c6 ProdID=6001 Rev=00.00 +S: Manufacturer=USB Modem +S: Product=USB Modem +S: SerialNumber=1234567890ABCDEF +C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA +I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option +I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option +I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option +I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan +I: If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) 
Sub=06 Prot=50 Driver=usb-storage + +Reported-by: Thomas Schäfer +Signed-off-by: Bjørn Mork +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/qmi_wwan.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -492,6 +492,7 @@ static const struct usb_device_id produc + + /* 3. Combined interface devices matching on interface number */ + {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */ ++ {QMI_FIXED_INTF(0x05c6, 0x6001, 3)}, /* 4G LTE usb-modem U901 */ + {QMI_FIXED_INTF(0x05c6, 0x7000, 0)}, + {QMI_FIXED_INTF(0x05c6, 0x7001, 1)}, + {QMI_FIXED_INTF(0x05c6, 0x7002, 1)}, diff --git a/queue-4.4/route-check-and-remove-route-cache-when-we-get-route.patch b/queue-4.4/route-check-and-remove-route-cache-when-we-get-route.patch new file mode 100644 index 00000000000..9496118b429 --- /dev/null +++ b/queue-4.4/route-check-and-remove-route-cache-when-we-get-route.patch @@ -0,0 +1,161 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Xin Long +Date: Thu, 18 Feb 2016 21:21:19 +0800 +Subject: route: check and remove route cache when we get route + +From: Xin Long + +[ Upstream commit deed49df7390d5239024199e249190328f1651e7 ] + +Since the gc of ipv4 route was removed, the route cached would has +no chance to be removed, and even it has been timeout, it still could +be used, cause no code to check it's expires. + +Fix this issue by checking and removing route cache when we get route. + +Signed-off-by: Xin Long +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip_fib.h | 1 + net/ipv4/route.c | 77 +++++++++++++++++++++++++++++++++++++++++---------- + 2 files changed, 64 insertions(+), 14 deletions(-) + +--- a/include/net/ip_fib.h ++++ b/include/net/ip_fib.h +@@ -61,6 +61,7 @@ struct fib_nh_exception { + struct rtable __rcu *fnhe_rth_input; + struct rtable __rcu *fnhe_rth_output; + unsigned long fnhe_stamp; ++ struct rcu_head rcu; + }; + + struct fnhe_hash_bucket { +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_most + static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; + static int ip_rt_min_advmss __read_mostly = 256; + ++static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; + /* + * Interface to generic destination cache. 
+ */ +@@ -755,7 +756,7 @@ static void __ip_do_redirect(struct rtab + struct fib_nh *nh = &FIB_RES_NH(res); + + update_or_create_fnhe(nh, fl4->daddr, new_gw, +- 0, 0); ++ 0, jiffies + ip_rt_gc_timeout); + } + if (kill_route) + rt->dst.obsolete = DST_OBSOLETE_KILL; +@@ -1556,6 +1557,36 @@ static void ip_handle_martian_source(str + #endif + } + ++static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) ++{ ++ struct fnhe_hash_bucket *hash; ++ struct fib_nh_exception *fnhe, __rcu **fnhe_p; ++ u32 hval = fnhe_hashfun(daddr); ++ ++ spin_lock_bh(&fnhe_lock); ++ ++ hash = rcu_dereference_protected(nh->nh_exceptions, ++ lockdep_is_held(&fnhe_lock)); ++ hash += hval; ++ ++ fnhe_p = &hash->chain; ++ fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); ++ while (fnhe) { ++ if (fnhe->fnhe_daddr == daddr) { ++ rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( ++ fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); ++ fnhe_flush_routes(fnhe); ++ kfree_rcu(fnhe, rcu); ++ break; ++ } ++ fnhe_p = &fnhe->fnhe_next; ++ fnhe = rcu_dereference_protected(fnhe->fnhe_next, ++ lockdep_is_held(&fnhe_lock)); ++ } ++ ++ spin_unlock_bh(&fnhe_lock); ++} ++ + /* called in rcu_read_lock() section */ + static int __mkroute_input(struct sk_buff *skb, + const struct fib_result *res, +@@ -1609,11 +1640,20 @@ static int __mkroute_input(struct sk_buf + + fnhe = find_exception(&FIB_RES_NH(*res), daddr); + if (do_cache) { +- if (fnhe) ++ if (fnhe) { + rth = rcu_dereference(fnhe->fnhe_rth_input); +- else +- rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); ++ if (rth && rth->dst.expires && ++ time_after(jiffies, rth->dst.expires)) { ++ ip_del_fnhe(&FIB_RES_NH(*res), daddr); ++ fnhe = NULL; ++ } else { ++ goto rt_cache; ++ } ++ } ++ ++ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + ++rt_cache: + if (rt_cache_valid(rth)) { + skb_dst_set_noref(skb, &rth->dst); + goto out; +@@ -2014,19 +2054,29 @@ static struct rtable *__mkroute_output(c + struct fib_nh *nh = &FIB_RES_NH(*res); + + fnhe = find_exception(nh, fl4->daddr); +- if (fnhe) ++ if (fnhe) { + prth = &fnhe->fnhe_rth_output; +- else { +- if (unlikely(fl4->flowi4_flags & +- FLOWI_FLAG_KNOWN_NH && +- !(nh->nh_gw && +- nh->nh_scope == RT_SCOPE_LINK))) { +- do_cache = false; +- goto add; ++ rth = rcu_dereference(*prth); ++ if (rth && rth->dst.expires && ++ time_after(jiffies, rth->dst.expires)) { ++ ip_del_fnhe(nh, fl4->daddr); ++ fnhe = NULL; ++ } else { ++ goto rt_cache; + } +- prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); + } ++ ++ if (unlikely(fl4->flowi4_flags & ++ FLOWI_FLAG_KNOWN_NH && ++ !(nh->nh_gw && ++ nh->nh_scope == RT_SCOPE_LINK))) { ++ do_cache = false; ++ goto add; ++ } ++ prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); + rth = rcu_dereference(*prth); ++ ++rt_cache: + if (rt_cache_valid(rth)) { + dst_hold(&rth->dst); + return rth; +@@ -2569,7 +2619,6 @@ void ip_rt_multicast_event(struct in_dev + } + + #ifdef CONFIG_SYSCTL +-static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; + static int ip_rt_gc_interval __read_mostly = 60 * HZ; + static int ip_rt_gc_min_interval __read_mostly = HZ / 2; + static int ip_rt_gc_elasticity __read_mostly = 8; diff --git a/queue-4.4/rtnl-rtm_getnetconf-fix-wrong-return-value.patch b/queue-4.4/rtnl-rtm_getnetconf-fix-wrong-return-value.patch new file mode 100644 index 00000000000..5282f6dbb50 --- /dev/null +++ b/queue-4.4/rtnl-rtm_getnetconf-fix-wrong-return-value.patch @@ -0,0 +1,43 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Anton Protopopov +Date: Tue, 16 Feb 2016 21:43:16 -0500 +Subject: rtnl: 
RTM_GETNETCONF: fix wrong return value + +From: Anton Protopopov + +[ Upstream commit a97eb33ff225f34a8124774b3373fd244f0e83ce ] + +An error response from a RTM_GETNETCONF request can return the positive +error value EINVAL in the struct nlmsgerr that can mislead userspace. + +Signed-off-by: Anton Protopopov +Acked-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/devinet.c | 2 +- + net/ipv6/addrconf.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv4/devinet.c ++++ b/net/ipv4/devinet.c +@@ -1847,7 +1847,7 @@ static int inet_netconf_get_devconf(stru + if (err < 0) + goto errout; + +- err = EINVAL; ++ err = -EINVAL; + if (!tb[NETCONFA_IFINDEX]) + goto errout; + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -583,7 +583,7 @@ static int inet6_netconf_get_devconf(str + if (err < 0) + goto errout; + +- err = EINVAL; ++ err = -EINVAL; + if (!tb[NETCONFA_IFINDEX]) + goto errout; + diff --git a/queue-4.4/sctp-allow-setting-sctp_sack_immediately-by-the-application.patch b/queue-4.4/sctp-allow-setting-sctp_sack_immediately-by-the-application.patch new file mode 100644 index 00000000000..a2060d9f344 --- /dev/null +++ b/queue-4.4/sctp-allow-setting-sctp_sack_immediately-by-the-application.patch @@ -0,0 +1,46 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Marcelo Ricardo Leitner +Date: Fri, 22 Jan 2016 18:29:49 -0200 +Subject: sctp: allow setting SCTP_SACK_IMMEDIATELY by the application + +From: Marcelo Ricardo Leitner + +[ Upstream commit 27f7ed2b11d42ab6d796e96533c2076ec220affc ] + +This patch extends commit b93d6471748d ("sctp: implement the sender side +for SACK-IMMEDIATELY extension") as it didn't white list +SCTP_SACK_IMMEDIATELY on sctp_msghdr_parse(), causing it to be +understood as an invalid flag and returning -EINVAL to the application. + +Note that the actual handling of the flag is already there in +sctp_datamsg_from_user(). + +https://tools.ietf.org/html/rfc7053#section-7 + +Fixes: b93d6471748d ("sctp: implement the sender side for SACK-IMMEDIATELY extension") +Signed-off-by: Marcelo Ricardo Leitner +Acked-by: Vlad Yasevich +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -6640,6 +6640,7 @@ static int sctp_msghdr_parse(const struc + + if (cmsgs->srinfo->sinfo_flags & + ~(SCTP_UNORDERED | SCTP_ADDR_OVER | ++ SCTP_SACK_IMMEDIATELY | + SCTP_ABORT | SCTP_EOF)) + return -EINVAL; + break; +@@ -6663,6 +6664,7 @@ static int sctp_msghdr_parse(const struc + + if (cmsgs->sinfo->snd_flags & + ~(SCTP_UNORDERED | SCTP_ADDR_OVER | ++ SCTP_SACK_IMMEDIATELY | + SCTP_ABORT | SCTP_EOF)) + return -EINVAL; + break; diff --git a/queue-4.4/sctp-fix-port-hash-table-size-computation.patch b/queue-4.4/sctp-fix-port-hash-table-size-computation.patch new file mode 100644 index 00000000000..7d6f3c212ae --- /dev/null +++ b/queue-4.4/sctp-fix-port-hash-table-size-computation.patch @@ -0,0 +1,131 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Neil Horman +Date: Thu, 18 Feb 2016 16:10:57 -0500 +Subject: sctp: Fix port hash table size computation + +From: Neil Horman + +[ Upstream commit d9749fb5942f51555dc9ce1ac0dbb1806960a975 ] + +Dmitry Vyukov noted recently that the sctp_port_hashtable had an error in +its size computation, observing that the current method never guaranteed +that the hashsize (measured in number of entries) would be a power of two, +which the input hash function for that table requires. The root cause of +the problem is that two values need to be computed (one, the allocation +order of the storage requries, as passed to __get_free_pages, and two the +number of entries for the hash table). Both need to be ^2, but for +different reasons, and the existing code is simply computing one order +value, and using it as the basis for both, which is wrong (i.e. it assumes +that ((1< +Reported-by: Dmitry Vyukov +CC: Dmitry Vyukov +CC: Vladislav Yasevich +CC: "David S. Miller" +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/protocol.c | 43 ++++++++++++++++++++++++++++++++++++------- + 1 file changed, 36 insertions(+), 7 deletions(-) + +--- a/net/sctp/protocol.c ++++ b/net/sctp/protocol.c +@@ -60,6 +60,8 @@ + #include + #include + ++#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024) ++ + /* Global data structures. */ + struct sctp_globals sctp_globals __read_mostly; + +@@ -1352,6 +1354,8 @@ static __init int sctp_init(void) + unsigned long limit; + int max_share; + int order; ++ int num_entries; ++ int max_entry_order; + + sock_skb_cb_check_size(sizeof(struct sctp_ulpevent)); + +@@ -1404,14 +1408,24 @@ static __init int sctp_init(void) + + /* Size and allocate the association hash table. + * The methodology is similar to that of the tcp hash tables. ++ * Though not identical. 
Start by getting a goal size + */ + if (totalram_pages >= (128 * 1024)) + goal = totalram_pages >> (22 - PAGE_SHIFT); + else + goal = totalram_pages >> (24 - PAGE_SHIFT); + +- for (order = 0; (1UL << order) < goal; order++) +- ; ++ /* Then compute the page order for said goal */ ++ order = get_order(goal); ++ ++ /* Now compute the required page order for the maximum sized table we ++ * want to create ++ */ ++ max_entry_order = get_order(MAX_SCTP_PORT_HASH_ENTRIES * ++ sizeof(struct sctp_bind_hashbucket)); ++ ++ /* Limit the page order by that maximum hash table size */ ++ order = min(order, max_entry_order); + + do { + sctp_assoc_hashsize = (1UL << order) * PAGE_SIZE / +@@ -1445,20 +1459,35 @@ static __init int sctp_init(void) + INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain); + } + +- /* Allocate and initialize the SCTP port hash table. */ ++ /* Allocate and initialize the SCTP port hash table. ++ * Note that order is initalized to start at the max sized ++ * table we want to support. If we can't get that many pages ++ * reduce the order and try again ++ */ + do { +- sctp_port_hashsize = (1UL << order) * PAGE_SIZE / +- sizeof(struct sctp_bind_hashbucket); +- if ((sctp_port_hashsize > (64 * 1024)) && order > 0) +- continue; + sctp_port_hashtable = (struct sctp_bind_hashbucket *) + __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order); + } while (!sctp_port_hashtable && --order > 0); ++ + if (!sctp_port_hashtable) { + pr_err("Failed bind hash alloc\n"); + status = -ENOMEM; + goto err_bhash_alloc; + } ++ ++ /* Now compute the number of entries that will fit in the ++ * port hash space we allocated ++ */ ++ num_entries = (1UL << order) * PAGE_SIZE / ++ sizeof(struct sctp_bind_hashbucket); ++ ++ /* And finish by rounding it down to the nearest power of two ++ * this wastes some memory of course, but its needed because ++ * the hash function operates based on the assumption that ++ * that the number of entries is a power of two ++ */ ++ sctp_port_hashsize = rounddown_pow_of_two(num_entries); ++ + for (i = 0; i < sctp_port_hashsize; i++) { + spin_lock_init(&sctp_port_hashtable[i].lock); + INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); diff --git a/queue-4.4/sctp-translate-network-order-to-host-order-when-users-get-a-hmacid.patch b/queue-4.4/sctp-translate-network-order-to-host-order-when-users-get-a-hmacid.patch new file mode 100644 index 00000000000..cd3ac0d5255 --- /dev/null +++ b/queue-4.4/sctp-translate-network-order-to-host-order-when-users-get-a-hmacid.patch @@ -0,0 +1,50 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Xin Long +Date: Wed, 3 Feb 2016 23:33:30 +0800 +Subject: sctp: translate network order to host order when users get a hmacid + +From: Xin Long + +[ Upstream commit 7a84bd46647ff181eb2659fdc99590e6f16e501d ] + +Commit ed5a377d87dc ("sctp: translate host order to network order when +setting a hmacid") corrected the hmacid byte-order when setting a hmacid. +but the same issue also exists on getting a hmacid. + +We fix it by changing hmacids to host order when users get them with +getsockopt. + +Fixes: Commit ed5a377d87dc ("sctp: translate host order to network order when setting a hmacid") +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -5542,6 +5542,7 @@ static int sctp_getsockopt_hmac_ident(st + struct sctp_hmac_algo_param *hmacs; + __u16 data_len = 0; + u32 num_idents; ++ int i; + + if (!ep->auth_enable) + return -EACCES; +@@ -5559,8 +5560,12 @@ static int sctp_getsockopt_hmac_ident(st + return -EFAULT; + if (put_user(num_idents, &p->shmac_num_idents)) + return -EFAULT; +- if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len)) +- return -EFAULT; ++ for (i = 0; i < num_idents; i++) { ++ __u16 hmacid = ntohs(hmacs->hmac_ids[i]); ++ ++ if (copy_to_user(&p->shmac_idents[i], &hmacid, sizeof(__u16))) ++ return -EFAULT; ++ } + return 0; + } + diff --git a/queue-4.4/series b/queue-4.4/series new file mode 100644 index 00000000000..8cd67313565 --- /dev/null +++ b/queue-4.4/series @@ -0,0 +1,47 @@ +af_iucv-validate-socket-address-length-in-iucv_sock_bind.patch +gro-make-gro-aware-of-lightweight-tunnels.patch +net-dp83640-fix-tx-timestamp-overflow-handling.patch +tunnels-allow-ipv6-udp-checksums-to-be-correctly-controlled.patch +lwt-fix-rx-checksum-setting-for-lwt-devices-tunneling-over-ipv6.patch +tcp-fix-null-deref-in-tcp_v4_send_ack.patch +af_unix-fix-struct-pid-memory-leak.patch +pptp-fix-illegal-memory-access-caused-by-multiple-bind-s.patch +sctp-allow-setting-sctp_sack_immediately-by-the-application.patch +net-dsa-fix-mv88e6xxx-switches.patch +tipc-fix-connection-abort-during-subscription-cancel.patch +inet-frag-always-orphan-skbs-inside-ip_defrag.patch +switchdev-require-rtnl-mutex-to-be-held-when-sending-fdb-notifications.patch +tcp-beware-of-alignments-in-tcp_get_info.patch +ipv6-enforce-flowi6_oif-usage-in-ip6_dst_lookup_tail.patch +ipv6-udp-use-sticky-pktinfo-egress-ifindex-on-connect.patch +ipv6-addrconf-fix-recursive-spin-lock-call.patch +ipv6-fix-a-lockdep-splat.patch +unix-correctly-track-in-flight-fds-in-sending-process-user_struct.patch +tcp-do-not-drop-syn_recv-on-all-icmp-reports.patch +net-add-sysctl_max_skb_frags.patch +tg3-fix-for-tg3-transmit-queue-0-timed-out-when-too-many-gso_segs.patch +enic-increment-devcmd2-result-ring-in-case-of-timeout.patch +sctp-translate-network-order-to-host-order-when-users-get-a-hmacid.patch +net-copy-inner-l3-and-l4-headers-as-unaligned-on-gre-teb.patch +flow_dissector-fix-unaligned-access-in-__skb_flow_dissector-when-used-by-eth_get_headlen.patch +bpf-fix-branch-offset-adjustment-on-backjumps-after-patching-ctx-expansion.patch +bonding-fix-arp-monitor-validation.patch +ipv4-fix-memory-leaks-in-ip_cmsg_send-callers.patch +af_unix-don-t-set-err-in-unix_stream_read_generic-unless-there-was-an-error.patch +af_unix-guard-against-other-sk-in-unix_dgram_sendmsg.patch +tipc-fix-premature-addition-of-node-to-lookup-table.patch +tcp-md5-release-request-socket-instead-of-listener.patch +qmi_wwan-add-4g-lte-usb-modem-u901.patch +net-mlx4_en-count-hw-buffer-overrun-only-once.patch +net-mlx4_en-choose-time-stamping-shift-value-according-to-hw-frequency.patch +net-mlx4_en-avoid-changing-dev-features-directly-in-run-time.patch +l2tp-fix-error-creating-l2tp-tunnels.patch +pppoe-fix-reference-counting-in-pppoe-proxy.patch +net_sched-fix-reclassification-needs-to-consider-ether-protocol-changes.patch +route-check-and-remove-route-cache-when-we-get-route.patch +tcp-dccp-fix-another-race-at-listener-dismantle.patch +iff_no_queue-fix-for-drivers-not-calling-ether_setup.patch 
+rtnl-rtm_getnetconf-fix-wrong-return-value.patch +tipc-unlock-in-error-path.patch +unix_diag-fix-incorrect-sign-extension-in-unix_lookup_by_ino.patch +sctp-fix-port-hash-table-size-computation.patch diff --git a/queue-4.4/switchdev-require-rtnl-mutex-to-be-held-when-sending-fdb-notifications.patch b/queue-4.4/switchdev-require-rtnl-mutex-to-be-held-when-sending-fdb-notifications.patch new file mode 100644 index 00000000000..cd859bb1b13 --- /dev/null +++ b/queue-4.4/switchdev-require-rtnl-mutex-to-be-held-when-sending-fdb-notifications.patch @@ -0,0 +1,167 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Ido Schimmel +Date: Wed, 27 Jan 2016 15:16:43 +0100 +Subject: switchdev: Require RTNL mutex to be held when sending FDB notifications + +From: Ido Schimmel + +[ Upstream commit 4f2c6ae5c64c353fb1b0425e4747e5603feadba1 ] + +When switchdev drivers process FDB notifications from the underlying +device they resolve the netdev to which the entry points to and notify +the bridge using the switchdev notifier. + +However, since the RTNL mutex is not held there is nothing preventing +the netdev from disappearing in the middle, which will cause +br_switchdev_event() to dereference a non-existing netdev. + +Make switchdev drivers hold the lock at the beginning of the +notification processing session and release it once it ends, after +notifying the bridge. + +Also, remove switchdev_mutex and fdb_lock, as they are no longer needed +when RTNL mutex is held. + +Fixes: 03bf0c281234 ("switchdev: introduce switchdev notifier") +Signed-off-by: Ido Schimmel +Signed-off-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 3 +++ + drivers/net/ethernet/rocker/rocker.c | 2 ++ + net/bridge/br.c | 3 +-- + net/switchdev/switchdev.c | 15 ++++++++------- + 4 files changed, 14 insertions(+), 9 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +@@ -45,6 +45,7 @@ + #include + #include + #include ++#include + #include + + #include "spectrum.h" +@@ -812,6 +813,7 @@ static void mlxsw_sp_fdb_notify_work(str + + mlxsw_sp = container_of(work, struct mlxsw_sp, fdb_notify.dw.work); + ++ rtnl_lock(); + do { + mlxsw_reg_sfn_pack(sfn_pl); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl); +@@ -824,6 +826,7 @@ static void mlxsw_sp_fdb_notify_work(str + mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i); + + } while (num_rec); ++ rtnl_unlock(); + + kfree(sfn_pl); + mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp); +--- a/drivers/net/ethernet/rocker/rocker.c ++++ b/drivers/net/ethernet/rocker/rocker.c +@@ -3531,12 +3531,14 @@ static void rocker_port_fdb_learn_work(s + info.addr = lw->addr; + info.vid = lw->vid; + ++ rtnl_lock(); + if (learned && removing) + call_switchdev_notifiers(SWITCHDEV_FDB_DEL, + lw->rocker_port->dev, &info.info); + else if (learned && !removing) + call_switchdev_notifiers(SWITCHDEV_FDB_ADD, + lw->rocker_port->dev, &info.info); ++ rtnl_unlock(); + + rocker_port_kfree(lw->trans, work); + } +--- a/net/bridge/br.c ++++ b/net/bridge/br.c +@@ -121,6 +121,7 @@ static struct notifier_block br_device_n + .notifier_call = br_device_event + }; + ++/* called with RTNL */ + static int br_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) + { +@@ -130,7 +131,6 @@ static int br_switchdev_event(struct not + struct switchdev_notifier_fdb_info *fdb_info; + int err = NOTIFY_DONE; + +- rtnl_lock(); + 
p = br_port_get_rtnl(dev); + if (!p) + goto out; +@@ -155,7 +155,6 @@ static int br_switchdev_event(struct not + } + + out: +- rtnl_unlock(); + return err; + } + +--- a/net/switchdev/switchdev.c ++++ b/net/switchdev/switchdev.c +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -565,7 +566,6 @@ int switchdev_port_obj_dump(struct net_d + } + EXPORT_SYMBOL_GPL(switchdev_port_obj_dump); + +-static DEFINE_MUTEX(switchdev_mutex); + static RAW_NOTIFIER_HEAD(switchdev_notif_chain); + + /** +@@ -580,9 +580,9 @@ int register_switchdev_notifier(struct n + { + int err; + +- mutex_lock(&switchdev_mutex); ++ rtnl_lock(); + err = raw_notifier_chain_register(&switchdev_notif_chain, nb); +- mutex_unlock(&switchdev_mutex); ++ rtnl_unlock(); + return err; + } + EXPORT_SYMBOL_GPL(register_switchdev_notifier); +@@ -598,9 +598,9 @@ int unregister_switchdev_notifier(struct + { + int err; + +- mutex_lock(&switchdev_mutex); ++ rtnl_lock(); + err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb); +- mutex_unlock(&switchdev_mutex); ++ rtnl_unlock(); + return err; + } + EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); +@@ -614,16 +614,17 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_n + * Call all network notifier blocks. This should be called by driver + * when it needs to propagate hardware event. + * Return values are same as for atomic_notifier_call_chain(). ++ * rtnl_lock must be held. + */ + int call_switchdev_notifiers(unsigned long val, struct net_device *dev, + struct switchdev_notifier_info *info) + { + int err; + ++ ASSERT_RTNL(); ++ + info->dev = dev; +- mutex_lock(&switchdev_mutex); + err = raw_notifier_call_chain(&switchdev_notif_chain, val, info); +- mutex_unlock(&switchdev_mutex); + return err; + } + EXPORT_SYMBOL_GPL(call_switchdev_notifiers); diff --git a/queue-4.4/tcp-beware-of-alignments-in-tcp_get_info.patch b/queue-4.4/tcp-beware-of-alignments-in-tcp_get_info.patch new file mode 100644 index 00000000000..d2fc6f46ed8 --- /dev/null +++ b/queue-4.4/tcp-beware-of-alignments-in-tcp_get_info.patch @@ -0,0 +1,67 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Eric Dumazet +Date: Wed, 27 Jan 2016 10:52:43 -0800 +Subject: tcp: beware of alignments in tcp_get_info() + +From: Eric Dumazet + +[ Upstream commit ff5d749772018602c47509bdc0093ff72acd82ec ] + +With some combinations of user provided flags in netlink command, +it is possible to call tcp_get_info() with a buffer that is not 8-bytes +aligned. + +It does matter on some arches, so we need to use put_unaligned() to +store the u64 fields. + +Current iproute2 package does not trigger this particular issue. + +Fixes: 0df48c26d841 ("tcp: add tcpi_bytes_acked to tcp_info") +Fixes: 977cb0ecf82e ("tcp: add pacing_rate information into tcp_info") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -279,6 +279,7 @@ + + #include + #include ++#include + #include + + int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; +@@ -2637,6 +2638,7 @@ void tcp_get_info(struct sock *sk, struc + const struct inet_connection_sock *icsk = inet_csk(sk); + u32 now = tcp_time_stamp; + unsigned int start; ++ u64 rate64; + u32 rate; + + memset(info, 0, sizeof(*info)); +@@ -2702,15 +2704,17 @@ void tcp_get_info(struct sock *sk, struc + info->tcpi_total_retrans = tp->total_retrans; + + rate = READ_ONCE(sk->sk_pacing_rate); +- info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL; ++ rate64 = rate != ~0U ? rate : ~0ULL; ++ put_unaligned(rate64, &info->tcpi_pacing_rate); + + rate = READ_ONCE(sk->sk_max_pacing_rate); +- info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; ++ rate64 = rate != ~0U ? rate : ~0ULL; ++ put_unaligned(rate64, &info->tcpi_max_pacing_rate); + + do { + start = u64_stats_fetch_begin_irq(&tp->syncp); +- info->tcpi_bytes_acked = tp->bytes_acked; +- info->tcpi_bytes_received = tp->bytes_received; ++ put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked); ++ put_unaligned(tp->bytes_received, &info->tcpi_bytes_received); + } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); + info->tcpi_segs_out = tp->segs_out; + info->tcpi_segs_in = tp->segs_in; diff --git a/queue-4.4/tcp-dccp-fix-another-race-at-listener-dismantle.patch b/queue-4.4/tcp-dccp-fix-another-race-at-listener-dismantle.patch new file mode 100644 index 00000000000..a1a4a100e29 --- /dev/null +++ b/queue-4.4/tcp-dccp-fix-another-race-at-listener-dismantle.patch @@ -0,0 +1,263 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Eric Dumazet +Date: Thu, 18 Feb 2016 05:39:18 -0800 +Subject: tcp/dccp: fix another race at listener dismantle + +From: Eric Dumazet + +[ Upstream commit 7716682cc58e305e22207d5bb315f26af6b1e243 ] + +Ilya reported following lockdep splat: + +kernel: ========================= +kernel: [ BUG: held lock freed! ] +kernel: 4.5.0-rc1-ceph-00026-g5e0a311 #1 Not tainted +kernel: ------------------------- +kernel: swapper/5/0 is freeing memory +ffff880035c9d200-ffff880035c9dbff, with a lock still held there! +kernel: (&(&queue->rskq_lock)->rlock){+.-...}, at: +[] inet_csk_reqsk_queue_add+0x28/0xa0 +kernel: 4 locks held by swapper/5/0: +kernel: #0: (rcu_read_lock){......}, at: [] +netif_receive_skb_internal+0x4b/0x1f0 +kernel: #1: (rcu_read_lock){......}, at: [] +ip_local_deliver_finish+0x3f/0x380 +kernel: #2: (slock-AF_INET){+.-...}, at: [] +sk_clone_lock+0x19b/0x440 +kernel: #3: (&(&queue->rskq_lock)->rlock){+.-...}, at: +[] inet_csk_reqsk_queue_add+0x28/0xa0 + +To properly fix this issue, inet_csk_reqsk_queue_add() needs +to return to its callers if the child as been queued +into accept queue. + +We also need to make sure listener is still there before +calling sk->sk_data_ready(), by holding a reference on it, +since the reference carried by the child can disappear as +soon as the child is put on accept queue. + +Reported-by: Ilya Dryomov +Fixes: ebb516af60e1 ("tcp/dccp: fix race at listener dismantle phase") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_connection_sock.h | 5 +++-- + net/dccp/ipv4.c | 14 +++++++------- + net/dccp/ipv6.c | 14 +++++++------- + net/ipv4/inet_connection_sock.c | 14 +++++++------- + net/ipv4/tcp_ipv4.c | 14 +++++++------- + net/ipv6/tcp_ipv6.c | 14 +++++++------- + 6 files changed, 38 insertions(+), 37 deletions(-) + +--- a/include/net/inet_connection_sock.h ++++ b/include/net/inet_connection_sock.h +@@ -270,8 +270,9 @@ struct dst_entry *inet_csk_route_child_s + struct sock *newsk, + const struct request_sock *req); + +-void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, +- struct sock *child); ++struct sock *inet_csk_reqsk_queue_add(struct sock *sk, ++ struct request_sock *req, ++ struct sock *child); + void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, + unsigned long timeout); + struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -824,26 +824,26 @@ lookup: + + if (sk->sk_state == DCCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); +- struct sock *nsk = NULL; ++ struct sock *nsk; + + sk = req->rsk_listener; +- if (likely(sk->sk_state == DCCP_LISTEN)) { +- nsk = dccp_check_req(sk, skb, req); +- } else { ++ if (unlikely(sk->sk_state != DCCP_LISTEN)) { + inet_csk_reqsk_queue_drop_and_put(sk, req); + goto lookup; + } ++ sock_hold(sk); ++ nsk = dccp_check_req(sk, skb, req); + if (!nsk) { + reqsk_put(req); +- goto discard_it; ++ goto discard_and_relse; + } + if (nsk == sk) { +- sock_hold(sk); + reqsk_put(req); + } else if (dccp_child_process(sk, nsk, skb)) { + dccp_v4_ctl_send_reset(sk, skb); +- goto discard_it; ++ goto discard_and_relse; + } else { ++ sock_put(sk); + return 0; + } + } +--- a/net/dccp/ipv6.c ++++ b/net/dccp/ipv6.c +@@ -691,26 +691,26 @@ lookup: + + if (sk->sk_state == DCCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); +- struct sock *nsk = NULL; ++ struct sock *nsk; + + sk = req->rsk_listener; +- if (likely(sk->sk_state == DCCP_LISTEN)) { +- nsk = dccp_check_req(sk, skb, req); +- } else { ++ if (unlikely(sk->sk_state != DCCP_LISTEN)) { + inet_csk_reqsk_queue_drop_and_put(sk, req); + goto lookup; + } ++ sock_hold(sk); ++ nsk = dccp_check_req(sk, skb, req); + if (!nsk) { + reqsk_put(req); +- goto discard_it; ++ goto discard_and_relse; + } + if (nsk == sk) { +- sock_hold(sk); + reqsk_put(req); + } else if (dccp_child_process(sk, nsk, skb)) { + dccp_v6_ctl_send_reset(sk, skb); +- goto discard_it; ++ goto discard_and_relse; + } else { ++ sock_put(sk); + return 0; + } + } +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -789,14 +789,16 @@ static void inet_child_forget(struct soc + reqsk_put(req); + } + +-void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, +- struct sock *child) ++struct sock *inet_csk_reqsk_queue_add(struct sock *sk, ++ struct request_sock *req, ++ struct sock *child) + { + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; + + spin_lock(&queue->rskq_lock); + if (unlikely(sk->sk_state != TCP_LISTEN)) { + inet_child_forget(sk, req, child); ++ child = NULL; + } else { + req->sk = child; + req->dl_next = NULL; +@@ -808,6 +810,7 @@ void inet_csk_reqsk_queue_add(struct soc + sk_acceptq_added(sk); + } + spin_unlock(&queue->rskq_lock); ++ return child; + } + EXPORT_SYMBOL(inet_csk_reqsk_queue_add); + +@@ -817,11 +820,8 @@ struct sock *inet_csk_complete_hashdance + if (own_req) { + 
inet_csk_reqsk_queue_drop(sk, req); + reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); +- inet_csk_reqsk_queue_add(sk, req, child); +- /* Warning: caller must not call reqsk_put(req); +- * child stole last reference on it. +- */ +- return child; ++ if (inet_csk_reqsk_queue_add(sk, req, child)) ++ return child; + } + /* Too bad, another child took ownership of the request, undo. */ + bh_unlock_sock(child); +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -1594,30 +1594,30 @@ process: + + if (sk->sk_state == TCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); +- struct sock *nsk = NULL; ++ struct sock *nsk; + + sk = req->rsk_listener; + if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) { + reqsk_put(req); + goto discard_it; + } +- if (likely(sk->sk_state == TCP_LISTEN)) { +- nsk = tcp_check_req(sk, skb, req, false); +- } else { ++ if (unlikely(sk->sk_state != TCP_LISTEN)) { + inet_csk_reqsk_queue_drop_and_put(sk, req); + goto lookup; + } ++ sock_hold(sk); ++ nsk = tcp_check_req(sk, skb, req, false); + if (!nsk) { + reqsk_put(req); +- goto discard_it; ++ goto discard_and_relse; + } + if (nsk == sk) { +- sock_hold(sk); + reqsk_put(req); + } else if (tcp_child_process(sk, nsk, skb)) { + tcp_v4_send_reset(nsk, skb); +- goto discard_it; ++ goto discard_and_relse; + } else { ++ sock_put(sk); + return 0; + } + } +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -1388,7 +1388,7 @@ process: + + if (sk->sk_state == TCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); +- struct sock *nsk = NULL; ++ struct sock *nsk; + + sk = req->rsk_listener; + tcp_v6_fill_cb(skb, hdr, th); +@@ -1396,24 +1396,24 @@ process: + reqsk_put(req); + goto discard_it; + } +- if (likely(sk->sk_state == TCP_LISTEN)) { +- nsk = tcp_check_req(sk, skb, req, false); +- } else { ++ if (unlikely(sk->sk_state != TCP_LISTEN)) { + inet_csk_reqsk_queue_drop_and_put(sk, req); + goto lookup; + } ++ sock_hold(sk); ++ nsk = tcp_check_req(sk, skb, req, false); + if (!nsk) { + reqsk_put(req); +- goto discard_it; ++ goto discard_and_relse; + } + if (nsk == sk) { +- sock_hold(sk); + reqsk_put(req); + tcp_v6_restore_cb(skb); + } else if (tcp_child_process(sk, nsk, skb)) { + tcp_v6_send_reset(nsk, skb); +- goto discard_it; ++ goto discard_and_relse; + } else { ++ sock_put(sk); + return 0; + } + } diff --git a/queue-4.4/tcp-do-not-drop-syn_recv-on-all-icmp-reports.patch b/queue-4.4/tcp-do-not-drop-syn_recv-on-all-icmp-reports.patch new file mode 100644 index 00000000000..55a018a5aba --- /dev/null +++ b/queue-4.4/tcp-do-not-drop-syn_recv-on-all-icmp-reports.patch @@ -0,0 +1,105 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Eric Dumazet +Date: Tue, 2 Feb 2016 19:31:12 -0800 +Subject: tcp: do not drop syn_recv on all icmp reports + +From: Eric Dumazet + +[ Upstream commit 9cf7490360bf2c46a16b7525f899e4970c5fc144 ] + +Petr Novopashenniy reported that ICMP redirects on SYN_RECV sockets +were leading to RST. + +This is of course incorrect. + +A specific list of ICMP messages should be able to drop a SYN_RECV. + +For instance, a REDIRECT on SYN_RECV shall be ignored, as we do +not hold a dst per SYN_RECV pseudo request. + +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111751 +Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") +Reported-by: Petr Novopashenniy +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tcp.h | 2 +- + net/ipv4/tcp_ipv4.c | 11 ++++++++--- + net/ipv6/tcp_ipv6.c | 5 +++-- + 3 files changed, 12 insertions(+), 6 deletions(-) + +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -449,7 +449,7 @@ const u8 *tcp_parse_md5sig_option(const + + void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); + void tcp_v4_mtu_reduced(struct sock *sk); +-void tcp_req_err(struct sock *sk, u32 seq); ++void tcp_req_err(struct sock *sk, u32 seq, bool abort); + int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); + struct sock *tcp_create_openreq_child(const struct sock *sk, + struct request_sock *req, +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -312,7 +312,7 @@ static void do_redirect(struct sk_buff * + + + /* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */ +-void tcp_req_err(struct sock *sk, u32 seq) ++void tcp_req_err(struct sock *sk, u32 seq, bool abort) + { + struct request_sock *req = inet_reqsk(sk); + struct net *net = sock_net(sk); +@@ -324,7 +324,7 @@ void tcp_req_err(struct sock *sk, u32 se + + if (seq != tcp_rsk(req)->snt_isn) { + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); +- } else { ++ } else if (abort) { + /* + * Still in SYN_RECV, just remove it silently. + * There is no good way to pass the error to the newly +@@ -384,7 +384,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb + } + seq = ntohl(th->seq); + if (sk->sk_state == TCP_NEW_SYN_RECV) +- return tcp_req_err(sk, seq); ++ return tcp_req_err(sk, seq, ++ type == ICMP_PARAMETERPROB || ++ type == ICMP_TIME_EXCEEDED || ++ (type == ICMP_DEST_UNREACH && ++ (code == ICMP_NET_UNREACH || ++ code == ICMP_HOST_UNREACH))); + + bh_lock_sock(sk); + /* If too many ICMPs get dropped on busy +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -328,6 +328,7 @@ static void tcp_v6_err(struct sk_buff *s + struct tcp_sock *tp; + __u32 seq, snd_una; + struct sock *sk; ++ bool fatal; + int err; + + sk = __inet6_lookup_established(net, &tcp_hashinfo, +@@ -346,8 +347,9 @@ static void tcp_v6_err(struct sk_buff *s + return; + } + seq = ntohl(th->seq); ++ fatal = icmpv6_err_convert(type, code, &err); + if (sk->sk_state == TCP_NEW_SYN_RECV) +- return tcp_req_err(sk, seq); ++ return tcp_req_err(sk, seq, fatal); + + bh_lock_sock(sk); + if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) +@@ -401,7 +403,6 @@ static void tcp_v6_err(struct sk_buff *s + goto out; + } + +- icmpv6_err_convert(type, code, &err); + + /* Might be for an request_sock */ + switch (sk->sk_state) { diff --git a/queue-4.4/tcp-fix-null-deref-in-tcp_v4_send_ack.patch b/queue-4.4/tcp-fix-null-deref-in-tcp_v4_send_ack.patch new file mode 100644 index 00000000000..ebdf2950f11 --- /dev/null +++ b/queue-4.4/tcp-fix-null-deref-in-tcp_v4_send_ack.patch @@ -0,0 +1,95 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Eric Dumazet +Date: Thu, 21 Jan 2016 08:02:54 -0800 +Subject: tcp: fix NULL deref in tcp_v4_send_ack() + +From: Eric Dumazet + +[ Upstream commit e62a123b8ef7c5dc4db2c16383d506860ad21b47 ] + +Neal reported crashes with this stack trace : + + RIP: 0010:[] tcp_v4_send_ack+0x41/0x20f +... + CR2: 0000000000000018 CR3: 000000044005c000 CR4: 00000000001427e0 +... 
+ [] tcp_v4_reqsk_send_ack+0xa5/0xb4 + [] tcp_check_req+0x2ea/0x3e0 + [] tcp_rcv_state_process+0x850/0x2500 + [] tcp_v4_do_rcv+0x141/0x330 + [] sk_backlog_rcv+0x21/0x30 + [] tcp_recvmsg+0x75d/0xf90 + [] inet_recvmsg+0x80/0xa0 + [] sock_aio_read+0xee/0x110 + [] do_sync_read+0x6f/0xa0 + [] SyS_read+0x1e1/0x290 + [] system_call_fastpath+0x16/0x1b + +The problem here is the skb we provide to tcp_v4_send_ack() had to +be parked in the backlog of a new TCP fastopen child because this child +was owned by the user at the time an out of window packet arrived. + +Before queuing a packet, TCP has to set skb->dev to NULL as the device +could disappear before packet is removed from the queue. + +Fix this issue by using the net pointer provided by the socket (being a +timewait or a request socket). + +IPv6 is immune to the bug : tcp_v6_send_response() already gets the net +pointer from the socket if provided. + +Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path") +Reported-by: Neal Cardwell +Signed-off-by: Eric Dumazet +Cc: Jerry Chu +Cc: Yuchung Cheng +Acked-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_ipv4.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -705,7 +705,8 @@ release_sk1: + outside socket context is ugly, certainly. What can I do? + */ + +-static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ++static void tcp_v4_send_ack(struct net *net, ++ struct sk_buff *skb, u32 seq, u32 ack, + u32 win, u32 tsval, u32 tsecr, int oif, + struct tcp_md5sig_key *key, + int reply_flags, u8 tos) +@@ -720,7 +721,6 @@ static void tcp_v4_send_ack(struct sk_bu + ]; + } rep; + struct ip_reply_arg arg; +- struct net *net = dev_net(skb_dst(skb)->dev); + + memset(&rep.th, 0, sizeof(struct tcphdr)); + memset(&arg, 0, sizeof(arg)); +@@ -782,7 +782,8 @@ static void tcp_v4_timewait_ack(struct s + struct inet_timewait_sock *tw = inet_twsk(sk); + struct tcp_timewait_sock *tcptw = tcp_twsk(sk); + +- tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, ++ tcp_v4_send_ack(sock_net(sk), skb, ++ tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, + tcp_time_stamp + tcptw->tw_ts_offset, + tcptw->tw_ts_recent, +@@ -801,8 +802,10 @@ static void tcp_v4_reqsk_send_ack(const + /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV + * sk->sk_state == TCP_SYN_RECV -> for Fast Open. + */ +- tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? +- tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, ++ u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : ++ tcp_sk(sk)->snd_nxt; ++ ++ tcp_v4_send_ack(sock_net(sk), skb, seq, + tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, + tcp_time_stamp, + req->ts_recent, diff --git a/queue-4.4/tcp-md5-release-request-socket-instead-of-listener.patch b/queue-4.4/tcp-md5-release-request-socket-instead-of-listener.patch new file mode 100644 index 00000000000..b916bc6c833 --- /dev/null +++ b/queue-4.4/tcp-md5-release-request-socket-instead-of-listener.patch @@ -0,0 +1,37 @@ +From foo@baz Mon Feb 29 14:33:50 PST 2016 +From: Eric Dumazet +Date: Thu, 11 Feb 2016 22:50:29 -0800 +Subject: tcp: md5: release request socket instead of listener + +From: Eric Dumazet + +[ Upstream commit 729235554d805c63e5e274fcc6a98e71015dd847 ] + +If tcp_v4_inbound_md5_hash() returns an error, we must release +the refcount on the request socket, not on the listener. + +The bug was added for IPv4 only. 
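
Editor's note (not part of the upstream commit message): the rule this fix restores is that an error path must release the reference it actually holds, here the request socket returned by the ehash lookup, rather than an object it merely reached through that reference, the listener found via req->rsk_listener, whose reference belongs to the request socket. A toy userspace sketch of that invariant follows; the names are illustrative stand-ins, not kernel APIs.

    #include <stdio.h>

    struct obj {
            int refcnt;
            const char *name;
    };

    static void put(struct obj *o)
    {
            if (--o->refcnt == 0)
                    printf("%s freed\n", o->name);
    }

    int main(void)
    {
            /* The lookup handed us one owned reference on the request
             * socket; the listener is only reachable through it, so we
             * own no reference on the listener at this point.
             */
            struct obj listener = { 1, "listener" };
            struct obj req = { 1, "request socket" };
            struct obj *borrowed = &listener;

            int md5_check_failed = 1;  /* assume the inbound MD5 check fails */

            if (md5_check_failed) {
                    put(&req);      /* correct: drop the reference we own */
                    /* the pre-fix code effectively did put(borrowed) here,
                     * leaking req and dropping a listener reference that
                     * belonged to req
                     */
                    (void)borrowed;
            }
            return 0;
    }

Running it prints "request socket freed"; the buggy variant would instead leave the request socket pinned forever while stealing a reference the listener's owner still counts on.
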
+
+Fixes: 079096f103fac ("tcp/dccp: install syn_recv requests into ehash table")
+Signed-off-by: Eric Dumazet
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_ipv4.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1597,8 +1597,10 @@ process:
+ struct sock *nsk = NULL;
+
+ sk = req->rsk_listener;
+- if (tcp_v4_inbound_md5_hash(sk, skb))
+- goto discard_and_relse;
++ if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
++ reqsk_put(req);
++ goto discard_it;
++ }
+ if (likely(sk->sk_state == TCP_LISTEN)) {
+ nsk = tcp_check_req(sk, skb, req, false);
+ } else {
diff --git a/queue-4.4/tg3-fix-for-tg3-transmit-queue-0-timed-out-when-too-many-gso_segs.patch b/queue-4.4/tg3-fix-for-tg3-transmit-queue-0-timed-out-when-too-many-gso_segs.patch
new file mode 100644
index 00000000000..ba5f07f3ed6
--- /dev/null
+++ b/queue-4.4/tg3-fix-for-tg3-transmit-queue-0-timed-out-when-too-many-gso_segs.patch
@@ -0,0 +1,86 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Siva Reddy Kallam
+Date: Wed, 3 Feb 2016 14:09:38 +0530
+Subject: tg3: Fix for tg3 transmit queue 0 timed out when too many gso_segs
+
+From: Siva Reddy Kallam
+
+[ Upstream commit b7d987295c74500b733a0ba07f9a9bcc4074fa83 ]
+
+tg3_tso_bug() can hit a condition where the entire tx ring is not big
+enough to segment the GSO packet. For example, if MSS is very small,
+gso_segs can exceed the tx ring size. When we hit the condition, it
+will cause a tx timeout.
+
+tg3_tso_bug() is called to handle TSO and DMA hardware bugs.
+For TSO bugs, if tg3_tso_bug() cannot succeed, we have to drop the packet.
+For DMA bugs, we can still fall back to linearize the SKB and let the
+hardware transmit the TSO packet.
+
+This patch adds a function tg3_tso_bug_gso_check() to check if there
+are enough tx descriptors for GSO before calling tg3_tso_bug().
+The caller will then handle the error appropriately - drop or
+linearize the SKB.
+
+v2: Corrected patch description to avoid confusion.
+
+Signed-off-by: Siva Reddy Kallam
+Signed-off-by: Michael Chan
+Acked-by: Prashant Sreedharan
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/broadcom/tg3.c | 25 +++++++++++++++++++------
+ 1 file changed, 19 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -7833,6 +7833,14 @@ static int tigon3_dma_hwbug_workaround(s
+ return ret;
+ }
+
++static bool tg3_tso_bug_gso_check(struct tg3_napi *tnapi, struct sk_buff *skb)
++{
++ /* Check if we will never have enough descriptors,
++ * as gso_segs can be more than current ring size
++ */
++ return skb_shinfo(skb)->gso_segs < tnapi->tx_pending / 3;
++}
++
+ static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *);
+
+ /* Use GSO to workaround all TSO packets that meet HW bug conditions
+@@ -7936,14 +7944,19 @@ static netdev_tx_t tg3_start_xmit(struct
+ * vlan encapsulated.
+ */
+ if (skb->protocol == htons(ETH_P_8021Q) ||
+- skb->protocol == htons(ETH_P_8021AD))
+- return tg3_tso_bug(tp, tnapi, txq, skb);
++ skb->protocol == htons(ETH_P_8021AD)) {
++ if (tg3_tso_bug_gso_check(tnapi, skb))
++ return tg3_tso_bug(tp, tnapi, txq, skb);
++ goto drop;
++ }
+
+ if (!skb_is_gso_v6(skb)) {
+ if (unlikely((ETH_HLEN + hdr_len) > 80) &&
+- tg3_flag(tp, TSO_BUG))
+- return tg3_tso_bug(tp, tnapi, txq, skb);
+-
++ tg3_flag(tp, TSO_BUG)) {
++ if (tg3_tso_bug_gso_check(tnapi, skb))
++ return tg3_tso_bug(tp, tnapi, txq, skb);
++ goto drop;
++ }
+ ip_csum = iph->check;
+ ip_tot_len = iph->tot_len;
+ iph->check = 0;
+@@ -8075,7 +8088,7 @@ static netdev_tx_t tg3_start_xmit(struct
+ if (would_hit_hwbug) {
+ tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i);
+
+- if (mss) {
++ if (mss && tg3_tso_bug_gso_check(tnapi, skb)) {
+ /* If it's a TSO packet, do GSO instead of
+ * allocating and copying to a large linear SKB
+ */
diff --git a/queue-4.4/tipc-fix-connection-abort-during-subscription-cancel.patch b/queue-4.4/tipc-fix-connection-abort-during-subscription-cancel.patch
new file mode 100644
index 00000000000..abffa355a90
--- /dev/null
+++ b/queue-4.4/tipc-fix-connection-abort-during-subscription-cancel.patch
@@ -0,0 +1,57 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Parthasarathy Bhuvaragan
+Date: Wed, 27 Jan 2016 11:35:59 +0100
+Subject: tipc: fix connection abort during subscription cancel
+
+From: Parthasarathy Bhuvaragan
+
+[ Upstream commit 4d5cfcba2f6ec494d8810b9e3c0a7b06255c8067 ]
+
+In 'commit 7fe8097cef5f ("tipc: fix nullpointer bug when subscribing
+to events")', we terminate the connection if the subscription
+creation fails.
+In the same commit, the subscription creation result was based on
+the value of the subscription pointer (set in the function) instead
+of the return code.
+
+Unfortunately, the same function tipc_subscrp_create() also handles
+subscription cancel requests. For a subscription cancellation request,
+the subscription pointer cannot be set. Thus if a subscriber has
+several subscriptions and cancels any of them, the connection is
+terminated.
+
+In this commit, we terminate the connection based on the return value
+of tipc_subscrp_create().
+Fixes: 7fe8097cef5f ("tipc: fix nullpointer bug when subscribing to events")
+
+Reviewed-by: Jon Maloy
+Signed-off-by: Parthasarathy Bhuvaragan
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/tipc/subscr.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/net/tipc/subscr.c
++++ b/net/tipc/subscr.c
+@@ -289,15 +289,14 @@ static void tipc_subscrb_rcv_cb(struct n
+ struct sockaddr_tipc *addr, void *usr_data,
+ void *buf, size_t len)
+ {
+- struct tipc_subscriber *subscriber = usr_data;
++ struct tipc_subscriber *subscrb = usr_data;
+ struct tipc_subscription *sub = NULL;
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+
+- tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscriber, &sub);
+- if (sub)
+- tipc_nametbl_subscribe(sub);
+- else
+- tipc_conn_terminate(tn->topsrv, subscriber->conid);
++ if (tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscrb, &sub))
++ return tipc_conn_terminate(tn->topsrv, subscrb->conid);
++
++ tipc_nametbl_subscribe(sub);
+ }
+
+ /* Handle one request to establish a new subscriber */
diff --git a/queue-4.4/tipc-fix-premature-addition-of-node-to-lookup-table.patch b/queue-4.4/tipc-fix-premature-addition-of-node-to-lookup-table.patch
new file mode 100644
index 00000000000..63855d7236f
--- /dev/null
+++ b/queue-4.4/tipc-fix-premature-addition-of-node-to-lookup-table.patch
@@ -0,0 +1,59 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Jon Paul Maloy
+Date: Wed, 10 Feb 2016 16:14:57 -0500
+Subject: tipc: fix premature addition of node to lookup table
+
+From: Jon Paul Maloy
+
+[ Upstream commit d5c91fb72f1652ea3026925240a0998a42ddb16b ]
+
+In commit 5266698661401a ("tipc: let broadcast packet reception
+use new link receive function") we introduced a new per-node
+broadcast reception link instance. This link is created at the
+moment the node itself is created. Unfortunately, the allocation
+is done after the node instance has already been added to the node
+lookup hash table. This creates a potential race condition, where
+arriving broadcast packets are able to find and access the node
+before it has been fully initialized, and before the above-mentioned
+link has been created. The result is occasional crashes in the function
+tipc_bcast_rcv(), which is trying to access the not-yet existing link.
+
+We fix this by deferring the addition of the node instance until after
+it has been fully initialized in the function tipc_node_create().
+
+Acked-by: Ying Xue
+Signed-off-by: Jon Maloy
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/tipc/node.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/net/tipc/node.c
++++ b/net/tipc/node.c
+@@ -168,12 +168,6 @@ struct tipc_node *tipc_node_create(struc
+ skb_queue_head_init(&n_ptr->bc_entry.inputq1);
+ __skb_queue_head_init(&n_ptr->bc_entry.arrvq);
+ skb_queue_head_init(&n_ptr->bc_entry.inputq2);
+- hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]);
+- list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+- if (n_ptr->addr < temp_node->addr)
+- break;
+- }
+- list_add_tail_rcu(&n_ptr->list, &temp_node->list);
+ n_ptr->state = SELF_DOWN_PEER_LEAVING;
+ n_ptr->signature = INVALID_NODE_SIG;
+ n_ptr->active_links[0] = INVALID_BEARER_ID;
+@@ -193,6 +187,12 @@ struct tipc_node *tipc_node_create(struc
+ tipc_node_get(n_ptr);
+ setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr);
+ n_ptr->keepalive_intv = U32_MAX;
++ hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]);
++ list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
++ if (n_ptr->addr < temp_node->addr)
++ break;
++ }
++ list_add_tail_rcu(&n_ptr->list, &temp_node->list);
+ exit:
+ spin_unlock_bh(&tn->node_list_lock);
+ return n_ptr;
diff --git a/queue-4.4/tipc-unlock-in-error-path.patch b/queue-4.4/tipc-unlock-in-error-path.patch
new file mode 100644
index 00000000000..8d6956fd875
--- /dev/null
+++ b/queue-4.4/tipc-unlock-in-error-path.patch
@@ -0,0 +1,32 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Insu Yun
+Date: Wed, 17 Feb 2016 11:47:35 -0500
+Subject: tipc: unlock in error path
+
+From: Insu Yun
+
+[ Upstream commit b53ce3e7d407aa4196877a48b8601181162ab158 ]
+
+tipc_bcast_unlock() needs to be called in the error path.
+
+Signed-off-by: Insu Yun
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/tipc/bcast.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/tipc/bcast.c
++++ b/net/tipc/bcast.c
+@@ -399,8 +399,10 @@ int tipc_nl_add_bc_link(struct net *net,
+
+ hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
+ NLM_F_MULTI, TIPC_NL_LINK_GET);
+- if (!hdr)
++ if (!hdr) {
++ tipc_bcast_unlock(net);
+ return -EMSGSIZE;
++ }
+
+ attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK);
+ if (!attrs)
diff --git a/queue-4.4/tunnels-allow-ipv6-udp-checksums-to-be-correctly-controlled.patch b/queue-4.4/tunnels-allow-ipv6-udp-checksums-to-be-correctly-controlled.patch
new file mode 100644
index 00000000000..7e78df5b307
--- /dev/null
+++ b/queue-4.4/tunnels-allow-ipv6-udp-checksums-to-be-correctly-controlled.patch
@@ -0,0 +1,71 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Jesse Gross
+Date: Wed, 20 Jan 2016 16:22:47 -0800
+Subject: tunnels: Allow IPv6 UDP checksums to be correctly controlled.
+
+From: Jesse Gross
+
+[ Upstream commit 35e2d1152b22eae99c961affbe85374bef05a775 ]
+
+When configuring checksums on UDP tunnels, the flags are different
+for IPv4 vs. IPv6 (and reversed). However, when lightweight tunnels
+are enabled the flags used are always the IPv4 versions, which are
+ignored in the IPv6 code paths. This uses the correct IPv6 flags, so
+checksums can be controlled appropriately.
+
+Fixes: a725e514 ("vxlan: metadata based tunneling for IPv6")
+Fixes: abe492b4 ("geneve: UDP checksum configuration via netlink")
+Signed-off-by: Jesse Gross
+Acked-by: Jiri Benc
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/vxlan.c | 23 ++++++++++++++++-------
+ 1 file changed, 16 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -1984,11 +1984,6 @@ static void vxlan_xmit_one(struct sk_buf
+ vxlan->cfg.port_max, true);
+
+ if (info) {
+- if (info->key.tun_flags & TUNNEL_CSUM)
+- flags |= VXLAN_F_UDP_CSUM;
+- else
+- flags &= ~VXLAN_F_UDP_CSUM;
+-
+ ttl = info->key.ttl;
+ tos = info->key.tos;
+
+@@ -2003,8 +1998,15 @@ static void vxlan_xmit_one(struct sk_buf
+ goto drop;
+ sk = vxlan->vn4_sock->sock->sk;
+
+- if (info && (info->key.tun_flags & TUNNEL_DONT_FRAGMENT))
+- df = htons(IP_DF);
++ if (info) {
++ if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
++ df = htons(IP_DF);
++
++ if (info->key.tun_flags & TUNNEL_CSUM)
++ flags |= VXLAN_F_UDP_CSUM;
++ else
++ flags &= ~VXLAN_F_UDP_CSUM;
++ }
+
+ memset(&fl4, 0, sizeof(fl4));
+ fl4.flowi4_oif = rdst ? rdst->remote_ifindex : 0;
+@@ -2102,6 +2104,13 @@ static void vxlan_xmit_one(struct sk_buf
+ return;
+ }
+
++ if (info) {
++ if (info->key.tun_flags & TUNNEL_CSUM)
++ flags &= ~VXLAN_F_UDP_ZERO_CSUM6_TX;
++ else
++ flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
++ }
++
+ ttl = ttl ? : ip6_dst_hoplimit(ndst);
+ err = vxlan6_xmit_skb(ndst, sk, skb, dev, &saddr, &dst->sin6.sin6_addr,
+ 0, ttl, src_port, dst_port, htonl(vni << 8), md,
diff --git a/queue-4.4/unix-correctly-track-in-flight-fds-in-sending-process-user_struct.patch b/queue-4.4/unix-correctly-track-in-flight-fds-in-sending-process-user_struct.patch
new file mode 100644
index 00000000000..904f803d977
--- /dev/null
+++ b/queue-4.4/unix-correctly-track-in-flight-fds-in-sending-process-user_struct.patch
@@ -0,0 +1,150 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: Hannes Frederic Sowa
+Date: Wed, 3 Feb 2016 02:11:03 +0100
+Subject: unix: correctly track in-flight fds in sending process user_struct
+
+From: Hannes Frederic Sowa
+
+[ Upstream commit 415e3d3e90ce9e18727e8843ae343eda5a58fad6 ]
+
+The commit referenced in the Fixes tag incorrectly accounted the number
+of in-flight fds over a unix domain socket to the original opener
+of the file descriptor. This allows another process to arbitrarily
+deplete the original file opener's resource limit for the maximum
+number of open files. Instead, the sending process and its struct cred
+should be credited.
+
+To do so, we add a reference-counted struct user_struct pointer to the
+scm_fp_list and use it to account for the number of in-flight unix fds.
+
+Fixes: 712f4aad406bb1 ("unix: properly account for FDs passed over unix sockets")
+Reported-by: David Herrmann
+Cc: David Herrmann
+Cc: Willy Tarreau
+Cc: Linus Torvalds
+Suggested-by: Linus Torvalds
+Signed-off-by: Hannes Frederic Sowa
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/net/af_unix.h | 4 ++--
+ include/net/scm.h | 1 +
+ net/core/scm.c | 7 +++++++
+ net/unix/af_unix.c | 4 ++--
+ net/unix/garbage.c | 8 ++++----
+ 5 files changed, 16 insertions(+), 8 deletions(-)
+
+--- a/include/net/af_unix.h
++++ b/include/net/af_unix.h
+@@ -6,8 +6,8 @@
+ #include
+ #include
+
+-void unix_inflight(struct file *fp);
+-void unix_notinflight(struct file *fp);
++void unix_inflight(struct user_struct *user, struct file *fp);
++void unix_notinflight(struct user_struct *user, struct file *fp);
+ void unix_gc(void);
+ void wait_for_unix_gc(void);
+ struct sock *unix_get_socket(struct file *filp);
+--- a/include/net/scm.h
++++ b/include/net/scm.h
+@@ -21,6 +21,7 @@ struct scm_creds {
+ struct scm_fp_list {
+ short count;
+ short max;
++ struct user_struct *user;
+ struct file *fp[SCM_MAX_FD];
+ };
+
+--- a/net/core/scm.c
++++ b/net/core/scm.c
+@@ -87,6 +87,7 @@ static int scm_fp_copy(struct cmsghdr *c
+ *fplp = fpl;
+ fpl->count = 0;
+ fpl->max = SCM_MAX_FD;
++ fpl->user = NULL;
+ }
+ fpp = &fpl->fp[fpl->count];
+
+@@ -107,6 +108,10 @@ static int scm_fp_copy(struct cmsghdr *c
+ *fpp++ = file;
+ fpl->count++;
+ }
++
++ if (!fpl->user)
++ fpl->user = get_uid(current_user());
++
+ return num;
+ }
+
+@@ -119,6 +124,7 @@ void __scm_destroy(struct scm_cookie *sc
+ scm->fp = NULL;
+ for (i=fpl->count-1; i>=0; i--)
+ fput(fpl->fp[i]);
++ free_uid(fpl->user);
+ kfree(fpl);
+ }
+ }
+@@ -336,6 +342,7 @@ struct scm_fp_list *scm_fp_dup(struct sc
+ for (i = 0; i < fpl->count; i++)
+ get_file(fpl->fp[i]);
+ new_fpl->max = new_fpl->count;
++ new_fpl->user = get_uid(fpl->user);
+ }
+ return new_fpl;
+ }
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -1496,7 +1496,7 @@ static void unix_detach_fds(struct scm_c
+ UNIXCB(skb).fp = NULL;
+
+ for (i = scm->fp->count-1; i >= 0; i--)
+- unix_notinflight(scm->fp->fp[i]);
++ unix_notinflight(scm->fp->user, scm->fp->fp[i]);
+ }
+
+ static void unix_destruct_scm(struct sk_buff *skb)
+@@ -1561,7 +1561,7 @@ static int unix_attach_fds(struct scm_co
+ return -ENOMEM;
+
+ for (i = scm->fp->count - 1; i >= 0; i--)
+- unix_inflight(scm->fp->fp[i]);
++ unix_inflight(scm->fp->user, scm->fp->fp[i]);
+ return max_level;
+ }
+
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -116,7 +116,7 @@ struct sock *unix_get_socket(struct file
+ * descriptor if it is for an AF_UNIX socket.
+ */
+
+-void unix_inflight(struct file *fp)
++void unix_inflight(struct user_struct *user, struct file *fp)
+ {
+ struct sock *s = unix_get_socket(fp);
+
+@@ -133,11 +133,11 @@ void unix_inflight(struct file *fp)
+ }
+ unix_tot_inflight++;
+ }
+- fp->f_cred->user->unix_inflight++;
++ user->unix_inflight++;
+ spin_unlock(&unix_gc_lock);
+ }
+
+-void unix_notinflight(struct file *fp)
++void unix_notinflight(struct user_struct *user, struct file *fp)
+ {
+ struct sock *s = unix_get_socket(fp);
+
+@@ -152,7 +152,7 @@ void unix_notinflight(struct file *fp)
+ list_del_init(&u->link);
+ unix_tot_inflight--;
+ }
+- fp->f_cred->user->unix_inflight--;
++ user->unix_inflight--;
+ spin_unlock(&unix_gc_lock);
+ }
+
diff --git a/queue-4.4/unix_diag-fix-incorrect-sign-extension-in-unix_lookup_by_ino.patch b/queue-4.4/unix_diag-fix-incorrect-sign-extension-in-unix_lookup_by_ino.patch
new file mode 100644
index 00000000000..f476e236604
--- /dev/null
+++ b/queue-4.4/unix_diag-fix-incorrect-sign-extension-in-unix_lookup_by_ino.patch
@@ -0,0 +1,39 @@
+From foo@baz Mon Feb 29 14:33:50 PST 2016
+From: "Dmitry V. Levin"
+Date: Fri, 19 Feb 2016 04:27:48 +0300
+Subject: unix_diag: fix incorrect sign extension in unix_lookup_by_ino
+
+From: "Dmitry V. Levin"
+
+[ Upstream commit b5f0549231ffb025337be5a625b0ff9f52b016f0 ]
+
+The value passed by unix_diag_get_exact to unix_lookup_by_ino has type
+__u32, but unix_lookup_by_ino's argument ino has type int, which is not
+a problem yet.
+However, when ino is compared with the sock_i_ino return value of type
+unsigned long, ino is sign-extended to signed long, and this results
+in an incorrect comparison on 64-bit architectures for inode numbers
+greater than INT_MAX.
+
+This bug was found by the strace test suite.
+
+Fixes: 5d3cae8bc39d ("unix_diag: Dumping exact socket core")
+Signed-off-by: Dmitry V. Levin
+Acked-by: Cong Wang
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/unix/diag.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/unix/diag.c
++++ b/net/unix/diag.c
+@@ -220,7 +220,7 @@ done:
+ return skb->len;
+ }
+
+-static struct sock *unix_lookup_by_ino(int ino)
++static struct sock *unix_lookup_by_ino(unsigned int ino)
+ {
+ int i;
+ struct sock *sk;