git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 11 Aug 2017 16:21:04 +0000 (09:21 -0700)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 11 Aug 2017 16:21:04 +0000 (09:21 -0700)
added patches:
bpf-s390-fix-jit-branch-offset-related-to-ldimm64.patch
igmp-fix-regression-caused-by-igmp-sysctl-namespace-code.patch
net-avoid-skb_warn_bad_offload-false-positives-on-ufo.patch
net-fix-keepalive-code-vs-tcp_fastopen_connect.patch
net-mlx4_en-don-t-set-checksum_complete-on-sctp-packets.patch
net-sched-set-xt_tgchk_param-par.nft_compat-as-0-in-ipt_init_target.patch
packet-fix-tp_reserve-race-in-packet_set_ring.patch
ppp-fix-false-xmit-recursion-detect-with-two-ppp-devices.patch
ppp-fix-xmit-recursion-detection-on-ppp-channels.patch
revert-ipv4-should-use-consistent-conditional-judgement-for-ip-fragment-in-__ip_append_data-and-ip_finish_output.patch
revert-net-account-for-current-skb-length-when-deciding-about-ufo.patch
tcp-avoid-setting-cwnd-to-invalid-ssthresh-after-cwnd-reduction-states.patch
tcp-fastopen-tcp_connect-must-refresh-the-route.patch
udp-consistently-apply-ufo-or-fragmentation.patch

14 files changed:
queue-4.9/bpf-s390-fix-jit-branch-offset-related-to-ldimm64.patch [new file with mode: 0644]
queue-4.9/igmp-fix-regression-caused-by-igmp-sysctl-namespace-code.patch [new file with mode: 0644]
queue-4.9/net-avoid-skb_warn_bad_offload-false-positives-on-ufo.patch [new file with mode: 0644]
queue-4.9/net-fix-keepalive-code-vs-tcp_fastopen_connect.patch [new file with mode: 0644]
queue-4.9/net-mlx4_en-don-t-set-checksum_complete-on-sctp-packets.patch [new file with mode: 0644]
queue-4.9/net-sched-set-xt_tgchk_param-par.nft_compat-as-0-in-ipt_init_target.patch [new file with mode: 0644]
queue-4.9/packet-fix-tp_reserve-race-in-packet_set_ring.patch [new file with mode: 0644]
queue-4.9/ppp-fix-false-xmit-recursion-detect-with-two-ppp-devices.patch [new file with mode: 0644]
queue-4.9/ppp-fix-xmit-recursion-detection-on-ppp-channels.patch [new file with mode: 0644]
queue-4.9/revert-ipv4-should-use-consistent-conditional-judgement-for-ip-fragment-in-__ip_append_data-and-ip_finish_output.patch [new file with mode: 0644]
queue-4.9/revert-net-account-for-current-skb-length-when-deciding-about-ufo.patch [new file with mode: 0644]
queue-4.9/tcp-avoid-setting-cwnd-to-invalid-ssthresh-after-cwnd-reduction-states.patch [new file with mode: 0644]
queue-4.9/tcp-fastopen-tcp_connect-must-refresh-the-route.patch [new file with mode: 0644]
queue-4.9/udp-consistently-apply-ufo-or-fragmentation.patch [new file with mode: 0644]

diff --git a/queue-4.9/bpf-s390-fix-jit-branch-offset-related-to-ldimm64.patch b/queue-4.9/bpf-s390-fix-jit-branch-offset-related-to-ldimm64.patch
new file mode 100644 (file)
index 0000000..fab77d3
--- /dev/null
@@ -0,0 +1,82 @@
+From foo@baz Fri Aug 11 09:10:20 PDT 2017
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 4 Aug 2017 14:20:54 +0200
+Subject: bpf, s390: fix jit branch offset related to ldimm64
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+
+[ Upstream commit b0a0c2566f28e71e5e32121992ac8060cec75510 ]
+
+While testing some other work that required JIT modifications, I
+run into test_bpf causing a hang when JIT enabled on s390. The
+problematic test case was the one from ddc665a4bb4b (bpf, arm64:
+fix jit branch offset related to ldimm64), and turns out that we
+do have a similar issue on s390 as well. In bpf_jit_prog() we
+update next instruction address after returning from bpf_jit_insn()
+with an insn_count. bpf_jit_insn() returns either -1 in case of
+error (e.g. unsupported insn), 1 or 2. The latter is only the
+case for ldimm64 due to spanning 2 insns, however, next address
+is only set to i + 1 not taking actual insn_count into account,
+thus fix is to use insn_count instead of 1. bpf_jit_enable in
+mode 2 provides also disasm on s390:
+
+Before fix:
+
+  000003ff800349b6: a7f40003   brc     15,3ff800349bc                 ; target
+  000003ff800349ba: 0000               unknown
+  000003ff800349bc: e3b0f0700024       stg     %r11,112(%r15)
+  000003ff800349c2: e3e0f0880024       stg     %r14,136(%r15)
+  000003ff800349c8: 0db0               basr    %r11,%r0
+  000003ff800349ca: c0ef00000000       llilf   %r14,0
+  000003ff800349d0: e320b0360004       lg      %r2,54(%r11)
+  000003ff800349d6: e330b03e0004       lg      %r3,62(%r11)
+  000003ff800349dc: ec23ffeda065       clgrj   %r2,%r3,10,3ff800349b6 ; jmp
+  000003ff800349e2: e3e0b0460004       lg      %r14,70(%r11)
+  000003ff800349e8: e3e0b04e0004       lg      %r14,78(%r11)
+  000003ff800349ee: b904002e   lgr     %r2,%r14
+  000003ff800349f2: e3b0f0700004       lg      %r11,112(%r15)
+  000003ff800349f8: e3e0f0880004       lg      %r14,136(%r15)
+  000003ff800349fe: 07fe               bcr     15,%r14
+
+After fix:
+
+  000003ff80ef3db4: a7f40003   brc     15,3ff80ef3dba
+  000003ff80ef3db8: 0000               unknown
+  000003ff80ef3dba: e3b0f0700024       stg     %r11,112(%r15)
+  000003ff80ef3dc0: e3e0f0880024       stg     %r14,136(%r15)
+  000003ff80ef3dc6: 0db0               basr    %r11,%r0
+  000003ff80ef3dc8: c0ef00000000       llilf   %r14,0
+  000003ff80ef3dce: e320b0360004       lg      %r2,54(%r11)
+  000003ff80ef3dd4: e330b03e0004       lg      %r3,62(%r11)
+  000003ff80ef3dda: ec230006a065       clgrj   %r2,%r3,10,3ff80ef3de6 ; jmp
+  000003ff80ef3de0: e3e0b0460004       lg      %r14,70(%r11)
+  000003ff80ef3de6: e3e0b04e0004       lg      %r14,78(%r11)          ; target
+  000003ff80ef3dec: b904002e   lgr     %r2,%r14
+  000003ff80ef3df0: e3b0f0700004       lg      %r11,112(%r15)
+  000003ff80ef3df6: e3e0f0880004       lg      %r14,136(%r15)
+  000003ff80ef3dfc: 07fe               bcr     15,%r14
+
+test_bpf.ko suite runs fine after the fix.
+
+Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Tested-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/net/bpf_jit_comp.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -1252,7 +1252,8 @@ static int bpf_jit_prog(struct bpf_jit *
+               insn_count = bpf_jit_insn(jit, fp, i);
+               if (insn_count < 0)
+                       return -1;
+-              jit->addrs[i + 1] = jit->prg; /* Next instruction address */
++              /* Next instruction address */
++              jit->addrs[i + insn_count] = jit->prg;
+       }
+       bpf_jit_epilogue(jit);
diff --git a/queue-4.9/igmp-fix-regression-caused-by-igmp-sysctl-namespace-code.patch b/queue-4.9/igmp-fix-regression-caused-by-igmp-sysctl-namespace-code.patch
new file mode 100644 (file)
index 0000000..00598cd
--- /dev/null
@@ -0,0 +1,63 @@
+From foo@baz Fri Aug 11 09:10:20 PDT 2017
+From: Nikolay Borisov <nborisov@suse.com>
+Date: Wed, 9 Aug 2017 14:38:04 +0300
+Subject: igmp: Fix regression caused by igmp sysctl namespace code.
+
+From: Nikolay Borisov <nborisov@suse.com>
+
+
+[ Upstream commit 1714020e42b17135032c8606f7185b3fb2ba5d78 ]
+
+Commit dcd87999d415 ("igmp: net: Move igmp namespace init to correct file")
+moved the igmp sysctls initialization from tcp_sk_init to igmp_net_init. This
+function is only called as part of per-namespace initialization, only if
+CONFIG_IP_MULTICAST is defined, otherwise igmp_mc_init() call in ip_init is
+compiled out, casuing the igmp pernet ops to not be registerd and those sysctl
+being left initialized with 0. However, there are certain functions, such as
+ip_mc_join_group which are always compiled and make use of some of those
+sysctls. Let's do a partial revert of the aforementioned commit and move the
+sysctl initialization into inet_init_net, that way they will always have
+sane values.
+
+Fixes: dcd87999d415 ("igmp: net: Move igmp namespace init to correct file")
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=196595
+Reported-by: Gerardo Exequiel Pozzi <vmlinuz386@gmail.com>
+Signed-off-by: Nikolay Borisov <nborisov@suse.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/af_inet.c |    7 +++++++
+ net/ipv4/igmp.c    |    6 ------
+ 2 files changed, 7 insertions(+), 6 deletions(-)
+
+--- a/net/ipv4/af_inet.c
++++ b/net/ipv4/af_inet.c
+@@ -1693,6 +1693,13 @@ static __net_init int inet_init_net(stru
+       net->ipv4.sysctl_ip_dynaddr = 0;
+       net->ipv4.sysctl_ip_early_demux = 1;
++      /* Some igmp sysctl, whose values are always used */
++      net->ipv4.sysctl_igmp_max_memberships = 20;
++      net->ipv4.sysctl_igmp_max_msf = 10;
++      /* IGMP reports for link-local multicast groups are enabled by default */
++      net->ipv4.sysctl_igmp_llm_reports = 1;
++      net->ipv4.sysctl_igmp_qrv = 2;
++
+       return 0;
+ }
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -2974,12 +2974,6 @@ static int __net_init igmp_net_init(stru
+               goto out_sock;
+       }
+-      /* Sysctl initialization */
+-      net->ipv4.sysctl_igmp_max_memberships = 20;
+-      net->ipv4.sysctl_igmp_max_msf = 10;
+-      /* IGMP reports for link-local multicast groups are enabled by default */
+-      net->ipv4.sysctl_igmp_llm_reports = 1;
+-      net->ipv4.sysctl_igmp_qrv = 2;
+       return 0;
+ out_sock:
diff --git a/queue-4.9/net-avoid-skb_warn_bad_offload-false-positives-on-ufo.patch b/queue-4.9/net-avoid-skb_warn_bad_offload-false-positives-on-ufo.patch
new file mode 100644 (file)
index 0000000..99d8f37
--- /dev/null
@@ -0,0 +1,74 @@
+From foo@baz Fri Aug 11 09:10:20 PDT 2017
+From: Willem de Bruijn <willemb@google.com>
+Date: Tue, 8 Aug 2017 14:22:55 -0400
+Subject: net: avoid skb_warn_bad_offload false positives on UFO
+
+From: Willem de Bruijn <willemb@google.com>
+
+
+[ Upstream commit 8d63bee643f1fb53e472f0e135cae4eb99d62d19 ]
+
+skb_warn_bad_offload triggers a warning when an skb enters the GSO
+stack at __skb_gso_segment that does not have CHECKSUM_PARTIAL
+checksum offload set.
+
+Commit b2504a5dbef3 ("net: reduce skb_warn_bad_offload() noise")
+observed that SKB_GSO_DODGY producers can trigger the check and
+that passing those packets through the GSO handlers will fix it
+up. But, the software UFO handler will set ip_summed to
+CHECKSUM_NONE.
+
+When __skb_gso_segment is called from the receive path, this
+triggers the warning again.
+
+Make UFO set CHECKSUM_UNNECESSARY instead of CHECKSUM_NONE. On
+Tx these two are equivalent. On Rx, this better matches the
+skb state (checksum computed), as CHECKSUM_NONE here means no
+checksum computed.
+
+See also this thread for context:
+http://patchwork.ozlabs.org/patch/799015/
+
+Fixes: b2504a5dbef3 ("net: reduce skb_warn_bad_offload() noise")
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c         |    2 +-
+ net/ipv4/udp_offload.c |    2 +-
+ net/ipv6/udp_offload.c |    2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2703,7 +2703,7 @@ static inline bool skb_needs_check(struc
+ {
+       if (tx_path)
+               return skb->ip_summed != CHECKSUM_PARTIAL &&
+-                     skb->ip_summed != CHECKSUM_NONE;
++                     skb->ip_summed != CHECKSUM_UNNECESSARY;
+       return skb->ip_summed == CHECKSUM_NONE;
+ }
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -232,7 +232,7 @@ static struct sk_buff *udp4_ufo_fragment
+       if (uh->check == 0)
+               uh->check = CSUM_MANGLED_0;
+-      skb->ip_summed = CHECKSUM_NONE;
++      skb->ip_summed = CHECKSUM_UNNECESSARY;
+       /* If there is no outer header we can fake a checksum offload
+        * due to the fact that we have already done the checksum in
+--- a/net/ipv6/udp_offload.c
++++ b/net/ipv6/udp_offload.c
+@@ -72,7 +72,7 @@ static struct sk_buff *udp6_ufo_fragment
+               if (uh->check == 0)
+                       uh->check = CSUM_MANGLED_0;
+-              skb->ip_summed = CHECKSUM_NONE;
++              skb->ip_summed = CHECKSUM_UNNECESSARY;
+               /* If there is no outer header we can fake a checksum offload
+                * due to the fact that we have already done the checksum in
diff --git a/queue-4.9/net-fix-keepalive-code-vs-tcp_fastopen_connect.patch b/queue-4.9/net-fix-keepalive-code-vs-tcp_fastopen_connect.patch
new file mode 100644 (file)
index 0000000..c4f8a55
--- /dev/null
@@ -0,0 +1,89 @@
+From foo@baz Fri Aug 11 09:10:20 PDT 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 2 Aug 2017 23:10:46 -0700
+Subject: net: fix keepalive code vs TCP_FASTOPEN_CONNECT
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 2dda640040876cd8ae646408b69eea40c24f9ae9 ]
+
+syzkaller was able to trigger a divide by 0 in TCP stack [1]
+
+Issue here is that keepalive timer needs to be updated to not attempt
+to send a probe if the connection setup was deferred using
+TCP_FASTOPEN_CONNECT socket option added in linux-4.11
+
+[1]
+ divide error: 0000 [#1] SMP
+ CPU: 18 PID: 0 Comm: swapper/18 Not tainted
+ task: ffff986f62f4b040 ti: ffff986f62fa2000 task.ti: ffff986f62fa2000
+ RIP: 0010:[<ffffffff8409cc0d>]  [<ffffffff8409cc0d>] __tcp_select_window+0x8d/0x160
+ Call Trace:
+  <IRQ>
+  [<ffffffff8409d951>] tcp_transmit_skb+0x11/0x20
+  [<ffffffff8409da21>] tcp_xmit_probe_skb+0xc1/0xe0
+  [<ffffffff840a0ee8>] tcp_write_wakeup+0x68/0x160
+  [<ffffffff840a151b>] tcp_keepalive_timer+0x17b/0x230
+  [<ffffffff83b3f799>] call_timer_fn+0x39/0xf0
+  [<ffffffff83b40797>] run_timer_softirq+0x1d7/0x280
+  [<ffffffff83a04ddb>] __do_softirq+0xcb/0x257
+  [<ffffffff83ae03ac>] irq_exit+0x9c/0xb0
+  [<ffffffff83a04c1a>] smp_apic_timer_interrupt+0x6a/0x80
+  [<ffffffff83a03eaf>] apic_timer_interrupt+0x7f/0x90
+  <EOI>
+  [<ffffffff83fed2ea>] ? cpuidle_enter_state+0x13a/0x3b0
+  [<ffffffff83fed2cd>] ? cpuidle_enter_state+0x11d/0x3b0
+
+Tested:
+
+Following packetdrill no longer crashes the kernel
+
+`echo 0 >/proc/sys/net/ipv4/tcp_timestamps`
+
+// Cache warmup: send a Fast Open cookie request
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
+   +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation is now in progress)
+   +0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO,nop,nop>
+ +.01 < S. 123:123(0) ack 1 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6,FO abcd1234,nop,nop>
+   +0 > . 1:1(0) ack 1
+   +0 close(3) = 0
+   +0 > F. 1:1(0) ack 1
+   +0 < F. 1:1(0) ack 2 win 92
+   +0 > .  2:2(0) ack 2
+
+   +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+   +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+   +0 setsockopt(4, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
+   +0 setsockopt(4, SOL_SOCKET, SO_KEEPALIVE, [1], 4) = 0
+ +.01 connect(4, ..., ...) = 0
+   +0 setsockopt(4, SOL_TCP, TCP_KEEPIDLE, [5], 4) = 0
+   +10 close(4) = 0
+
+`echo 1 >/proc/sys/net/ipv4/tcp_timestamps`
+
+Fixes: 19f6d3f3c842 ("net/tcp-fastopen: Add new API support")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: Wei Wang <weiwan@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_timer.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -654,7 +654,8 @@ static void tcp_keepalive_timer (unsigne
+               goto death;
+       }
+-      if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
++      if (!sock_flag(sk, SOCK_KEEPOPEN) ||
++          ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
+               goto out;
+       elapsed = keepalive_time_when(tp);
diff --git a/queue-4.9/net-mlx4_en-don-t-set-checksum_complete-on-sctp-packets.patch b/queue-4.9/net-mlx4_en-don-t-set-checksum_complete-on-sctp-packets.patch
new file mode 100644 (file)
index 0000000..2d770c6
--- /dev/null
@@ -0,0 +1,94 @@
+From foo@baz Fri Aug 11 09:10:20 PDT 2017
+From: Davide Caratti <dcaratti@redhat.com>
+Date: Thu, 3 Aug 2017 22:54:48 +0200
+Subject: net/mlx4_en: don't set CHECKSUM_COMPLETE on SCTP packets
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+
+[ Upstream commit e718fe450e616227b74d27a233cdf37b4df0c82b ]
+
+if the NIC fails to validate the checksum on TCP/UDP, and validation of IP
+checksum is successful, the driver subtracts the pseudo-header checksum
+from the value obtained by the hardware and sets CHECKSUM_COMPLETE. Don't
+do that if protocol is IPPROTO_SCTP, otherwise CRC32c validation fails.
+
+V2: don't test MLX4_CQE_STATUS_IPV6 if MLX4_CQE_STATUS_IPV4 is set
+
+Reported-by: Shuang Li <shuali@redhat.com>
+Fixes: f8c6455bb04b ("net/mlx4_en: Extend checksum offloading by CHECKSUM COMPLETE")
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Acked-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_rx.c |   29 ++++++++++++++++++-----------
+ 1 file changed, 18 insertions(+), 11 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+@@ -724,16 +724,21 @@ static inline __wsum get_fixed_vlan_csum
+  * header, the HW adds it. To address that, we are subtracting the pseudo
+  * header checksum from the checksum value provided by the HW.
+  */
+-static void get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb,
+-                              struct iphdr *iph)
++static int get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb,
++                             struct iphdr *iph)
+ {
+       __u16 length_for_csum = 0;
+       __wsum csum_pseudo_header = 0;
++      __u8 ipproto = iph->protocol;
++
++      if (unlikely(ipproto == IPPROTO_SCTP))
++              return -1;
+       length_for_csum = (be16_to_cpu(iph->tot_len) - (iph->ihl << 2));
+       csum_pseudo_header = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+-                                              length_for_csum, iph->protocol, 0);
++                                              length_for_csum, ipproto, 0);
+       skb->csum = csum_sub(hw_checksum, csum_pseudo_header);
++      return 0;
+ }
+ #if IS_ENABLED(CONFIG_IPV6)
+@@ -744,17 +749,20 @@ static void get_fixed_ipv4_csum(__wsum h
+ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb,
+                              struct ipv6hdr *ipv6h)
+ {
++      __u8 nexthdr = ipv6h->nexthdr;
+       __wsum csum_pseudo_hdr = 0;
+-      if (unlikely(ipv6h->nexthdr == IPPROTO_FRAGMENT ||
+-                   ipv6h->nexthdr == IPPROTO_HOPOPTS))
++      if (unlikely(nexthdr == IPPROTO_FRAGMENT ||
++                   nexthdr == IPPROTO_HOPOPTS ||
++                   nexthdr == IPPROTO_SCTP))
+               return -1;
+-      hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr));
++      hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(nexthdr));
+       csum_pseudo_hdr = csum_partial(&ipv6h->saddr,
+                                      sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0);
+       csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ipv6h->payload_len);
+-      csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ntohs(ipv6h->nexthdr));
++      csum_pseudo_hdr = csum_add(csum_pseudo_hdr,
++                                 (__force __wsum)htons(nexthdr));
+       skb->csum = csum_sub(hw_checksum, csum_pseudo_hdr);
+       skb->csum = csum_add(skb->csum, csum_partial(ipv6h, sizeof(struct ipv6hdr), 0));
+@@ -777,11 +785,10 @@ static int check_csum(struct mlx4_cqe *c
+       }
+       if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4))
+-              get_fixed_ipv4_csum(hw_checksum, skb, hdr);
++              return get_fixed_ipv4_csum(hw_checksum, skb, hdr);
+ #if IS_ENABLED(CONFIG_IPV6)
+-      else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6))
+-              if (unlikely(get_fixed_ipv6_csum(hw_checksum, skb, hdr)))
+-                      return -1;
++      if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6))
++              return get_fixed_ipv6_csum(hw_checksum, skb, hdr);
+ #endif
+       return 0;
+ }
diff --git a/queue-4.9/net-sched-set-xt_tgchk_param-par.nft_compat-as-0-in-ipt_init_target.patch b/queue-4.9/net-sched-set-xt_tgchk_param-par.nft_compat-as-0-in-ipt_init_target.patch
new file mode 100644 (file)
index 0000000..d143697
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Fri Aug 11 09:10:20 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 9 Aug 2017 18:15:19 +0800
+Subject: net: sched: set xt_tgchk_param par.nft_compat as 0 in ipt_init_target
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 96d9703050a0036a3360ec98bb41e107c90664fe ]
+
+Commit 55917a21d0cc ("netfilter: x_tables: add context to know if
+extension runs from nft_compat") introduced a member nft_compat to
+xt_tgchk_param structure.
+
+But it didn't set it's value for ipt_init_target. With unexpected
+value in par.nft_compat, it may return unexpected result in some
+target's checkentry.
+
+This patch is to set all it's fields as 0 and only initialize the
+non-zero fields in ipt_init_target.
+
+v1->v2:
+  As Wang Cong's suggestion, fix it by setting all it's fields as
+  0 and only initializing the non-zero fields.
+
+Fixes: 55917a21d0cc ("netfilter: x_tables: add context to know if extension runs from nft_compat")
+Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_ipt.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sched/act_ipt.c
++++ b/net/sched/act_ipt.c
+@@ -49,8 +49,8 @@ static int ipt_init_target(struct xt_ent
+               return PTR_ERR(target);
+       t->u.kernel.target = target;
++      memset(&par, 0, sizeof(par));
+       par.table     = table;
+-      par.entryinfo = NULL;
+       par.target    = target;
+       par.targinfo  = t->data;
+       par.hook_mask = hook;
diff --git a/queue-4.9/packet-fix-tp_reserve-race-in-packet_set_ring.patch b/queue-4.9/packet-fix-tp_reserve-race-in-packet_set_ring.patch
new file mode 100644 (file)
index 0000000..7495e80
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Fri Aug 11 09:10:20 PDT 2017
+From: Willem de Bruijn <willemb@google.com>
+Date: Thu, 10 Aug 2017 12:41:58 -0400
+Subject: packet: fix tp_reserve race in packet_set_ring
+
+From: Willem de Bruijn <willemb@google.com>
+
+
+[ Upstream commit c27927e372f0785f3303e8fad94b85945e2c97b7 ]
+
+Updates to tp_reserve can race with reads of the field in
+packet_set_ring. Avoid this by holding the socket lock during
+updates in setsockopt PACKET_RESERVE.
+
+This bug was discovered by syzkaller.
+
+Fixes: 8913336a7e8d ("packet: add PACKET_RESERVE sockopt")
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |   13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -3698,14 +3698,19 @@ packet_setsockopt(struct socket *sock, i
+               if (optlen != sizeof(val))
+                       return -EINVAL;
+-              if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
+-                      return -EBUSY;
+               if (copy_from_user(&val, optval, sizeof(val)))
+                       return -EFAULT;
+               if (val > INT_MAX)
+                       return -EINVAL;
+-              po->tp_reserve = val;
+-              return 0;
++              lock_sock(sk);
++              if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
++                      ret = -EBUSY;
++              } else {
++                      po->tp_reserve = val;
++                      ret = 0;
++              }
++              release_sock(sk);
++              return ret;
+       }
+       case PACKET_LOSS:
+       {
diff --git a/queue-4.9/ppp-fix-false-xmit-recursion-detect-with-two-ppp-devices.patch b/queue-4.9/ppp-fix-false-xmit-recursion-detect-with-two-ppp-devices.patch
new file mode 100644 (file)
index 0000000..0ff9841
--- /dev/null
@@ -0,0 +1,128 @@
+From foo@baz Fri Aug 11 09:10:20 PDT 2017
+From: Gao Feng <gfree.wind@vip.163.com>
+Date: Mon, 17 Jul 2017 18:34:42 +0800
+Subject: ppp: Fix false xmit recursion detect with two ppp devices
+
+From: Gao Feng <gfree.wind@vip.163.com>
+
+
+[ Upstream commit e5dadc65f9e0177eb649bcd9d333f1ebf871223e ]
+
+The global percpu variable ppp_xmit_recursion is used to detect the ppp
+xmit recursion to avoid the deadlock, which is caused by one CPU tries to
+lock the xmit lock twice. But it would report false recursion when one CPU
+wants to send the skb from two different PPP devices, like one L2TP on the
+PPPoE. It is a normal case actually.
+
+Now use one percpu member of struct ppp instead of the gloable variable to
+detect the xmit recursion of one ppp device.
+
+Fixes: 55454a565836 ("ppp: avoid dealock on recursive xmit")
+Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
+Signed-off-by: Liu Jianying <jianying.liu@ikuai8.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ppp/ppp_generic.c |   30 +++++++++++++++++++++---------
+ 1 file changed, 21 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/ppp/ppp_generic.c
++++ b/drivers/net/ppp/ppp_generic.c
+@@ -119,6 +119,7 @@ struct ppp {
+       int             n_channels;     /* how many channels are attached 54 */
+       spinlock_t      rlock;          /* lock for receive side 58 */
+       spinlock_t      wlock;          /* lock for transmit side 5c */
++      int             *xmit_recursion __percpu; /* xmit recursion detect */
+       int             mru;            /* max receive unit 60 */
+       unsigned int    flags;          /* control bits 64 */
+       unsigned int    xstate;         /* transmit state bits 68 */
+@@ -1024,6 +1025,7 @@ static int ppp_dev_configure(struct net
+       struct ppp *ppp = netdev_priv(dev);
+       int indx;
+       int err;
++      int cpu;
+       ppp->dev = dev;
+       ppp->ppp_net = src_net;
+@@ -1038,6 +1040,15 @@ static int ppp_dev_configure(struct net
+       INIT_LIST_HEAD(&ppp->channels);
+       spin_lock_init(&ppp->rlock);
+       spin_lock_init(&ppp->wlock);
++
++      ppp->xmit_recursion = alloc_percpu(int);
++      if (!ppp->xmit_recursion) {
++              err = -ENOMEM;
++              goto err1;
++      }
++      for_each_possible_cpu(cpu)
++              (*per_cpu_ptr(ppp->xmit_recursion, cpu)) = 0;
++
+ #ifdef CONFIG_PPP_MULTILINK
+       ppp->minseq = -1;
+       skb_queue_head_init(&ppp->mrq);
+@@ -1049,11 +1060,15 @@ static int ppp_dev_configure(struct net
+       err = ppp_unit_register(ppp, conf->unit, conf->ifname_is_set);
+       if (err < 0)
+-              return err;
++              goto err2;
+       conf->file->private_data = &ppp->file;
+       return 0;
++err2:
++      free_percpu(ppp->xmit_recursion);
++err1:
++      return err;
+ }
+ static const struct nla_policy ppp_nl_policy[IFLA_PPP_MAX + 1] = {
+@@ -1399,18 +1414,16 @@ static void __ppp_xmit_process(struct pp
+       ppp_xmit_unlock(ppp);
+ }
+-static DEFINE_PER_CPU(int, ppp_xmit_recursion);
+-
+ static void ppp_xmit_process(struct ppp *ppp)
+ {
+       local_bh_disable();
+-      if (unlikely(__this_cpu_read(ppp_xmit_recursion)))
++      if (unlikely(*this_cpu_ptr(ppp->xmit_recursion)))
+               goto err;
+-      __this_cpu_inc(ppp_xmit_recursion);
++      (*this_cpu_ptr(ppp->xmit_recursion))++;
+       __ppp_xmit_process(ppp);
+-      __this_cpu_dec(ppp_xmit_recursion);
++      (*this_cpu_ptr(ppp->xmit_recursion))--;
+       local_bh_enable();
+@@ -1904,7 +1917,7 @@ static void __ppp_channel_push(struct ch
+               read_lock_bh(&pch->upl);
+               ppp = pch->ppp;
+               if (ppp)
+-                      __ppp_xmit_process(ppp);
++                      ppp_xmit_process(ppp);
+               read_unlock_bh(&pch->upl);
+       }
+ }
+@@ -1913,9 +1926,7 @@ static void ppp_channel_push(struct chan
+ {
+       local_bh_disable();
+-      __this_cpu_inc(ppp_xmit_recursion);
+       __ppp_channel_push(pch);
+-      __this_cpu_dec(ppp_xmit_recursion);
+       local_bh_enable();
+ }
+@@ -3056,6 +3067,7 @@ static void ppp_destroy_interface(struct
+ #endif /* CONFIG_PPP_FILTER */
+       kfree_skb(ppp->xmit_pending);
++      free_percpu(ppp->xmit_recursion);
+       free_netdev(ppp->dev);
+ }
diff --git a/queue-4.9/ppp-fix-xmit-recursion-detection-on-ppp-channels.patch b/queue-4.9/ppp-fix-xmit-recursion-detection-on-ppp-channels.patch
new file mode 100644 (file)
index 0000000..6875d2c
--- /dev/null
@@ -0,0 +1,75 @@
+From foo@baz Fri Aug 11 09:10:20 PDT 2017
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Tue, 8 Aug 2017 11:43:24 +0200
+Subject: ppp: fix xmit recursion detection on ppp channels
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+
+[ Upstream commit 0a0e1a85c83775a648041be2b15de6d0a2f2b8eb ]
+
+Commit e5dadc65f9e0 ("ppp: Fix false xmit recursion detect with two ppp
+devices") dropped the xmit_recursion counter incrementation in
+ppp_channel_push() and relied on ppp_xmit_process() for this task.
+But __ppp_channel_push() can also send packets directly (using the
+.start_xmit() channel callback), in which case the xmit_recursion
+counter isn't incremented anymore. If such packets get routed back to
+the parent ppp unit, ppp_xmit_process() won't notice the recursion and
+will call ppp_channel_push() on the same channel, effectively creating
+the deadlock situation that the xmit_recursion mechanism was supposed
+to prevent.
+
+This patch re-introduces the xmit_recursion counter incrementation in
+ppp_channel_push(). Since the xmit_recursion variable is now part of
+the parent ppp unit, incrementation is skipped if the channel doesn't
+have any. This is fine because only packets routed through the parent
+unit may enter the channel recursively.
+
+Finally, we have to ensure that pch->ppp is not going to be modified
+while executing ppp_channel_push(). Instead of taking this lock only
+while calling ppp_xmit_process(), we now have to hold it for the full
+ppp_channel_push() execution. This respects the ppp locks ordering
+which requires locking ->upl before ->downl.
+
+Fixes: e5dadc65f9e0 ("ppp: Fix false xmit recursion detect with two ppp devices")
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ppp/ppp_generic.c |   18 ++++++++++--------
+ 1 file changed, 10 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/ppp/ppp_generic.c
++++ b/drivers/net/ppp/ppp_generic.c
+@@ -1914,21 +1914,23 @@ static void __ppp_channel_push(struct ch
+       spin_unlock_bh(&pch->downl);
+       /* see if there is anything from the attached unit to be sent */
+       if (skb_queue_empty(&pch->file.xq)) {
+-              read_lock_bh(&pch->upl);
+               ppp = pch->ppp;
+               if (ppp)
+-                      ppp_xmit_process(ppp);
+-              read_unlock_bh(&pch->upl);
++                      __ppp_xmit_process(ppp);
+       }
+ }
+ static void ppp_channel_push(struct channel *pch)
+ {
+-      local_bh_disable();
+-
+-      __ppp_channel_push(pch);
+-
+-      local_bh_enable();
++      read_lock_bh(&pch->upl);
++      if (pch->ppp) {
++              (*this_cpu_ptr(pch->ppp->xmit_recursion))++;
++              __ppp_channel_push(pch);
++              (*this_cpu_ptr(pch->ppp->xmit_recursion))--;
++      } else {
++              __ppp_channel_push(pch);
++      }
++      read_unlock_bh(&pch->upl);
+ }
+ /*
diff --git a/queue-4.9/revert-ipv4-should-use-consistent-conditional-judgement-for-ip-fragment-in-__ip_append_data-and-ip_finish_output.patch b/queue-4.9/revert-ipv4-should-use-consistent-conditional-judgement-for-ip-fragment-in-__ip_append_data-and-ip_finish_output.patch
new file mode 100644 (file)
index 0000000..bd935c9
--- /dev/null
@@ -0,0 +1,30 @@
+From foo@baz Fri Aug 11 09:19:02 PDT 2017
+Date: Fri, 11 Aug 2017 09:19:02 -0700
+To: Greg KH <gregkh@linuxfoundation.org>
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Subject: revert "ipv4: Should use consistent conditional judgement for ip fragment in __ip_append_data and ip_finish_output"
+
+This reverts commit f102bb7164c9020e12662998f0fd99c3be72d4f6 which is
+commit 0a28cfd51e17f4f0a056bcf66bfbe492c3b99f38 upstream as there is
+another patch that needs to be applied instead of this one.
+
+Cc: Zheng Li <james.z.li@ericsson.com>
+Cc: David S. Miller <davem@davemloft.net>
+Cc: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ipv4/ip_output.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -936,7 +936,7 @@ static int __ip_append_data(struct sock
+               csummode = CHECKSUM_PARTIAL;
+       cork->length += length;
+-      if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
++      if (((length > mtu) || (skb && skb_is_gso(skb))) &&
+           (sk->sk_protocol == IPPROTO_UDP) &&
+           (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+           (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
diff --git a/queue-4.9/revert-net-account-for-current-skb-length-when-deciding-about-ufo.patch b/queue-4.9/revert-net-account-for-current-skb-length-when-deciding-about-ufo.patch
new file mode 100644 (file)
index 0000000..a0c2ec6
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Fri Aug 11 09:14:09 PDT 2017
+Date: Fri, 11 Aug 2017 09:14:09 -0700
+To: Greg KH <gregkh@linuxfoundation.org>
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Subject: revert "net: account for current skb length when deciding about UFO"
+
+This reverts commit ef09c9ff343122a0b245416066992d096416ff19 which is
+commit a5cb659bbc1c8644efa0c3138a757a1e432a4880 upstream as it causes
+merge issues with later patches that are much more important...
+
+Cc: Michal Kubecek <mkubecek@suse.cz>
+Cc: Vlad Yasevich <vyasevic@redhat.com>
+Cc: David S. Miller <davem@davemloft.net>
+Cc: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/ipv4/ip_output.c  |    3 +--
+ net/ipv6/ip6_output.c |    2 +-
+ 2 files changed, 2 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -936,8 +936,7 @@ static int __ip_append_data(struct sock
+               csummode = CHECKSUM_PARTIAL;
+       cork->length += length;
+-      if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) ||
+-           (skb && skb_is_gso(skb))) &&
++      if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
+           (sk->sk_protocol == IPPROTO_UDP) &&
+           (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+           (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1372,7 +1372,7 @@ emsgsize:
+        */
+       cork->length += length;
+-      if ((((length + (skb ? skb->len : headersize)) > mtu) ||
++      if ((((length + fragheaderlen) > mtu) ||
+            (skb && skb_is_gso(skb))) &&
+           (sk->sk_protocol == IPPROTO_UDP) &&
+           (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
diff --git a/queue-4.9/tcp-avoid-setting-cwnd-to-invalid-ssthresh-after-cwnd-reduction-states.patch b/queue-4.9/tcp-avoid-setting-cwnd-to-invalid-ssthresh-after-cwnd-reduction-states.patch
new file mode 100644 (file)
index 0000000..abe2afd
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Fri Aug 11 09:10:20 PDT 2017
+From: Yuchung Cheng <ycheng@google.com>
+Date: Tue, 1 Aug 2017 13:22:32 -0700
+Subject: tcp: avoid setting cwnd to invalid ssthresh after cwnd reduction states
+
+From: Yuchung Cheng <ycheng@google.com>
+
+
+[ Upstream commit ed254971edea92c3ac5c67c6a05247a92aa6075e ]
+
+If the sender switches the congestion control during ECN-triggered
+cwnd-reduction state (CA_CWR), upon exiting recovery cwnd is set to
+the ssthresh value calculated by the previous congestion control. If
+the previous congestion control is BBR that always keeps ssthresh
+to TCP_INFINITE_SSTHRESH, cwnd ends up being infinite. The safe
+step is to avoid assigning invalid ssthresh value when recovery ends.
+
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2560,8 +2560,8 @@ static inline void tcp_end_cwnd_reductio
+               return;
+       /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
+-      if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
+-          (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
++      if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
++          (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
+               tp->snd_cwnd = tp->snd_ssthresh;
+               tp->snd_cwnd_stamp = tcp_time_stamp;
+       }
diff --git a/queue-4.9/tcp-fastopen-tcp_connect-must-refresh-the-route.patch b/queue-4.9/tcp-fastopen-tcp_connect-must-refresh-the-route.patch
new file mode 100644 (file)
index 0000000..11061d0
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Fri Aug 11 09:10:20 PDT 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 8 Aug 2017 01:41:58 -0700
+Subject: tcp: fastopen: tcp_connect() must refresh the route
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 8ba60924710cde564a3905588b6219741d6356d0 ]
+
+With new TCP_FASTOPEN_CONNECT socket option, there is a possibility
+to call tcp_connect() while socket sk_dst_cache is either NULL
+or invalid.
+
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 setsockopt(4, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
+ +0 connect(4, ..., ...) = 0
+
+<< sk->sk_dst_cache becomes obsolete, or even set to NULL >>
+
+ +1 sendto(4, ..., 1000, MSG_FASTOPEN, ..., ...) = 1000
+
+We need to refresh the route otherwise bad things can happen,
+especially when syzkaller is running on the host :/
+
+Fixes: 19f6d3f3c8422 ("net/tcp-fastopen: Add new API support")
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Wei Wang <weiwan@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Acked-by: Wei Wang <weiwan@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -3344,6 +3344,9 @@ int tcp_connect(struct sock *sk)
+       struct sk_buff *buff;
+       int err;
++      if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
++              return -EHOSTUNREACH; /* Routing failure or similar. */
++
+       tcp_connect_init(sk);
+       if (unlikely(tp->repair)) {
diff --git a/queue-4.9/udp-consistently-apply-ufo-or-fragmentation.patch b/queue-4.9/udp-consistently-apply-ufo-or-fragmentation.patch
new file mode 100644 (file)
index 0000000..2634954
--- /dev/null
@@ -0,0 +1,89 @@
+From foo@baz Fri Aug 11 09:20:24 PDT 2017
+From: Willem de Bruijn <willemb@google.com>
+Date: Thu, 10 Aug 2017 12:29:19 -0400
+Subject: udp: consistently apply ufo or fragmentation
+
+From: Willem de Bruijn <willemb@google.com>
+
+
+[ Upstream commit 85f1bd9a7b5a79d5baa8bf44af19658f7bf77bfa ]
+
+When iteratively building a UDP datagram with MSG_MORE and that
+datagram exceeds MTU, consistently choose UFO or fragmentation.
+
+Once skb_is_gso, always apply ufo. Conversely, once a datagram is
+split across multiple skbs, do not consider ufo.
+
+Sendpage already maintains the first invariant, only add the second.
+IPv6 does not have a sendpage implementation to modify.
+
+A gso skb must have a partial checksum, do not follow sk_no_check_tx
+in udp_send_skb.
+
+Found by syzkaller.
+
+Fixes: e89e9cf539a2 ("[IPv4/IPv6]: UFO Scatter-gather approach")
+Reported-by: Andrey Konovalov <andreyknvl@google.com>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_output.c  |    7 +++++--
+ net/ipv4/udp.c        |    2 +-
+ net/ipv6/ip6_output.c |    7 ++++---
+ 3 files changed, 10 insertions(+), 6 deletions(-)
+
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -936,10 +936,12 @@ static int __ip_append_data(struct sock
+               csummode = CHECKSUM_PARTIAL;
+       cork->length += length;
+-      if (((length > mtu) || (skb && skb_is_gso(skb))) &&
++      if ((skb && skb_is_gso(skb)) ||
++          ((length > mtu) &&
++          (skb_queue_len(queue) <= 1) &&
+           (sk->sk_protocol == IPPROTO_UDP) &&
+           (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+-          (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
++          (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) {
+               err = ip_ufo_append_data(sk, queue, getfrag, from, length,
+                                        hh_len, fragheaderlen, transhdrlen,
+                                        maxfraglen, flags);
+@@ -1255,6 +1257,7 @@ ssize_t  ip_append_page(struct sock *sk,
+               return -EINVAL;
+       if ((size + skb->len > mtu) &&
++          (skb_queue_len(&sk->sk_write_queue) == 1) &&
+           (sk->sk_protocol == IPPROTO_UDP) &&
+           (rt->dst.dev->features & NETIF_F_UFO)) {
+               if (skb->ip_summed != CHECKSUM_PARTIAL)
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -813,7 +813,7 @@ static int udp_send_skb(struct sk_buff *
+       if (is_udplite)                                  /*     UDP-Lite      */
+               csum = udplite_csum(skb);
+-      else if (sk->sk_no_check_tx) {   /* UDP csum disabled */
++      else if (sk->sk_no_check_tx && !skb_is_gso(skb)) {   /* UDP csum off */
+               skb->ip_summed = CHECKSUM_NONE;
+               goto send;
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1372,11 +1372,12 @@ emsgsize:
+        */
+       cork->length += length;
+-      if ((((length + fragheaderlen) > mtu) ||
+-           (skb && skb_is_gso(skb))) &&
++      if ((skb && skb_is_gso(skb)) ||
++          (((length + fragheaderlen) > mtu) &&
++          (skb_queue_len(queue) <= 1) &&
+           (sk->sk_protocol == IPPROTO_UDP) &&
+           (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+-          (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
++          (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) {
+               err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
+                                         hh_len, fragheaderlen, exthdrlen,
+                                         transhdrlen, mtu, flags, fl6);