From 11a68d5845b0de64420a772e47ab1047f646a8aa Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 27 Jul 2018 10:24:03 +0200
Subject: [PATCH] 4.4-stable patches

added patches:
	ip-hash-fragments-consistently.patch
	ip-in-cmsg-ip-v6-_origdstaddr-call-pskb_may_pull.patch
	net-mlx4_core-save-the-qpn-from-the-input-modifier-in-rst2init-wrapper.patch
	rtnetlink-add-rtnl_link_state-check-in-rtnl_configure_link.patch
	tcp-avoid-collapses-in-tcp_prune_queue-if-possible.patch
	tcp-detect-malicious-patterns-in-tcp_collapse_ofo_queue.patch
	tcp-do-not-cancel-delay-ack-on-dctcp-special-ack.patch
	tcp-do-not-delay-ack-in-dctcp-upon-ce-status-change.patch
	tcp-fix-dctcp-delayed-ack-schedule.patch
	tcp-helpers-to-send-special-dctcp-ack.patch
---
 .../ip-hash-fragments-consistently.patch      |  73 +++++++++
 ...p-v6-_origdstaddr-call-pskb_may_pull.patch |  93 ++++++++++++
 ...e-input-modifier-in-rst2init-wrapper.patch |  40 +++++
 ...k_state-check-in-rtnl_configure_link.patch |  65 +++++++++
 queue-4.4/series                              |  10 ++
 ...apses-in-tcp_prune_queue-if-possible.patch |  46 ++++++
 ...s-patterns-in-tcp_collapse_ofo_queue.patch |  69 +++++++++
 ...ancel-delay-ack-on-dctcp-special-ack.patch | 138 ++++++++++++++++++
 ...y-ack-in-dctcp-upon-ce-status-change.patch | 138 ++++++++++++++++++
 .../tcp-fix-dctcp-delayed-ack-schedule.patch  |  98 +++++++++++++
 ...cp-helpers-to-send-special-dctcp-ack.patch |  79 ++++++++++
 11 files changed, 849 insertions(+)
 create mode 100644 queue-4.4/ip-hash-fragments-consistently.patch
 create mode 100644 queue-4.4/ip-in-cmsg-ip-v6-_origdstaddr-call-pskb_may_pull.patch
 create mode 100644 queue-4.4/net-mlx4_core-save-the-qpn-from-the-input-modifier-in-rst2init-wrapper.patch
 create mode 100644 queue-4.4/rtnetlink-add-rtnl_link_state-check-in-rtnl_configure_link.patch
 create mode 100644 queue-4.4/tcp-avoid-collapses-in-tcp_prune_queue-if-possible.patch
 create mode 100644 queue-4.4/tcp-detect-malicious-patterns-in-tcp_collapse_ofo_queue.patch
 create mode 100644 queue-4.4/tcp-do-not-cancel-delay-ack-on-dctcp-special-ack.patch
 create mode 100644 queue-4.4/tcp-do-not-delay-ack-in-dctcp-upon-ce-status-change.patch
 create mode 100644 queue-4.4/tcp-fix-dctcp-delayed-ack-schedule.patch
 create mode 100644 queue-4.4/tcp-helpers-to-send-special-dctcp-ack.patch

diff --git a/queue-4.4/ip-hash-fragments-consistently.patch b/queue-4.4/ip-hash-fragments-consistently.patch
new file mode 100644
index 00000000000..df19d623782
--- /dev/null
+++ b/queue-4.4/ip-hash-fragments-consistently.patch
@@ -0,0 +1,73 @@
+From foo@baz Fri Jul 27 09:17:52 CEST 2018
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Mon, 23 Jul 2018 16:50:48 +0200
+Subject: ip: hash fragments consistently
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 3dd1c9a1270736029ffca670e9bd0265f4120600 ]
+
+The skb hash for locally generated ip[v6] fragments belonging
+to the same datagram can vary in several circumstances:
+* for connected UDP[v6] sockets, the first fragment gets its hash
+  via set_owner_w()/skb_set_hash_from_sk()
+* for unconnected IPv6 UDPv6 sockets, the first fragment can get
+  its hash via ip6_make_flowlabel()/skb_get_hash_flowi6(), if
+  auto_flowlabel is enabled
+
+For the following frags the hash is usually computed via
+skb_get_hash().
+The above can cause OoO for unconnected IPv6 UDPv6 socket: in that
+scenario the egress tx queue can be selected on a per packet basis
+via the skb hash.
+It may also fool flow-oriented schedulers to place fragments belonging
+to the same datagram in different flows.
+
+Fix the issue by copying the skb hash from the head frag into
+the others at fragmentation time.
+
+Before this commit:
+perf probe -a "dev_queue_xmit skb skb->hash skb->l4_hash:b1@0/8 skb->sw_hash:b1@1/8"
+netperf -H $IPV4 -t UDP_STREAM -l 5 -- -m 2000 -n &
+perf record -e probe:dev_queue_xmit -e probe:skb_set_owner_w -a sleep 0.1
+perf script
+probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=3713014309 l4_hash=1 sw_hash=0
+probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=0 l4_hash=0 sw_hash=0
+
+After this commit:
+probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=2171763177 l4_hash=1 sw_hash=0
+probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=2171763177 l4_hash=1 sw_hash=0
+
+Fixes: b73c3d0e4f0e ("net: Save TX flow hash in sock and set in skbuf on xmit")
+Fixes: 67800f9b1f4e ("ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_output.c  |    2 ++
+ net/ipv6/ip6_output.c |    2 ++
+ 2 files changed, 4 insertions(+)
+
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -480,6 +480,8 @@ static void ip_copy_metadata(struct sk_b
+ 	to->dev = from->dev;
+ 	to->mark = from->mark;
+ 
++	skb_copy_hash(to, from);
++
+ 	/* Copy the flags to each fragment. */
+ 	IPCB(to)->flags = IPCB(from)->flags;
+ 
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -559,6 +559,8 @@ static void ip6_copy_metadata(struct sk_
+ 	to->dev = from->dev;
+ 	to->mark = from->mark;
+ 
++	skb_copy_hash(to, from);
++
+ #ifdef CONFIG_NET_SCHED
+ 	to->tc_index = from->tc_index;
+ #endif
diff --git a/queue-4.4/ip-in-cmsg-ip-v6-_origdstaddr-call-pskb_may_pull.patch b/queue-4.4/ip-in-cmsg-ip-v6-_origdstaddr-call-pskb_may_pull.patch
new file mode 100644
index 00000000000..aa36759c73b
--- /dev/null
+++ b/queue-4.4/ip-in-cmsg-ip-v6-_origdstaddr-call-pskb_may_pull.patch
@@ -0,0 +1,93 @@
+From foo@baz Fri Jul 27 09:17:52 CEST 2018
+From: Willem de Bruijn <willemb@google.com>
+Date: Mon, 23 Jul 2018 19:36:48 -0400
+Subject: ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull
+
+From: Willem de Bruijn <willemb@google.com>
+
+[ Upstream commit 2efd4fca703a6707cad16ab486eaab8fc7f0fd49 ]
+
+Syzbot reported a read beyond the end of the skb head when returning
+IPV6_ORIGDSTADDR:
+
+  BUG: KMSAN: kernel-infoleak in put_cmsg+0x5ef/0x860 net/core/scm.c:242
+  CPU: 0 PID: 4501 Comm: syz-executor128 Not tainted 4.17.0+ #9
+  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
+  Google 01/01/2011
+  Call Trace:
+    __dump_stack lib/dump_stack.c:77 [inline]
+    dump_stack+0x185/0x1d0 lib/dump_stack.c:113
+    kmsan_report+0x188/0x2a0 mm/kmsan/kmsan.c:1125
+    kmsan_internal_check_memory+0x138/0x1f0 mm/kmsan/kmsan.c:1219
+    kmsan_copy_to_user+0x7a/0x160 mm/kmsan/kmsan.c:1261
+    copy_to_user include/linux/uaccess.h:184 [inline]
+    put_cmsg+0x5ef/0x860 net/core/scm.c:242
+    ip6_datagram_recv_specific_ctl+0x1cf3/0x1eb0 net/ipv6/datagram.c:719
+    ip6_datagram_recv_ctl+0x41c/0x450 net/ipv6/datagram.c:733
+    rawv6_recvmsg+0x10fb/0x1460 net/ipv6/raw.c:521
+    [..]
+
+This logic and its ipv4 counterpart read the destination port from
+the packet at skb_transport_offset(skb) + 4.
+
+With MSG_MORE and a local SOCK_RAW sender, syzbot was able to cook a
+packet that stores headers exactly up to skb_transport_offset(skb) in
+the head and the remainder in a frag.
+
+Call pskb_may_pull before accessing the pointer to ensure that it lies
+in skb head.
+
+Link: http://lkml.kernel.org/r/CAF=yD-LEJwZj5a1-bAAj2Oy_hKmGygV6rsJ_WOrAYnv-fnayiQ@mail.gmail.com
+Reported-by: syzbot+9adb4b567003cac781f0@syzkaller.appspotmail.com
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_sockglue.c |    7 +++++--
+ net/ipv6/datagram.c    |    7 +++++--
+ 2 files changed, 10 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/ip_sockglue.c
++++ b/net/ipv4/ip_sockglue.c
+@@ -135,15 +135,18 @@ static void ip_cmsg_recv_dstaddr(struct
+ {
+ 	struct sockaddr_in sin;
+ 	const struct iphdr *iph = ip_hdr(skb);
+-	__be16 *ports = (__be16 *)skb_transport_header(skb);
++	__be16 *ports;
++	int end;
+ 
+-	if (skb_transport_offset(skb) + 4 > skb->len)
++	end = skb_transport_offset(skb) + 4;
++	if (end > 0 && !pskb_may_pull(skb, end))
+ 		return;
+ 
+ 	/* All current transport protocols have the port numbers in the
+ 	 * first four bytes of the transport header and this function is
+ 	 * written with this assumption in mind.
+ 	 */
++	ports = (__be16 *)skb_transport_header(skb);
+ 
+ 	sin.sin_family = AF_INET;
+ 	sin.sin_addr.s_addr = iph->daddr;
+--- a/net/ipv6/datagram.c
++++ b/net/ipv6/datagram.c
+@@ -657,13 +657,16 @@ void ip6_datagram_recv_specific_ctl(stru
+ 	}
+ 	if (np->rxopt.bits.rxorigdstaddr) {
+ 		struct sockaddr_in6 sin6;
+-		__be16 *ports = (__be16 *) skb_transport_header(skb);
++		__be16 *ports;
++		int end;
+ 
+-		if (skb_transport_offset(skb) + 4 <= skb->len) {
++		end = skb_transport_offset(skb) + 4;
++		if (end <= 0 || pskb_may_pull(skb, end)) {
+ 			/* All current transport protocols have the port numbers in the
+ 			 * first four bytes of the transport header and this function is
+ 			 * written with this assumption in mind.
+ 			 */
++			ports = (__be16 *)skb_transport_header(skb);
+ 
+ 			sin6.sin6_family = AF_INET6;
+ 			sin6.sin6_addr = ipv6_hdr(skb)->daddr;
diff --git a/queue-4.4/net-mlx4_core-save-the-qpn-from-the-input-modifier-in-rst2init-wrapper.patch b/queue-4.4/net-mlx4_core-save-the-qpn-from-the-input-modifier-in-rst2init-wrapper.patch
new file mode 100644
index 00000000000..f85c58a7b90
--- /dev/null
+++ b/queue-4.4/net-mlx4_core-save-the-qpn-from-the-input-modifier-in-rst2init-wrapper.patch
@@ -0,0 +1,40 @@
+From foo@baz Fri Jul 27 09:17:52 CEST 2018
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Date: Tue, 24 Jul 2018 14:27:55 +0300
+Subject: net/mlx4_core: Save the qpn from the input modifier in RST2INIT wrapper
+
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+
+[ Upstream commit 958c696f5a7274d9447a458ad7aa70719b29a50a ]
+
+Function mlx4_RST2INIT_QP_wrapper saved the qp number passed in the qp
+context, rather than the one passed in the input modifier.
+
+However, the qp number in the qp context is not defined as a
+required parameter by the FW. Therefore, drivers may choose to not
+specify the qp number in the qp context for the reset-to-init transition.
+
+Thus, we must save the qp number passed in the command input modifier --
+which is always present. (This saved qp number is used as the input
+modifier for command 2RST_QP when a slave's qp's are destroyed).
+
+Fixes: c82e9aa0a8bc ("mlx4_core: resource tracking for HCA resources used by guests")
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/resource_tracker.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
++++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+@@ -2891,7 +2891,7 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4
+ 	u32 srqn = qp_get_srqn(qpc) & 0xffffff;
+ 	int use_srq = (qp_get_srqn(qpc) >> 24) & 1;
+ 	struct res_srq *srq;
+-	int local_qpn = be32_to_cpu(qpc->local_qpn) & 0xffffff;
++	int local_qpn = vhcr->in_modifier & 0xffffff;
+ 
+ 	err = adjust_qp_sched_queue(dev, slave, qpc, inbox);
+ 	if (err)
diff --git a/queue-4.4/rtnetlink-add-rtnl_link_state-check-in-rtnl_configure_link.patch b/queue-4.4/rtnetlink-add-rtnl_link_state-check-in-rtnl_configure_link.patch
new file mode 100644
index 00000000000..7329180d68a
--- /dev/null
+++ b/queue-4.4/rtnetlink-add-rtnl_link_state-check-in-rtnl_configure_link.patch
@@ -0,0 +1,65 @@
+From foo@baz Fri Jul 27 09:17:52 CEST 2018
+From: Roopa Prabhu <roopa@cumulusnetworks.com>
+Date: Fri, 20 Jul 2018 13:21:01 -0700
+Subject: rtnetlink: add rtnl_link_state check in rtnl_configure_link
+
+From: Roopa Prabhu <roopa@cumulusnetworks.com>
+
+[ Upstream commit 5025f7f7d506fba9b39e7fe8ca10f6f34cb9bc2d ]
+
+rtnl_configure_link sets dev->rtnl_link_state to
+RTNL_LINK_INITIALIZED and unconditionally calls
+__dev_notify_flags to notify user-space of dev flags.
+
+current call sequence for rtnl_configure_link
+rtnetlink_newlink
+    rtnl_link_ops->newlink
+    rtnl_configure_link (unconditionally notifies userspace of
+                         default and new dev flags)
+
+If a newlink handler wants to call rtnl_configure_link
+early, we will end up with duplicate notifications to
+user-space.
+
+This patch fixes rtnl_configure_link to check rtnl_link_state
+and call __dev_notify_flags with gchanges = 0 if already
+RTNL_LINK_INITIALIZED.
+
+Later in the series, this patch will help the following sequence
+where a driver implementing newlink can call rtnl_configure_link
+to initialize the link early.
+
+makes the following call sequence work:
+rtnetlink_newlink
+    rtnl_link_ops->newlink (vxlan) -> rtnl_configure_link (initializes
+                                                link and notifies
+                                                user-space of default
+                                                dev flags)
+    rtnl_configure_link (updates dev flags if requested by user ifm
+                         and notifies user-space of new dev flags)
+
+Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -2087,9 +2087,12 @@ int rtnl_configure_link(struct net_devic
+ 			return err;
+ 	}
+ 
+-	dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
+-
+-	__dev_notify_flags(dev, old_flags, ~0U);
++	if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
++		__dev_notify_flags(dev, old_flags, 0U);
++	} else {
++		dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
++		__dev_notify_flags(dev, old_flags, ~0U);
++	}
+ 	return 0;
+ }
+ EXPORT_SYMBOL(rtnl_configure_link);
diff --git a/queue-4.4/series b/queue-4.4/series
index e4d320cdad1..8ea777392db 100644
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -1 +1,11 @@
 mips-ath79-fix-register-address-in-ath79_ddr_wb_flush.patch
+ip-hash-fragments-consistently.patch
+net-mlx4_core-save-the-qpn-from-the-input-modifier-in-rst2init-wrapper.patch
+rtnetlink-add-rtnl_link_state-check-in-rtnl_configure_link.patch
+tcp-fix-dctcp-delayed-ack-schedule.patch
+tcp-helpers-to-send-special-dctcp-ack.patch
+tcp-do-not-cancel-delay-ack-on-dctcp-special-ack.patch
+tcp-do-not-delay-ack-in-dctcp-upon-ce-status-change.patch
+tcp-avoid-collapses-in-tcp_prune_queue-if-possible.patch
+tcp-detect-malicious-patterns-in-tcp_collapse_ofo_queue.patch
+ip-in-cmsg-ip-v6-_origdstaddr-call-pskb_may_pull.patch
diff --git a/queue-4.4/tcp-avoid-collapses-in-tcp_prune_queue-if-possible.patch b/queue-4.4/tcp-avoid-collapses-in-tcp_prune_queue-if-possible.patch
new file mode 100644
index 00000000000..bab37cc1f76
--- /dev/null
+++ b/queue-4.4/tcp-avoid-collapses-in-tcp_prune_queue-if-possible.patch
@@ -0,0 +1,46 @@
+From foo@baz Fri Jul 27 09:17:52 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 23 Jul 2018 09:28:18 -0700
+Subject: tcp: avoid collapses in tcp_prune_queue() if possible
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit f4a3313d8e2ca9fd8d8f45e40a2903ba782607e7 ]
+
+Right after a TCP flow is created, receiving tiny out of order
+packets always hit the condition:
+
+if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+	tcp_clamp_window(sk);
+
+tcp_clamp_window() increases sk_rcvbuf to match sk_rmem_alloc
+(guarded by tcp_rmem[2])
+
+Calling tcp_collapse_ofo_queue() in this case is not useful,
+and offers an O(N^2) attack surface to malicious peers.
+
+Better not attempt anything before full queue capacity is reached,
+forcing attacker to spend lots of resource and allow us to more
+easily detect the abuse.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -4875,6 +4875,9 @@ static int tcp_prune_queue(struct sock *
+ 	else if (tcp_under_memory_pressure(sk))
+ 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
+ 
++	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
++		return 0;
++
+ 	tcp_collapse_ofo_queue(sk);
+ 	if (!skb_queue_empty(&sk->sk_receive_queue))
+ 		tcp_collapse(sk, &sk->sk_receive_queue,
diff --git a/queue-4.4/tcp-detect-malicious-patterns-in-tcp_collapse_ofo_queue.patch b/queue-4.4/tcp-detect-malicious-patterns-in-tcp_collapse_ofo_queue.patch
new file mode 100644
index 00000000000..f669daa88ce
--- /dev/null
+++ b/queue-4.4/tcp-detect-malicious-patterns-in-tcp_collapse_ofo_queue.patch
@@ -0,0 +1,69 @@
+From foo@baz Fri Jul 27 09:17:52 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 23 Jul 2018 09:28:19 -0700
+Subject: tcp: detect malicious patterns in tcp_collapse_ofo_queue()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3d4bf93ac12003f9b8e1e2de37fe27983deebdcf ]
+
+In case an attacker feeds tiny packets completely out of order,
+tcp_collapse_ofo_queue() might scan the whole rb-tree, performing
+expensive copies, but not changing socket memory usage at all.
+
+1) Do not attempt to collapse tiny skbs.
+2) Add logic to exit early when too many tiny skbs are detected.
+
+We prefer not doing aggressive collapsing (which copies packets)
+for pathological flows, and revert to tcp_prune_ofo_queue() which
+will be less expensive.
+
+In the future, we might add the possibility of terminating flows
+that are proven to be malicious.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |   15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -4789,6 +4789,7 @@ restart:
+ static void tcp_collapse_ofo_queue(struct sock *sk)
+ {
+ 	struct tcp_sock *tp = tcp_sk(sk);
++	u32 range_truesize, sum_tiny = 0;
+ 	struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
+ 	struct sk_buff *head;
+ 	u32 start, end;
+@@ -4798,6 +4799,7 @@ static void tcp_collapse_ofo_queue(struc
+ 
+ 	start = TCP_SKB_CB(skb)->seq;
+ 	end = TCP_SKB_CB(skb)->end_seq;
++	range_truesize = skb->truesize;
+ 	head = skb;
+ 
+ 	for (;;) {
+@@ -4812,8 +4814,17 @@ static void tcp_collapse_ofo_queue(struc
+ 		if (!skb ||
+ 		    after(TCP_SKB_CB(skb)->seq, end) ||
+ 		    before(TCP_SKB_CB(skb)->end_seq, start)) {
+-			tcp_collapse(sk, &tp->out_of_order_queue,
+-				     head, skb, start, end);
++			/* Do not attempt collapsing tiny skbs */
++			if (range_truesize != head->truesize ||
++			    end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
++				tcp_collapse(sk, &tp->out_of_order_queue,
++					     head, skb, start, end);
++			} else {
++				sum_tiny += range_truesize;
++				if (sum_tiny > sk->sk_rcvbuf >> 3)
++					return;
++			}
++
+ 			head = skb;
+ 			if (!skb)
+ 				break;
diff --git a/queue-4.4/tcp-do-not-cancel-delay-ack-on-dctcp-special-ack.patch b/queue-4.4/tcp-do-not-cancel-delay-ack-on-dctcp-special-ack.patch
new file mode 100644
index 00000000000..cf5eb0fef8a
--- /dev/null
+++ b/queue-4.4/tcp-do-not-cancel-delay-ack-on-dctcp-special-ack.patch
@@ -0,0 +1,138 @@
+From foo@baz Fri Jul 27 09:17:52 CEST 2018
+From: Yuchung Cheng <ycheng@google.com>
+Date: Wed, 18 Jul 2018 13:56:35 -0700
+Subject: tcp: do not cancel delay-AcK on DCTCP special ACK
+
+From: Yuchung Cheng <ycheng@google.com>
+
+[ Upstream commit 27cde44a259c380a3c09066fc4b42de7dde9b1ad ]
+
+Currently when a DCTCP receiver delays an ACK and receives a
+data packet with a different CE mark from the previous one's, it
+sends two immediate ACKs acking previous and latest sequences
+respectively (for ECN accounting).
+
+Previously sending the first ACK may mark off the delayed ACK timer
+(tcp_event_ack_sent). This may subsequently prevent sending the
+second ACK to acknowledge the latest sequence (tcp_ack_snd_check).
+The culprit is that tcp_send_ack() assumes it always acknowledges
+the latest sequence, which is not true for the first special ACK.
+
+The fix is to not make the assumption in tcp_send_ack and check the
+actual ack sequence before cancelling the delayed ACK. Further it's
+safer to pass the ack sequence number as a local variable into
+tcp_send_ack routine, instead of intercepting tp->rcv_nxt to avoid
+future bugs like this.
+
+Reported-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h     |    1 +
+ net/ipv4/tcp_dctcp.c  |   34 ++++------------------------------
+ net/ipv4/tcp_output.c |   11 ++++++++---
+ 3 files changed, 13 insertions(+), 33 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -559,6 +559,7 @@ void tcp_send_fin(struct sock *sk);
+ void tcp_send_active_reset(struct sock *sk, gfp_t priority);
+ int tcp_send_synack(struct sock *);
+ void tcp_push_one(struct sock *, unsigned int mss_now);
++void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
+ void tcp_send_ack(struct sock *sk);
+ void tcp_send_delayed_ack(struct sock *sk);
+ void tcp_send_loss_probe(struct sock *sk);
+--- a/net/ipv4/tcp_dctcp.c
++++ b/net/ipv4/tcp_dctcp.c
+@@ -135,21 +135,8 @@ static void dctcp_ce_state_0_to_1(struct
+ 	 * ACK has not sent yet.
+ 	 */
+ 	if (!ca->ce_state &&
+-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
+-		u32 tmp_rcv_nxt;
+-
+-		/* Save current rcv_nxt. */
+-		tmp_rcv_nxt = tp->rcv_nxt;
+-
+-		/* Generate previous ack with CE=0. */
+-		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+-		tp->rcv_nxt = ca->prior_rcv_nxt;
+-
+-		tcp_send_ack(sk);
+-
+-		/* Recover current rcv_nxt. */
+-		tp->rcv_nxt = tmp_rcv_nxt;
+-	}
++	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
++		__tcp_send_ack(sk, ca->prior_rcv_nxt);
+ 
+ 	ca->prior_rcv_nxt = tp->rcv_nxt;
+ 	ca->ce_state = 1;
+@@ -166,21 +153,8 @@ static void dctcp_ce_state_1_to_0(struct
+ 	 * ACK has not sent yet.
+ 	 */
+ 	if (ca->ce_state &&
+-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
+-		u32 tmp_rcv_nxt;
+-
+-		/* Save current rcv_nxt. */
+-		tmp_rcv_nxt = tp->rcv_nxt;
+-
+-		/* Generate previous ack with CE=1. */
+-		tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+-		tp->rcv_nxt = ca->prior_rcv_nxt;
+-
+-		tcp_send_ack(sk);
+-
+-		/* Recover current rcv_nxt. */
+-		tp->rcv_nxt = tmp_rcv_nxt;
+-	}
++	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
++		__tcp_send_ack(sk, ca->prior_rcv_nxt);
+ 
+ 	ca->prior_rcv_nxt = tp->rcv_nxt;
+ 	ca->ce_state = 0;
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -177,8 +177,13 @@ static void tcp_event_data_sent(struct t
+ }
+ 
+ /* Account for an ACK we sent. */
+-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
++static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
++				      u32 rcv_nxt)
+ {
++	struct tcp_sock *tp = tcp_sk(sk);
++
++	if (unlikely(rcv_nxt != tp->rcv_nxt))
++		return;  /* Special ACK sent by DCTCP to reflect ECN */
+ 	tcp_dec_quickack_mode(sk, pkts);
+ 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
+ }
+@@ -1005,7 +1010,7 @@ static int __tcp_transmit_skb(struct soc
+ 	icsk->icsk_af_ops->send_check(sk, skb);
+ 
+ 	if (likely(tcb->tcp_flags & TCPHDR_ACK))
+-		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
++		tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
+ 
+ 	if (skb->len != tcp_header_size)
+ 		tcp_event_data_sent(tp, sk);
+@@ -3400,12 +3405,12 @@ void __tcp_send_ack(struct sock *sk, u32
+ 	skb_mstamp_get(&buff->skb_mstamp);
+ 	__tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC), rcv_nxt);
+ }
++EXPORT_SYMBOL_GPL(__tcp_send_ack);
+ 
+ void tcp_send_ack(struct sock *sk)
+ {
+ 	__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
+ }
+-EXPORT_SYMBOL_GPL(tcp_send_ack);
+ 
+ /* This routine sends a packet with an out of date sequence
+  * number. It assumes the other end will try to ack it.
diff --git a/queue-4.4/tcp-do-not-delay-ack-in-dctcp-upon-ce-status-change.patch b/queue-4.4/tcp-do-not-delay-ack-in-dctcp-upon-ce-status-change.patch
new file mode 100644
index 00000000000..73d4a437cfc
--- /dev/null
+++ b/queue-4.4/tcp-do-not-delay-ack-in-dctcp-upon-ce-status-change.patch
@@ -0,0 +1,138 @@
+From foo@baz Fri Jul 27 09:17:52 CEST 2018
+From: Yuchung Cheng <ycheng@google.com>
+Date: Wed, 18 Jul 2018 13:56:36 -0700
+Subject: tcp: do not delay ACK in DCTCP upon CE status change
+
+From: Yuchung Cheng <ycheng@google.com>
+
+[ Upstream commit a0496ef2c23b3b180902dd185d0d63ccbc624cf8 ]
+
+Per DCTCP RFC8257 (Section 3.2) the ACK reflecting the CE status change
+has to be sent immediately so the sender can respond quickly:
+
+""" When receiving packets, the CE codepoint MUST be processed as follows:
+
+   1.  If the CE codepoint is set and DCTCP.CE is false, set DCTCP.CE to
+       true and send an immediate ACK.
+
+   2.  If the CE codepoint is not set and DCTCP.CE is true, set DCTCP.CE
+       to false and send an immediate ACK.
+"""
+
+Previously DCTCP implementation may continue to delay the ACK. This
+patch fixes that to implement the RFC by forcing an immediate ACK.
+
+Tested with this packetdrill script provided by Larry Brakmo
+
+0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0.000 setsockopt(3, SOL_TCP, TCP_CONGESTION, "dctcp", 5) = 0
+0.000 bind(3, ..., ...) = 0
+0.000 listen(3, 1) = 0
+
+0.100 < [ect0] SEW 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0.100 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+0.110 < [ect0] . 1:1(0) ack 1 win 257
+0.200 accept(3, ..., ...) = 4
+   +0 setsockopt(4, SOL_SOCKET, SO_DEBUG, [1], 4) = 0
+
+0.200 < [ect0] . 1:1001(1000) ack 1 win 257
+0.200 > [ect01] . 1:1(0) ack 1001
+
+0.200 write(4, ..., 1) = 1
+0.200 > [ect01] P. 1:2(1) ack 1001
+
+0.200 < [ect0] . 1001:2001(1000) ack 2 win 257
++0.005 < [ce] . 2001:3001(1000) ack 2 win 257
+
++0.000 > [ect01] . 2:2(0) ack 2001
+// Previously the ACK below would be delayed by 40ms
++0.000 > [ect01] E. 2:2(0) ack 3001
+
++0.500 < F. 9501:9501(0) ack 4 win 257
+
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h    |    1 +
+ net/ipv4/tcp_dctcp.c |   30 ++++++++++++++++++------------
+ net/ipv4/tcp_input.c |    3 ++-
+ 3 files changed, 21 insertions(+), 13 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -376,6 +376,7 @@ ssize_t tcp_splice_read(struct socket *s
+ 			struct pipe_inode_info *pipe, size_t len,
+ 			unsigned int flags);
+ 
++void tcp_enter_quickack_mode(struct sock *sk);
+ static inline void tcp_dec_quickack_mode(struct sock *sk,
+ 					 const unsigned int pkts)
+ {
+--- a/net/ipv4/tcp_dctcp.c
++++ b/net/ipv4/tcp_dctcp.c
+@@ -131,12 +131,15 @@ static void dctcp_ce_state_0_to_1(struct
+ 	struct dctcp *ca = inet_csk_ca(sk);
+ 	struct tcp_sock *tp = tcp_sk(sk);
+ 
+-	/* State has changed from CE=0 to CE=1 and delayed
+-	 * ACK has not sent yet.
+-	 */
+-	if (!ca->ce_state &&
+-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+-		__tcp_send_ack(sk, ca->prior_rcv_nxt);
++	if (!ca->ce_state) {
++		/* State has changed from CE=0 to CE=1, force an immediate
++		 * ACK to reflect the new CE state. If an ACK was delayed,
++		 * send that first to reflect the prior CE state.
++		 */
++		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
++			__tcp_send_ack(sk, ca->prior_rcv_nxt);
++		tcp_enter_quickack_mode(sk);
++	}
+ 
+ 	ca->prior_rcv_nxt = tp->rcv_nxt;
+ 	ca->ce_state = 1;
+@@ -149,12 +152,15 @@ static void dctcp_ce_state_1_to_0(struct
+ 	struct dctcp *ca = inet_csk_ca(sk);
+ 	struct tcp_sock *tp = tcp_sk(sk);
+ 
+-	/* State has changed from CE=1 to CE=0 and delayed
+-	 * ACK has not sent yet.
+-	 */
+-	if (ca->ce_state &&
+-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+-		__tcp_send_ack(sk, ca->prior_rcv_nxt);
++	if (ca->ce_state) {
++		/* State has changed from CE=1 to CE=0, force an immediate
++		 * ACK to reflect the new CE state. If an ACK was delayed,
++		 * send that first to reflect the prior CE state.
++		 */
++		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
++			__tcp_send_ack(sk, ca->prior_rcv_nxt);
++		tcp_enter_quickack_mode(sk);
++	}
+ 
+ 	ca->prior_rcv_nxt = tp->rcv_nxt;
+ 	ca->ce_state = 0;
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -187,13 +187,14 @@ static void tcp_incr_quickack(struct soc
+ 		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
+ }
+ 
+-static void tcp_enter_quickack_mode(struct sock *sk)
++void tcp_enter_quickack_mode(struct sock *sk)
+ {
+ 	struct inet_connection_sock *icsk = inet_csk(sk);
+ 	tcp_incr_quickack(sk);
+ 	icsk->icsk_ack.pingpong = 0;
+ 	icsk->icsk_ack.ato = TCP_ATO_MIN;
+ }
++EXPORT_SYMBOL(tcp_enter_quickack_mode);
+ 
+ /* Send ACKs quickly, if "quick" count is not exhausted
+  * and the session is not interactive.
diff --git a/queue-4.4/tcp-fix-dctcp-delayed-ack-schedule.patch b/queue-4.4/tcp-fix-dctcp-delayed-ack-schedule.patch
new file mode 100644
index 00000000000..e0f868dc3d2
--- /dev/null
+++ b/queue-4.4/tcp-fix-dctcp-delayed-ack-schedule.patch
@@ -0,0 +1,98 @@
+From foo@baz Fri Jul 27 09:17:52 CEST 2018
+From: Yuchung Cheng <ycheng@google.com>
+Date: Thu, 12 Jul 2018 06:04:52 -0700
+Subject: tcp: fix dctcp delayed ACK schedule
+
+From: Yuchung Cheng <ycheng@google.com>
+
+[ Upstream commit b0c05d0e99d98d7f0cd41efc1eeec94efdc3325d ]
+
+Previously, when a data segment was sent an ACK was piggybacked
+on the data segment without generating a CA_EVENT_NON_DELAYED_ACK
+event to notify congestion control modules. So the DCTCP
+ca->delayed_ack_reserved flag could incorrectly stay set when
+in fact there were no delayed ACKs being reserved. This could result
+in sending a special ECN notification ACK that carries an older
+ACK sequence, when in fact there was no need for such an ACK.
+DCTCP keeps track of the delayed ACK status with its own separate
+state ca->delayed_ack_reserved. Previously it could accidentally cancel
+the delayed ACK without updating this field upon sending a special
+ACK that carries an older ACK sequence. This inconsistency would
+lead to the DCTCP receiver never acknowledging the latest data until
+the sender times out and retries in some cases.
+
+Packetdrill script (provided by Larry Brakmo)
+
+0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0.000 setsockopt(3, SOL_TCP, TCP_CONGESTION, "dctcp", 5) = 0
+0.000 bind(3, ..., ...) = 0
+0.000 listen(3, 1) = 0
+
+0.100 < [ect0] SEW 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0.100 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+0.110 < [ect0] . 1:1(0) ack 1 win 257
+0.200 accept(3, ..., ...) = 4
+
+0.200 < [ect0] . 1:1001(1000) ack 1 win 257
+0.200 > [ect01] . 1:1(0) ack 1001
+
+0.200 write(4, ..., 1) = 1
+0.200 > [ect01] P. 1:2(1) ack 1001
+
+0.200 < [ect0] . 1001:2001(1000) ack 2 win 257
+0.200 write(4, ..., 1) = 1
+0.200 > [ect01] P. 2:3(1) ack 2001
+
+0.200 < [ect0] . 2001:3001(1000) ack 3 win 257
+0.200 < [ect0] . 3001:4001(1000) ack 3 win 257
+0.200 > [ect01] . 3:3(0) ack 4001
+
+0.210 < [ce] P. 4001:4501(500) ack 3 win 257
+
++0.001 read(4, ..., 4500) = 4500
++0 write(4, ..., 1) = 1
++0 > [ect01] PE. 3:4(1) ack 4501
+
++0.010 < [ect0] W. 4501:5501(1000) ack 4 win 257
+// Previously the ACK sequence below would be 4501, causing a long RTO
++0.040~+0.045 > [ect01] . 4:4(0) ack 5501   // delayed ack
+
++0.311 < [ect0] . 5501:6501(1000) ack 4 win 257  // More data
++0 > [ect01] . 4:4(0) ack 6501     // now acks everything
+
++0.500 < F. 9501:9501(0) ack 4 win 257
+
+Reported-by: Larry Brakmo <brakmo@fb.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Lawrence Brakmo <brakmo@fb.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_dctcp.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_dctcp.c
++++ b/net/ipv4/tcp_dctcp.c
+@@ -134,7 +134,8 @@ static void dctcp_ce_state_0_to_1(struct
+ 	/* State has changed from CE=0 to CE=1 and delayed
+ 	 * ACK has not sent yet.
+ 	 */
+-	if (!ca->ce_state && ca->delayed_ack_reserved) {
++	if (!ca->ce_state &&
++	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
+ 		u32 tmp_rcv_nxt;
+ 
+ 		/* Save current rcv_nxt. */
+@@ -164,7 +165,8 @@ static void dctcp_ce_state_1_to_0(struct
+ 	/* State has changed from CE=1 to CE=0 and delayed
+ 	 * ACK has not sent yet.
+ 	 */
+-	if (ca->ce_state && ca->delayed_ack_reserved) {
++	if (ca->ce_state &&
++	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
+ 		u32 tmp_rcv_nxt;
+ 
+ 		/* Save current rcv_nxt. */
diff --git a/queue-4.4/tcp-helpers-to-send-special-dctcp-ack.patch b/queue-4.4/tcp-helpers-to-send-special-dctcp-ack.patch
new file mode 100644
index 00000000000..598339a2f8f
--- /dev/null
+++ b/queue-4.4/tcp-helpers-to-send-special-dctcp-ack.patch
@@ -0,0 +1,79 @@
+From foo@baz Fri Jul 27 09:17:52 CEST 2018
+From: Yuchung Cheng <ycheng@google.com>
+Date: Wed, 18 Jul 2018 13:56:34 -0700
+Subject: tcp: helpers to send special DCTCP ack
+
+From: Yuchung Cheng <ycheng@google.com>
+
+[ Upstream commit 2987babb6982306509380fc11b450227a844493b ]
+
+Refactor and create helpers to send the special ACK in DCTCP.
+
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |   22 +++++++++++++++++-----
+ 1 file changed, 17 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -901,8 +901,8 @@ out:
+  * We are working here with either a clone of the original
+  * SKB, or a fresh unique copy made by the retransmit engine.
+  */
+-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+-			    gfp_t gfp_mask)
++static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
++			      int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
+ {
+ 	const struct inet_connection_sock *icsk = inet_csk(sk);
+ 	struct inet_sock *inet;
+@@ -962,7 +962,7 @@ static int tcp_transmit_skb(struct sock
+ 	th->source		= inet->inet_sport;
+ 	th->dest		= inet->inet_dport;
+ 	th->seq			= htonl(tcb->seq);
+-	th->ack_seq		= htonl(tp->rcv_nxt);
++	th->ack_seq		= htonl(rcv_nxt);
+ 	*(((__be16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
+ 					tcb->tcp_flags);
+ 
+@@ -1036,6 +1036,13 @@ static int tcp_transmit_skb(struct sock
+ 	return net_xmit_eval(err);
+ }
+ 
++static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
++			    gfp_t gfp_mask)
++{
++	return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
++				  tcp_sk(sk)->rcv_nxt);
++}
++
+ /* This routine just queues the buffer for sending.
+  *
+  * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
+@@ -3354,7 +3361,7 @@ void tcp_send_delayed_ack(struct sock *s
+ }
+ 
+ /* This routine sends an ack and also updates the window. */
+-void tcp_send_ack(struct sock *sk)
++void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
+ {
+ 	struct sk_buff *buff;
+ 
+@@ -3391,7 +3398,12 @@ void tcp_send_ack(struct sock *sk)
+ 
+ 	/* Send it off, this clears delayed acks for us. */
+ 	skb_mstamp_get(&buff->skb_mstamp);
+-	tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
++	__tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC), rcv_nxt);
++}
++
++void tcp_send_ack(struct sock *sk)
++{
++	__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
+ }
+ EXPORT_SYMBOL_GPL(tcp_send_ack);
+ 
-- 
2.47.3