]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 3 Aug 2018 19:44:23 +0000 (21:44 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 3 Aug 2018 19:44:23 +0000 (21:44 +0200)
added patches:
ipv4-remove-bug_on-from-fib_compute_spec_dst.patch
net-fix-amd-xgbe-flow-control-issue.patch
net-lan78xx-fix-rx-handling-before-first-packet-is-send.patch
net-stmmac-align-dma-stuff-to-largest-cache-line-length.patch
netlink-do-not-subscribe-to-non-existent-groups.patch
netlink-don-t-shift-with-ub-on-nlk-ngroups.patch
tcp-add-max_quickacks-param-to-tcp_incr_quickack-and-tcp_enter_quickack_mode.patch
tcp-add-one-more-quick-ack-after-after-ecn-events.patch
tcp-do-not-aggressively-quick-ack-after-ecn-events.patch
tcp-do-not-force-quickack-when-receiving-out-of-order-packets.patch
tcp-refactor-tcp_ecn_check_ce-to-remove-sk-type-cast.patch
xen-netfront-wait-xenbus-state-change-when-load-module-manually.patch

13 files changed:
queue-4.4/ipv4-remove-bug_on-from-fib_compute_spec_dst.patch [new file with mode: 0644]
queue-4.4/net-fix-amd-xgbe-flow-control-issue.patch [new file with mode: 0644]
queue-4.4/net-lan78xx-fix-rx-handling-before-first-packet-is-send.patch [new file with mode: 0644]
queue-4.4/net-stmmac-align-dma-stuff-to-largest-cache-line-length.patch [new file with mode: 0644]
queue-4.4/netlink-do-not-subscribe-to-non-existent-groups.patch [new file with mode: 0644]
queue-4.4/netlink-don-t-shift-with-ub-on-nlk-ngroups.patch [new file with mode: 0644]
queue-4.4/series
queue-4.4/tcp-add-max_quickacks-param-to-tcp_incr_quickack-and-tcp_enter_quickack_mode.patch [new file with mode: 0644]
queue-4.4/tcp-add-one-more-quick-ack-after-after-ecn-events.patch [new file with mode: 0644]
queue-4.4/tcp-do-not-aggressively-quick-ack-after-ecn-events.patch [new file with mode: 0644]
queue-4.4/tcp-do-not-force-quickack-when-receiving-out-of-order-packets.patch [new file with mode: 0644]
queue-4.4/tcp-refactor-tcp_ecn_check_ce-to-remove-sk-type-cast.patch [new file with mode: 0644]
queue-4.4/xen-netfront-wait-xenbus-state-change-when-load-module-manually.patch [new file with mode: 0644]

diff --git a/queue-4.4/ipv4-remove-bug_on-from-fib_compute_spec_dst.patch b/queue-4.4/ipv4-remove-bug_on-from-fib_compute_spec_dst.patch
new file mode 100644 (file)
index 0000000..21b0270
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Date: Fri, 27 Jul 2018 18:15:46 +0200
+Subject: ipv4: remove BUG_ON() from fib_compute_spec_dst
+
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+
+[ Upstream commit 9fc12023d6f51551d6ca9ed7e02ecc19d79caf17 ]
+
+Remove BUG_ON() from fib_compute_spec_dst routine and check
+in_dev pointer during flowi4 data structure initialization.
+fib_compute_spec_dst routine can be run concurrently with device removal
+where ip_ptr net_device pointer is set to NULL. This can happen
+if userspace enables pkt info on UDP rx socket and the device
+is removed while traffic is flowing.
+
+Fixes: 35ebf65e851c ("ipv4: Create and use fib_compute_spec_dst() helper")
+Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_frontend.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -289,19 +289,19 @@ __be32 fib_compute_spec_dst(struct sk_bu
+               return ip_hdr(skb)->daddr;
+       in_dev = __in_dev_get_rcu(dev);
+-      BUG_ON(!in_dev);
+       net = dev_net(dev);
+       scope = RT_SCOPE_UNIVERSE;
+       if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
++              bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev);
+               struct flowi4 fl4 = {
+                       .flowi4_iif = LOOPBACK_IFINDEX,
+                       .flowi4_oif = l3mdev_master_ifindex_rcu(dev),
+                       .daddr = ip_hdr(skb)->saddr,
+                       .flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
+                       .flowi4_scope = scope,
+-                      .flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0,
++                      .flowi4_mark = vmark ? skb->mark : 0,
+               };
+               if (!fib_lookup(net, &fl4, &res, 0))
+                       return FIB_RES_PREFSRC(net, res);
diff --git a/queue-4.4/net-fix-amd-xgbe-flow-control-issue.patch b/queue-4.4/net-fix-amd-xgbe-flow-control-issue.patch
new file mode 100644 (file)
index 0000000..80b2266
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: tangpengpeng <tangpengpeng@higon.com>
+Date: Thu, 26 Jul 2018 14:45:16 +0800
+Subject: net: fix amd-xgbe flow-control issue
+
+From: tangpengpeng <tangpengpeng@higon.com>
+
+[ Upstream commit 7f3fc7ddf719cd6faaf787722c511f6918ac6aab ]
+
+If we enable or disable xgbe flow-control with ethtool,
+it doesn't work, because the parameter is not properly
+assigned. So we need to adjust the assignment order
+of the parameters.
+
+Fixes: c1ce2f77366b ("amd-xgbe: Fix flow control setting logic")
+Signed-off-by: tangpengpeng <tangpengpeng@higon.com>
+Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-mdio.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+@@ -872,14 +872,14 @@ static void xgbe_phy_adjust_link(struct
+               if (pdata->tx_pause != pdata->phy.tx_pause) {
+                       new_state = 1;
+-                      pdata->hw_if.config_tx_flow_control(pdata);
+                       pdata->tx_pause = pdata->phy.tx_pause;
++                      pdata->hw_if.config_tx_flow_control(pdata);
+               }
+               if (pdata->rx_pause != pdata->phy.rx_pause) {
+                       new_state = 1;
+-                      pdata->hw_if.config_rx_flow_control(pdata);
+                       pdata->rx_pause = pdata->phy.rx_pause;
++                      pdata->hw_if.config_rx_flow_control(pdata);
+               }
+               /* Speed support */
diff --git a/queue-4.4/net-lan78xx-fix-rx-handling-before-first-packet-is-send.patch b/queue-4.4/net-lan78xx-fix-rx-handling-before-first-packet-is-send.patch
new file mode 100644 (file)
index 0000000..cf02835
--- /dev/null
@@ -0,0 +1,35 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Stefan Wahren <stefan.wahren@i2se.com>
+Date: Sat, 28 Jul 2018 09:52:10 +0200
+Subject: net: lan78xx: fix rx handling before first packet is send
+
+From: Stefan Wahren <stefan.wahren@i2se.com>
+
+[ Upstream commit 136f55f660192ce04af091642efc75d85e017364 ]
+
+As long as the bh tasklet hasn't been scheduled once, no packet from the rx
+path will be handled. Since the tx path also schedules the same tasklet,
+this situation only persists until the first packet transmission.
+So fix this issue by scheduling the tasklet after link reset.
+
+Link: https://github.com/raspberrypi/linux/issues/2617
+Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet")
+Suggested-by: Floris Bos <bos@je-eigen-domein.nl>
+Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/lan78xx.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -1361,6 +1361,8 @@ static void lan78xx_init_mac_address(str
+                       netif_dbg(dev, ifup, dev->net,
+                                 "MAC address set to random addr");
+               }
++
++              tasklet_schedule(&dev->bh);
+       }
+       ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
diff --git a/queue-4.4/net-stmmac-align-dma-stuff-to-largest-cache-line-length.patch b/queue-4.4/net-stmmac-align-dma-stuff-to-largest-cache-line-length.patch
new file mode 100644 (file)
index 0000000..91b3c28
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+Date: Thu, 26 Jul 2018 15:05:37 +0300
+Subject: NET: stmmac: align DMA stuff to largest cache line length
+
+From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+
+[ Upstream commit 9939a46d90c6c76f4533d534dbadfa7b39dc6acc ]
+
+As of today, the STMMAC_ALIGN macro (which is used to align DMA stuff)
+relies on L1 line length (L1_CACHE_BYTES).
+This isn't correct in case of system with several cache levels
+which might have L1 cache line length smaller than L2 line. This
+can lead to sharing one cache line between DMA buffer and other
+data, so we can lose this data while invalidating the DMA buffer before
+a DMA transaction.
+
+Fix that by using SMP_CACHE_BYTES instead of L1_CACHE_BYTES for
+aligning.
+
+Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -54,7 +54,7 @@
+ #include <linux/reset.h>
+ #include <linux/of_mdio.h>
+-#define STMMAC_ALIGN(x)       L1_CACHE_ALIGN(x)
++#define       STMMAC_ALIGN(x)         __ALIGN_KERNEL(x, SMP_CACHE_BYTES)
+ /* Module parameters */
+ #define TX_TIMEO      5000
diff --git a/queue-4.4/netlink-do-not-subscribe-to-non-existent-groups.patch b/queue-4.4/netlink-do-not-subscribe-to-non-existent-groups.patch
new file mode 100644 (file)
index 0000000..de809d2
--- /dev/null
@@ -0,0 +1,35 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Dmitry Safonov <dima@arista.com>
+Date: Fri, 27 Jul 2018 16:54:44 +0100
+Subject: netlink: Do not subscribe to non-existent groups
+
+From: Dmitry Safonov <dima@arista.com>
+
+[ Upstream commit 7acf9d4237c46894e0fa0492dd96314a41742e84 ]
+
+Make ABI more strict about subscribing to group > ngroups.
+Code doesn't check for that and it looks bogus.
+(one can subscribe to non-existing group)
+Still, it's possible to bind() to all possible groups with (-1)
+
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Steffen Klassert <steffen.klassert@secunet.com>
+Cc: netdev@vger.kernel.org
+Signed-off-by: Dmitry Safonov <dima@arista.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -983,6 +983,7 @@ static int netlink_bind(struct socket *s
+               if (err)
+                       return err;
+       }
++      groups &= (1UL << nlk->ngroups) - 1;
+       bound = nlk->bound;
+       if (bound) {
diff --git a/queue-4.4/netlink-don-t-shift-with-ub-on-nlk-ngroups.patch b/queue-4.4/netlink-don-t-shift-with-ub-on-nlk-ngroups.patch
new file mode 100644 (file)
index 0000000..b0326b0
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Dmitry Safonov <dima@arista.com>
+Date: Mon, 30 Jul 2018 18:32:36 +0100
+Subject: netlink: Don't shift with UB on nlk->ngroups
+
+From: Dmitry Safonov <dima@arista.com>
+
+[ Upstream commit 61f4b23769f0cc72ae62c9a81cf08f0397d40da8 ]
+
+On i386 nlk->ngroups might be 32 or 0, which leads to UB, resulting in
+a hang during boot.
+Check for 0 ngroups and use (unsigned long long) as a type to shift.
+
+Fixes: 7acf9d4237c4 ("netlink: Do not subscribe to non-existent groups").
+Reported-by: kernel test robot <rong.a.chen@intel.com>
+Signed-off-by: Dmitry Safonov <dima@arista.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -983,7 +983,11 @@ static int netlink_bind(struct socket *s
+               if (err)
+                       return err;
+       }
+-      groups &= (1UL << nlk->ngroups) - 1;
++
++      if (nlk->ngroups == 0)
++              groups = 0;
++      else
++              groups &= (1ULL << nlk->ngroups) - 1;
+       bound = nlk->bound;
+       if (bound) {
index b12970406bace0763013bd3ae87865489d75ae82..7e383bb9bc8d9601dcbc937816a6cf3c49c99797 100644 (file)
@@ -99,3 +99,15 @@ ext4-fix-inline-data-updates-with-checksums-enabled.patch
 ext4-check-for-allocation-block-validity-with-block-group-locked.patch
 dmaengine-pxa_dma-remove-duplicate-const-qualifier.patch
 asoc-pxa-fix-module-autoload-for-platform-drivers.patch
+ipv4-remove-bug_on-from-fib_compute_spec_dst.patch
+net-fix-amd-xgbe-flow-control-issue.patch
+net-lan78xx-fix-rx-handling-before-first-packet-is-send.patch
+xen-netfront-wait-xenbus-state-change-when-load-module-manually.patch
+netlink-do-not-subscribe-to-non-existent-groups.patch
+netlink-don-t-shift-with-ub-on-nlk-ngroups.patch
+net-stmmac-align-dma-stuff-to-largest-cache-line-length.patch
+tcp-do-not-force-quickack-when-receiving-out-of-order-packets.patch
+tcp-add-max_quickacks-param-to-tcp_incr_quickack-and-tcp_enter_quickack_mode.patch
+tcp-do-not-aggressively-quick-ack-after-ecn-events.patch
+tcp-refactor-tcp_ecn_check_ce-to-remove-sk-type-cast.patch
+tcp-add-one-more-quick-ack-after-after-ecn-events.patch
diff --git a/queue-4.4/tcp-add-max_quickacks-param-to-tcp_incr_quickack-and-tcp_enter_quickack_mode.patch b/queue-4.4/tcp-add-max_quickacks-param-to-tcp_incr_quickack-and-tcp_enter_quickack_mode.patch
new file mode 100644 (file)
index 0000000..64e5593
--- /dev/null
@@ -0,0 +1,150 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 21 May 2018 15:08:56 -0700
+Subject: tcp: add max_quickacks param to tcp_incr_quickack and tcp_enter_quickack_mode
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9a9c9b51e54618861420093ae6e9b50a961914c5 ]
+
+We want to add finer control of the number of ACK packets sent after
+ECN events.
+
+This patch is not changing current behavior, it only enables following
+change.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h    |    2 +-
+ net/ipv4/tcp_dctcp.c |    4 ++--
+ net/ipv4/tcp_input.c |   24 +++++++++++++-----------
+ 3 files changed, 16 insertions(+), 14 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -376,7 +376,7 @@ ssize_t tcp_splice_read(struct socket *s
+                       struct pipe_inode_info *pipe, size_t len,
+                       unsigned int flags);
+-void tcp_enter_quickack_mode(struct sock *sk);
++void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
+ static inline void tcp_dec_quickack_mode(struct sock *sk,
+                                        const unsigned int pkts)
+ {
+--- a/net/ipv4/tcp_dctcp.c
++++ b/net/ipv4/tcp_dctcp.c
+@@ -138,7 +138,7 @@ static void dctcp_ce_state_0_to_1(struct
+                */
+               if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+                       __tcp_send_ack(sk, ca->prior_rcv_nxt);
+-              tcp_enter_quickack_mode(sk);
++              tcp_enter_quickack_mode(sk, 1);
+       }
+       ca->prior_rcv_nxt = tp->rcv_nxt;
+@@ -159,7 +159,7 @@ static void dctcp_ce_state_1_to_0(struct
+                */
+               if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+                       __tcp_send_ack(sk, ca->prior_rcv_nxt);
+-              tcp_enter_quickack_mode(sk);
++              tcp_enter_quickack_mode(sk, 1);
+       }
+       ca->prior_rcv_nxt = tp->rcv_nxt;
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -176,21 +176,23 @@ static void tcp_measure_rcv_mss(struct s
+       }
+ }
+-static void tcp_incr_quickack(struct sock *sk)
++static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
+ {
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
+       if (quickacks == 0)
+               quickacks = 2;
++      quickacks = min(quickacks, max_quickacks);
+       if (quickacks > icsk->icsk_ack.quick)
+-              icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
++              icsk->icsk_ack.quick = quickacks;
+ }
+-void tcp_enter_quickack_mode(struct sock *sk)
++void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+ {
+       struct inet_connection_sock *icsk = inet_csk(sk);
+-      tcp_incr_quickack(sk);
++
++      tcp_incr_quickack(sk, max_quickacks);
+       icsk->icsk_ack.pingpong = 0;
+       icsk->icsk_ack.ato = TCP_ATO_MIN;
+ }
+@@ -235,7 +237,7 @@ static void __tcp_ecn_check_ce(struct tc
+                * it is probably a retransmit.
+                */
+               if (tp->ecn_flags & TCP_ECN_SEEN)
+-                      tcp_enter_quickack_mode((struct sock *)tp);
++                      tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
+               break;
+       case INET_ECN_CE:
+               if (tcp_ca_needs_ecn((struct sock *)tp))
+@@ -243,7 +245,7 @@ static void __tcp_ecn_check_ce(struct tc
+               if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+                       /* Better not delay acks, sender can have a very low cwnd */
+-                      tcp_enter_quickack_mode((struct sock *)tp);
++                      tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
+                       tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+               }
+               tp->ecn_flags |= TCP_ECN_SEEN;
+@@ -651,7 +653,7 @@ static void tcp_event_data_recv(struct s
+               /* The _first_ data packet received, initialize
+                * delayed ACK engine.
+                */
+-              tcp_incr_quickack(sk);
++              tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
+               icsk->icsk_ack.ato = TCP_ATO_MIN;
+       } else {
+               int m = now - icsk->icsk_ack.lrcvtime;
+@@ -667,7 +669,7 @@ static void tcp_event_data_recv(struct s
+                       /* Too long gap. Apparently sender failed to
+                        * restart window, so that we send ACKs quickly.
+                        */
+-                      tcp_incr_quickack(sk);
++                      tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
+                       sk_mem_reclaim(sk);
+               }
+       }
+@@ -4136,7 +4138,7 @@ static void tcp_send_dupack(struct sock
+       if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
+           before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
+-              tcp_enter_quickack_mode(sk);
++              tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
+               if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+                       u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+@@ -4638,7 +4640,7 @@ queue_and_out:
+               tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+ out_of_window:
+-              tcp_enter_quickack_mode(sk);
++              tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
+               inet_csk_schedule_ack(sk);
+ drop:
+               __kfree_skb(skb);
+@@ -5674,7 +5676,7 @@ static int tcp_rcv_synsent_state_process
+                        * to stand against the temptation 8)     --ANK
+                        */
+                       inet_csk_schedule_ack(sk);
+-                      tcp_enter_quickack_mode(sk);
++                      tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
+                       inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+                                                 TCP_DELACK_MAX, TCP_RTO_MAX);
diff --git a/queue-4.4/tcp-add-one-more-quick-ack-after-after-ecn-events.patch b/queue-4.4/tcp-add-one-more-quick-ack-after-after-ecn-events.patch
new file mode 100644 (file)
index 0000000..cd6bb03
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 27 Jun 2018 08:47:21 -0700
+Subject: tcp: add one more quick ack after after ECN events
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 15ecbe94a45ef88491ca459b26efdd02f91edb6d ]
+
+Larry Brakmo proposal ( https://patchwork.ozlabs.org/patch/935233/
+tcp: force cwnd at least 2 in tcp_cwnd_reduction) made us rethink
+about our recent patch removing ~16 quick acks after ECN events.
+
+tcp_enter_quickack_mode(sk, 1) makes sure one immediate ack is sent,
+but in the case the sender cwnd was lowered to 1, we do not want
+to have a delayed ack for the next packet we will receive.
+
+Fixes: 522040ea5fdd ("tcp: do not aggressively quick ack after ECN events")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Neal Cardwell <ncardwell@google.com>
+Cc: Lawrence Brakmo <brakmo@fb.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -239,7 +239,7 @@ static void __tcp_ecn_check_ce(struct so
+                * it is probably a retransmit.
+                */
+               if (tp->ecn_flags & TCP_ECN_SEEN)
+-                      tcp_enter_quickack_mode(sk, 1);
++                      tcp_enter_quickack_mode(sk, 2);
+               break;
+       case INET_ECN_CE:
+               if (tcp_ca_needs_ecn(sk))
+@@ -247,7 +247,7 @@ static void __tcp_ecn_check_ce(struct so
+               if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+                       /* Better not delay acks, sender can have a very low cwnd */
+-                      tcp_enter_quickack_mode(sk, 1);
++                      tcp_enter_quickack_mode(sk, 2);
+                       tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+               }
+               tp->ecn_flags |= TCP_ECN_SEEN;
diff --git a/queue-4.4/tcp-do-not-aggressively-quick-ack-after-ecn-events.patch b/queue-4.4/tcp-do-not-aggressively-quick-ack-after-ecn-events.patch
new file mode 100644 (file)
index 0000000..349a035
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 21 May 2018 15:08:57 -0700
+Subject: tcp: do not aggressively quick ack after ECN events
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 522040ea5fdd1c33bbf75e1d7c7c0422b96a94ef ]
+
+ECN signals currently force TCP to enter quickack mode for
+up to 16 (TCP_MAX_QUICKACKS) following incoming packets.
+
+We believe this is not needed, and only sending one immediate ack
+for the current packet should be enough.
+
+This should reduce the extra load noticed in DCTCP environments,
+after congestion events.
+
+This is part 2 of our effort to reduce pure ACK packets.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -237,7 +237,7 @@ static void __tcp_ecn_check_ce(struct tc
+                * it is probably a retransmit.
+                */
+               if (tp->ecn_flags & TCP_ECN_SEEN)
+-                      tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
++                      tcp_enter_quickack_mode((struct sock *)tp, 1);
+               break;
+       case INET_ECN_CE:
+               if (tcp_ca_needs_ecn((struct sock *)tp))
+@@ -245,7 +245,7 @@ static void __tcp_ecn_check_ce(struct tc
+               if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+                       /* Better not delay acks, sender can have a very low cwnd */
+-                      tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
++                      tcp_enter_quickack_mode((struct sock *)tp, 1);
+                       tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+               }
+               tp->ecn_flags |= TCP_ECN_SEEN;
diff --git a/queue-4.4/tcp-do-not-force-quickack-when-receiving-out-of-order-packets.patch b/queue-4.4/tcp-do-not-force-quickack-when-receiving-out-of-order-packets.patch
new file mode 100644 (file)
index 0000000..e53d03d
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 17 May 2018 14:47:25 -0700
+Subject: tcp: do not force quickack when receiving out-of-order packets
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a3893637e1eb0ef5eb1bbc52b3a8d2dfa317a35d ]
+
+As explained in commit 9f9843a751d0 ("tcp: properly handle stretch
+acks in slow start"), TCP stacks have to consider how many packets
+are acknowledged in one single ACK, because of GRO, but also
+because of ACK compression or losses.
+
+We plan to add SACK compression in the following patch, we
+must therefore not call tcp_enter_quickack_mode()
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -4649,8 +4649,6 @@ drop:
+       if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
+               goto out_of_window;
+-      tcp_enter_quickack_mode(sk);
+-
+       if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+               /* Partial packet, seq < rcv_next < end_seq */
+               SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
diff --git a/queue-4.4/tcp-refactor-tcp_ecn_check_ce-to-remove-sk-type-cast.patch b/queue-4.4/tcp-refactor-tcp_ecn_check_ce-to-remove-sk-type-cast.patch
new file mode 100644 (file)
index 0000000..55cf493
--- /dev/null
@@ -0,0 +1,97 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Yousuk Seung <ysseung@google.com>
+Date: Mon, 4 Jun 2018 15:29:51 -0700
+Subject: tcp: refactor tcp_ecn_check_ce to remove sk type cast
+
+From: Yousuk Seung <ysseung@google.com>
+
+[ Upstream commit f4c9f85f3b2cb7669830cd04d0be61192a4d2436 ]
+
+Refactor tcp_ecn_check_ce and __tcp_ecn_check_ce to accept struct sock*
+instead of tcp_sock* to clean up type casts. This is a pure refactor
+patch.
+
+Signed-off-by: Yousuk Seung <ysseung@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |   26 ++++++++++++++------------
+ 1 file changed, 14 insertions(+), 12 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -228,8 +228,10 @@ static void tcp_ecn_withdraw_cwr(struct
+       tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+ }
+-static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
++static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
+ {
++      struct tcp_sock *tp = tcp_sk(sk);
++
+       switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
+       case INET_ECN_NOT_ECT:
+               /* Funny extension: if ECT is not set on a segment,
+@@ -237,31 +239,31 @@ static void __tcp_ecn_check_ce(struct tc
+                * it is probably a retransmit.
+                */
+               if (tp->ecn_flags & TCP_ECN_SEEN)
+-                      tcp_enter_quickack_mode((struct sock *)tp, 1);
++                      tcp_enter_quickack_mode(sk, 1);
+               break;
+       case INET_ECN_CE:
+-              if (tcp_ca_needs_ecn((struct sock *)tp))
+-                      tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_IS_CE);
++              if (tcp_ca_needs_ecn(sk))
++                      tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
+               if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+                       /* Better not delay acks, sender can have a very low cwnd */
+-                      tcp_enter_quickack_mode((struct sock *)tp, 1);
++                      tcp_enter_quickack_mode(sk, 1);
+                       tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+               }
+               tp->ecn_flags |= TCP_ECN_SEEN;
+               break;
+       default:
+-              if (tcp_ca_needs_ecn((struct sock *)tp))
+-                      tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_NO_CE);
++              if (tcp_ca_needs_ecn(sk))
++                      tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
+               tp->ecn_flags |= TCP_ECN_SEEN;
+               break;
+       }
+ }
+-static void tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
++static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
+ {
+-      if (tp->ecn_flags & TCP_ECN_OK)
+-              __tcp_ecn_check_ce(tp, skb);
++      if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
++              __tcp_ecn_check_ce(sk, skb);
+ }
+ static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
+@@ -675,7 +677,7 @@ static void tcp_event_data_recv(struct s
+       }
+       icsk->icsk_ack.lrcvtime = now;
+-      tcp_ecn_check_ce(tp, skb);
++      tcp_ecn_check_ce(sk, skb);
+       if (skb->len >= 128)
+               tcp_grow_window(sk, skb);
+@@ -4366,7 +4368,7 @@ static void tcp_data_queue_ofo(struct so
+       struct sk_buff *skb1;
+       u32 seq, end_seq;
+-      tcp_ecn_check_ce(tp, skb);
++      tcp_ecn_check_ce(sk, skb);
+       if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
+               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
diff --git a/queue-4.4/xen-netfront-wait-xenbus-state-change-when-load-module-manually.patch b/queue-4.4/xen-netfront-wait-xenbus-state-change-when-load-module-manually.patch
new file mode 100644 (file)
index 0000000..961708a
--- /dev/null
@@ -0,0 +1,67 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Xiao Liang <xiliang@redhat.com>
+Date: Fri, 27 Jul 2018 17:56:08 +0800
+Subject: xen-netfront: wait xenbus state change when load module manually
+
+From: Xiao Liang <xiliang@redhat.com>
+
+[ Upstream commit 822fb18a82abaf4ee7058793d95d340f5dab7bfc ]
+
+When loading the module manually, after calling xenbus_switch_state to initialize
+the state of the netfront device, the driver state does not change fast enough,
+which may lead to no dev being created in the latest kernel. This patch adds a
+wait to make sure xenbus knows the driver is not in closed/unknown state.
+
+Current state:
+[vm]# ethtool eth0
+Settings for eth0:
+       Link detected: yes
+[vm]# modprobe -r xen_netfront
+[vm]# modprobe  xen_netfront
+[vm]# ethtool eth0
+Settings for eth0:
+Cannot get device settings: No such device
+Cannot get wake-on-lan settings: No such device
+Cannot get message level: No such device
+Cannot get link status: No such device
+No data available
+
+With the patch installed.
+[vm]# ethtool eth0
+Settings for eth0:
+       Link detected: yes
+[vm]# modprobe -r xen_netfront
+[vm]# modprobe xen_netfront
+[vm]# ethtool eth0
+Settings for eth0:
+       Link detected: yes
+
+Signed-off-by: Xiao Liang <xiliang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/xen-netfront.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/net/xen-netfront.c
++++ b/drivers/net/xen-netfront.c
+@@ -86,6 +86,7 @@ struct netfront_cb {
+ /* IRQ name is queue name with "-tx" or "-rx" appended */
+ #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
++static DECLARE_WAIT_QUEUE_HEAD(module_load_q);
+ static DECLARE_WAIT_QUEUE_HEAD(module_unload_q);
+ struct netfront_stats {
+@@ -1335,6 +1336,11 @@ static struct net_device *xennet_create_
+       netif_carrier_off(netdev);
+       xenbus_switch_state(dev, XenbusStateInitialising);
++      wait_event(module_load_q,
++                         xenbus_read_driver_state(dev->otherend) !=
++                         XenbusStateClosed &&
++                         xenbus_read_driver_state(dev->otherend) !=
++                         XenbusStateUnknown);
+       return netdev;
+  exit: