From: Greg Kroah-Hartman
Date: Fri, 3 Aug 2018 19:44:23 +0000 (+0200)
Subject: 4.4-stable patches
X-Git-Tag: v4.17.13~18
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d81e56f68660552c617a0dc04db7b6a38860e9d7;p=thirdparty%2Fkernel%2Fstable-queue.git

4.4-stable patches

added patches:
	ipv4-remove-bug_on-from-fib_compute_spec_dst.patch
	net-fix-amd-xgbe-flow-control-issue.patch
	net-lan78xx-fix-rx-handling-before-first-packet-is-send.patch
	net-stmmac-align-dma-stuff-to-largest-cache-line-length.patch
	netlink-do-not-subscribe-to-non-existent-groups.patch
	netlink-don-t-shift-with-ub-on-nlk-ngroups.patch
	tcp-add-max_quickacks-param-to-tcp_incr_quickack-and-tcp_enter_quickack_mode.patch
	tcp-add-one-more-quick-ack-after-after-ecn-events.patch
	tcp-do-not-aggressively-quick-ack-after-ecn-events.patch
	tcp-do-not-force-quickack-when-receiving-out-of-order-packets.patch
	tcp-refactor-tcp_ecn_check_ce-to-remove-sk-type-cast.patch
	xen-netfront-wait-xenbus-state-change-when-load-module-manually.patch
---

diff --git a/queue-4.4/ipv4-remove-bug_on-from-fib_compute_spec_dst.patch b/queue-4.4/ipv4-remove-bug_on-from-fib_compute_spec_dst.patch
new file mode 100644
index 00000000000..21b02708bda
--- /dev/null
+++ b/queue-4.4/ipv4-remove-bug_on-from-fib_compute_spec_dst.patch
@@ -0,0 +1,48 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Lorenzo Bianconi
+Date: Fri, 27 Jul 2018 18:15:46 +0200
+Subject: ipv4: remove BUG_ON() from fib_compute_spec_dst
+
+From: Lorenzo Bianconi
+
+[ Upstream commit 9fc12023d6f51551d6ca9ed7e02ecc19d79caf17 ]
+
+Remove the BUG_ON() from the fib_compute_spec_dst routine and check the
+in_dev pointer during flowi4 data structure initialization.
+fib_compute_spec_dst can run concurrently with device removal, where the
+ip_ptr net_device pointer is set to NULL. This can happen if userspace
+enables pkt info on a UDP rx socket and the device is removed while
+traffic is flowing.
+
+Fixes: 35ebf65e851c ("ipv4: Create and use fib_compute_spec_dst() helper")
+Signed-off-by: Lorenzo Bianconi
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/fib_frontend.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -289,19 +289,19 @@ __be32 fib_compute_spec_dst(struct sk_bu
+ 		return ip_hdr(skb)->daddr;
+ 
+ 	in_dev = __in_dev_get_rcu(dev);
+-	BUG_ON(!in_dev);
+ 
+ 	net = dev_net(dev);
+ 
+ 	scope = RT_SCOPE_UNIVERSE;
+ 	if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
++		bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev);
+ 		struct flowi4 fl4 = {
+ 			.flowi4_iif = LOOPBACK_IFINDEX,
+ 			.flowi4_oif = l3mdev_master_ifindex_rcu(dev),
+ 			.daddr = ip_hdr(skb)->saddr,
+ 			.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
+ 			.flowi4_scope = scope,
+-			.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0,
++			.flowi4_mark = vmark ? skb->mark : 0,
+ 		};
+ 		if (!fib_lookup(net, &fl4, &res, 0))
+ 			return FIB_RES_PREFSRC(net, res);
diff --git a/queue-4.4/net-fix-amd-xgbe-flow-control-issue.patch b/queue-4.4/net-fix-amd-xgbe-flow-control-issue.patch
new file mode 100644
index 00000000000..80b22664c9c
--- /dev/null
+++ b/queue-4.4/net-fix-amd-xgbe-flow-control-issue.patch
@@ -0,0 +1,42 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: tangpengpeng
+Date: Thu, 26 Jul 2018 14:45:16 +0800
+Subject: net: fix amd-xgbe flow-control issue
+
+From: tangpengpeng
+
+[ Upstream commit 7f3fc7ddf719cd6faaf787722c511f6918ac6aab ]
+
+If we enable or disable xgbe flow-control by ethtool, it doesn't work,
+because the parameter is not properly assigned, so we need to adjust
+the assignment order of the parameters.
+
+Fixes: c1ce2f77366b ("amd-xgbe: Fix flow control setting logic")
+Signed-off-by: tangpengpeng
+Acked-by: Tom Lendacky
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-mdio.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+@@ -872,14 +872,14 @@ static void xgbe_phy_adjust_link(struct
+ 
+ 		if (pdata->tx_pause != pdata->phy.tx_pause) {
+ 			new_state = 1;
+-			pdata->hw_if.config_tx_flow_control(pdata);
+ 			pdata->tx_pause = pdata->phy.tx_pause;
++			pdata->hw_if.config_tx_flow_control(pdata);
+ 		}
+ 
+ 		if (pdata->rx_pause != pdata->phy.rx_pause) {
+ 			new_state = 1;
+-			pdata->hw_if.config_rx_flow_control(pdata);
+ 			pdata->rx_pause = pdata->phy.rx_pause;
++			pdata->hw_if.config_rx_flow_control(pdata);
+ 		}
+ 
+ 		/* Speed support */
diff --git a/queue-4.4/net-lan78xx-fix-rx-handling-before-first-packet-is-send.patch b/queue-4.4/net-lan78xx-fix-rx-handling-before-first-packet-is-send.patch
new file mode 100644
index 00000000000..cf02835832d
--- /dev/null
+++ b/queue-4.4/net-lan78xx-fix-rx-handling-before-first-packet-is-send.patch
@@ -0,0 +1,35 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Stefan Wahren
+Date: Sat, 28 Jul 2018 09:52:10 +0200
+Subject: net: lan78xx: fix rx handling before first packet is sent
+
+From: Stefan Wahren
+
+[ Upstream commit 136f55f660192ce04af091642efc75d85e017364 ]
+
+As long as the bh tasklet hasn't been scheduled once, no packet from
+the rx path will be handled. Since the tx path also schedules the same
+tasklet, this situation only persists until the first packet
+transmission. So fix this issue by scheduling the tasklet after link
+reset.
+
+Link: https://github.com/raspberrypi/linux/issues/2617
+Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet")
+Suggested-by: Floris Bos
+Signed-off-by: Stefan Wahren
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/usb/lan78xx.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -1361,6 +1361,8 @@ static void lan78xx_init_mac_address(str
+ 			netif_dbg(dev, ifup, dev->net,
+ 				  "MAC address set to random addr");
+ 		}
++
++		tasklet_schedule(&dev->bh);
+ 	}
+ 
+ 	ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
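
A note on the amd-xgbe fix above: the config_tx_flow_control() and
config_rx_flow_control() callbacks read pdata->tx_pause / pdata->rx_pause
to decide what to program into the hardware, so the freshly negotiated PHY
value has to be stored before the callback runs. The fragment below is a
schematic illustration of that ordering only, using stand-in types rather
than the real driver structures:

	struct pdata {
		int tx_pause;			/* value programmed into the MAC */
		int phy_tx_pause;		/* value negotiated by the PHY */
		void (*config_tx_flow_control)(struct pdata *);
	};

	static void adjust_link(struct pdata *p)
	{
		if (p->tx_pause != p->phy_tx_pause) {
			p->tx_pause = p->phy_tx_pause;	/* update first... */
			p->config_tx_flow_control(p);	/* ...then program the hw */
		}
	}

Before the fix, the callback ran one line earlier and therefore always
programmed the previous (stale) pause setting.
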
diff --git a/queue-4.4/net-stmmac-align-dma-stuff-to-largest-cache-line-length.patch b/queue-4.4/net-stmmac-align-dma-stuff-to-largest-cache-line-length.patch
new file mode 100644
index 00000000000..91b3c2846e2
--- /dev/null
+++ b/queue-4.4/net-stmmac-align-dma-stuff-to-largest-cache-line-length.patch
@@ -0,0 +1,38 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Eugeniy Paltsev
+Date: Thu, 26 Jul 2018 15:05:37 +0300
+Subject: NET: stmmac: align DMA stuff to largest cache line length
+
+From: Eugeniy Paltsev
+
+[ Upstream commit 9939a46d90c6c76f4533d534dbadfa7b39dc6acc ]
+
+As of today, the STMMAC_ALIGN macro (which is used to align DMA stuff)
+relies on the L1 cache line length (L1_CACHE_BYTES). This isn't correct
+on systems with several cache levels, which might have an L1 cache line
+shorter than an L2 line. This can lead to one cache line being shared
+between the DMA buffer and other data, so we can lose that data when we
+invalidate the DMA buffer before a DMA transaction.
+
+Fix that by using SMP_CACHE_BYTES instead of L1_CACHE_BYTES for
+aligning.
+
+Signed-off-by: Eugeniy Paltsev
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -54,7 +54,7 @@
+ #include 
+ #include 
+ 
+-#define	STMMAC_ALIGN(x)	L1_CACHE_ALIGN(x)
++#define	STMMAC_ALIGN(x)	__ALIGN_KERNEL(x, SMP_CACHE_BYTES)
+ 
+ /* Module parameters */
+ #define TX_TIMEO	5000
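
The arithmetic behind the stmmac change above: align-up only protects a
DMA buffer from cache-line sharing if it uses the largest line size in
the system. A generic align-up helper in the style of __ALIGN_KERNEL
(a sketch, not the kernel macro itself):

	#include <stddef.h>

	/* 'a' must be a power of two */
	static size_t align_up(size_t x, size_t a)
	{
		return (x + a - 1) & ~(a - 1);
	}

For example, align_up(130, 32) == 160, which is not 64-byte aligned:
with a 64-byte L2 line, a buffer placed at 160 still shares the line
starting at 128 with whatever sits in bytes 128..159, and invalidating
that line before DMA can discard the neighbor's data. Aligning to the
largest line in the system (SMP_CACHE_BYTES) avoids that.
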
diff --git a/queue-4.4/netlink-do-not-subscribe-to-non-existent-groups.patch b/queue-4.4/netlink-do-not-subscribe-to-non-existent-groups.patch
new file mode 100644
index 00000000000..de809d2eac0
--- /dev/null
+++ b/queue-4.4/netlink-do-not-subscribe-to-non-existent-groups.patch
@@ -0,0 +1,35 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Dmitry Safonov
+Date: Fri, 27 Jul 2018 16:54:44 +0100
+Subject: netlink: Do not subscribe to non-existent groups
+
+From: Dmitry Safonov
+
+[ Upstream commit 7acf9d4237c46894e0fa0492dd96314a41742e84 ]
+
+Make the ABI more strict about subscribing to group > ngroups.
+The code doesn't check for that and it looks bogus
+(one can subscribe to a non-existent group).
+Still, it's possible to bind() to all possible groups with (-1).
+
+Cc: "David S. Miller"
+Cc: Herbert Xu
+Cc: Steffen Klassert
+Cc: netdev@vger.kernel.org
+Signed-off-by: Dmitry Safonov
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/netlink/af_netlink.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -983,6 +983,7 @@ static int netlink_bind(struct socket *s
+ 		if (err)
+ 			return err;
+ 	}
++	groups &= (1UL << nlk->ngroups) - 1;
+ 
+ 	bound = nlk->bound;
+ 	if (bound) {
diff --git a/queue-4.4/netlink-don-t-shift-with-ub-on-nlk-ngroups.patch b/queue-4.4/netlink-don-t-shift-with-ub-on-nlk-ngroups.patch
new file mode 100644
index 00000000000..b0326b0d5d0
--- /dev/null
+++ b/queue-4.4/netlink-don-t-shift-with-ub-on-nlk-ngroups.patch
@@ -0,0 +1,37 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Dmitry Safonov
+Date: Mon, 30 Jul 2018 18:32:36 +0100
+Subject: netlink: Don't shift with UB on nlk->ngroups
+
+From: Dmitry Safonov
+
+[ Upstream commit 61f4b23769f0cc72ae62c9a81cf08f0397d40da8 ]
+
+On i386, nlk->ngroups might be 32 or 0, which leads to undefined
+behavior, resulting in a hang during boot.
+Check for 0 ngroups and use (unsigned long long) as the type to shift.
+
+Fixes: 7acf9d4237c4 ("netlink: Do not subscribe to non-existent groups")
+Reported-by: kernel test robot
+Signed-off-by: Dmitry Safonov
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/netlink/af_netlink.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -983,7 +983,11 @@ static int netlink_bind(struct socket *s
+ 		if (err)
+ 			return err;
+ 	}
+-	groups &= (1UL << nlk->ngroups) - 1;
++
++	if (nlk->ngroups == 0)
++		groups = 0;
++	else
++		groups &= (1ULL << nlk->ngroups) - 1;
+ 
+ 	bound = nlk->bound;
+ 	if (bound) {
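
Taken together, the two netlink patches above clip a bind() request to
the groups that actually exist, and keep that clipping shift well
defined when ngroups is 0 or 32. A standalone userspace sketch of the
combined logic (illustration only; group_mask() is a made-up helper,
not a kernel function):

	#include <stdio.h>

	/* Special-case ngroups == 0 and shift a 64-bit constant, so
	 * ngroups == 32 (possible on i386, where unsigned long is 32
	 * bits) does not invoke undefined behavior.
	 */
	static unsigned long long group_mask(unsigned int ngroups)
	{
		if (ngroups == 0)
			return 0;
		return (1ULL << ngroups) - 1;
	}

	int main(void)
	{
		/* A caller subscribing to every group (-1) is clipped
		 * to the groups the protocol actually registered.
		 */
		unsigned long long groups = ~0ULL;

		groups &= group_mask(32);
		printf("effective mask: %#llx\n", groups); /* 0xffffffff */
		return 0;
	}

With the first patch alone, `1UL << 32` was evaluated on 32-bit targets,
which is undefined behavior in C; the second patch is the follow-up fix.
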
diff --git a/queue-4.4/series b/queue-4.4/series
index b12970406ba..7e383bb9bc8 100644
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -99,3 +99,15 @@ ext4-fix-inline-data-updates-with-checksums-enabled.patch
 ext4-check-for-allocation-block-validity-with-block-group-locked.patch
 dmaengine-pxa_dma-remove-duplicate-const-qualifier.patch
 asoc-pxa-fix-module-autoload-for-platform-drivers.patch
+ipv4-remove-bug_on-from-fib_compute_spec_dst.patch
+net-fix-amd-xgbe-flow-control-issue.patch
+net-lan78xx-fix-rx-handling-before-first-packet-is-send.patch
+xen-netfront-wait-xenbus-state-change-when-load-module-manually.patch
+netlink-do-not-subscribe-to-non-existent-groups.patch
+netlink-don-t-shift-with-ub-on-nlk-ngroups.patch
+net-stmmac-align-dma-stuff-to-largest-cache-line-length.patch
+tcp-do-not-force-quickack-when-receiving-out-of-order-packets.patch
+tcp-add-max_quickacks-param-to-tcp_incr_quickack-and-tcp_enter_quickack_mode.patch
+tcp-do-not-aggressively-quick-ack-after-ecn-events.patch
+tcp-refactor-tcp_ecn_check_ce-to-remove-sk-type-cast.patch
+tcp-add-one-more-quick-ack-after-after-ecn-events.patch
diff --git a/queue-4.4/tcp-add-max_quickacks-param-to-tcp_incr_quickack-and-tcp_enter_quickack_mode.patch b/queue-4.4/tcp-add-max_quickacks-param-to-tcp_incr_quickack-and-tcp_enter_quickack_mode.patch
new file mode 100644
index 00000000000..64e55938f72
--- /dev/null
+++ b/queue-4.4/tcp-add-max_quickacks-param-to-tcp_incr_quickack-and-tcp_enter_quickack_mode.patch
@@ -0,0 +1,150 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Eric Dumazet
+Date: Mon, 21 May 2018 15:08:56 -0700
+Subject: tcp: add max_quickacks param to tcp_incr_quickack and tcp_enter_quickack_mode
+
+From: Eric Dumazet
+
+[ Upstream commit 9a9c9b51e54618861420093ae6e9b50a961914c5 ]
+
+We want to add finer control of the number of ACK packets sent after
+ECN events.
+
+This patch does not change current behavior; it only enables the
+following change.
+
+Signed-off-by: Eric Dumazet
+Acked-by: Soheil Hassas Yeganeh
+Acked-by: Neal Cardwell
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/net/tcp.h    |    2 +-
+ net/ipv4/tcp_dctcp.c |    4 ++--
+ net/ipv4/tcp_input.c |   24 +++++++++++++-----------
+ 3 files changed, 16 insertions(+), 14 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -376,7 +376,7 @@ ssize_t tcp_splice_read(struct socket *s
+ 			struct pipe_inode_info *pipe, size_t len,
+ 			unsigned int flags);
+ 
+-void tcp_enter_quickack_mode(struct sock *sk);
++void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
+ static inline void tcp_dec_quickack_mode(struct sock *sk,
+ 					 const unsigned int pkts)
+ {
+--- a/net/ipv4/tcp_dctcp.c
++++ b/net/ipv4/tcp_dctcp.c
+@@ -138,7 +138,7 @@ static void dctcp_ce_state_0_to_1(struct
+ 	 */
+ 	if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+ 		__tcp_send_ack(sk, ca->prior_rcv_nxt);
+-	tcp_enter_quickack_mode(sk);
++	tcp_enter_quickack_mode(sk, 1);
+ 	}
+ 
+ 	ca->prior_rcv_nxt = tp->rcv_nxt;
+@@ -159,7 +159,7 @@ static void dctcp_ce_state_1_to_0(struct
+ 	 */
+ 	if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+ 		__tcp_send_ack(sk, ca->prior_rcv_nxt);
+-	tcp_enter_quickack_mode(sk);
++	tcp_enter_quickack_mode(sk, 1);
+ 	}
+ 
+ 	ca->prior_rcv_nxt = tp->rcv_nxt;
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -176,21 +176,23 @@ static void tcp_measure_rcv_mss(struct s
+ 	}
+ }
+ 
+-static void tcp_incr_quickack(struct sock *sk)
++static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
+ {
+ 	struct inet_connection_sock *icsk = inet_csk(sk);
+ 	unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
+ 
+ 	if (quickacks == 0)
+ 		quickacks = 2;
++	quickacks = min(quickacks, max_quickacks);
+ 	if (quickacks > icsk->icsk_ack.quick)
+-		icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
++		icsk->icsk_ack.quick = quickacks;
+ }
+ 
+-void tcp_enter_quickack_mode(struct sock *sk)
++void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+ {
+ 	struct inet_connection_sock *icsk = inet_csk(sk);
+-	tcp_incr_quickack(sk);
++
++	tcp_incr_quickack(sk, max_quickacks);
+ 	icsk->icsk_ack.pingpong = 0;
+ 	icsk->icsk_ack.ato = TCP_ATO_MIN;
+ }
+@@ -235,7 +237,7 @@ static void __tcp_ecn_check_ce(struct tc
+ 	 * it is probably a retransmit.
+ 	 */
+ 	if (tp->ecn_flags & TCP_ECN_SEEN)
+-		tcp_enter_quickack_mode((struct sock *)tp);
++		tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
+ 	break;
+ 	case INET_ECN_CE:
+ 		if (tcp_ca_needs_ecn((struct sock *)tp))
+@@ -243,7 +245,7 @@ static void __tcp_ecn_check_ce(struct tc
+ 
+ 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+ 			/* Better not delay acks, sender can have a very low cwnd */
+-			tcp_enter_quickack_mode((struct sock *)tp);
++			tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
+ 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ 		}
+ 		tp->ecn_flags |= TCP_ECN_SEEN;
+@@ -651,7 +653,7 @@ static void tcp_event_data_recv(struct s
+ 		/* The _first_ data packet received, initialize
+ 		 * delayed ACK engine.
+ 		 */
+-		tcp_incr_quickack(sk);
++		tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
+ 		icsk->icsk_ack.ato = TCP_ATO_MIN;
+ 	} else {
+ 		int m = now - icsk->icsk_ack.lrcvtime;
+@@ -667,7 +669,7 @@ static void tcp_event_data_recv(struct s
+ 			/* Too long gap. Apparently sender failed to
+ 			 * restart window, so that we send ACKs quickly.
+ 			 */
+-			tcp_incr_quickack(sk);
++			tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
+ 			sk_mem_reclaim(sk);
+ 		}
+ 	}
+@@ -4136,7 +4138,7 @@ static void tcp_send_dupack(struct sock
+ 	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
+ 	    before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+ 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
+-		tcp_enter_quickack_mode(sk);
++		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
+ 
+ 		if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+ 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+@@ -4638,7 +4640,7 @@ queue_and_out:
+ 		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+ 
+ out_of_window:
+-	tcp_enter_quickack_mode(sk);
++	tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
+ 	inet_csk_schedule_ack(sk);
+ drop:
+ 	__kfree_skb(skb);
+@@ -5674,7 +5676,7 @@ static int tcp_rcv_synsent_state_process
+ 		 * to stand against the temptation 8)     --ANK
+ 		 */
+ 		inet_csk_schedule_ack(sk);
+-		tcp_enter_quickack_mode(sk);
++		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
+ 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+ 					  TCP_DELACK_MAX, TCP_RTO_MAX);
+ 
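
The max_quickacks parameter above turns the old hard-coded
TCP_MAX_QUICKACKS clamp into a per-caller limit. A simplified model of
the patched accounting, with plain values standing in for kernel socket
state (a sketch, not the kernel code):

	static unsigned int incr_quickack(unsigned int rcv_wnd,
					  unsigned int rcv_mss,
					  unsigned int cur_quick,
					  unsigned int max_quickacks)
	{
		unsigned int quickacks = rcv_wnd / (2 * rcv_mss);

		if (quickacks == 0)
			quickacks = 2;
		if (quickacks > max_quickacks)
			quickacks = max_quickacks;	/* new per-caller clamp */
		if (quickacks > cur_quick)
			cur_quick = quickacks;		/* never shrink the budget */
		return cur_quick;
	}

Existing callers pass TCP_MAX_QUICKACKS and keep the old behavior; the
ECN paths can now pass a small value instead.
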
diff --git a/queue-4.4/tcp-add-one-more-quick-ack-after-after-ecn-events.patch b/queue-4.4/tcp-add-one-more-quick-ack-after-after-ecn-events.patch
new file mode 100644
index 00000000000..cd6bb03d669
--- /dev/null
+++ b/queue-4.4/tcp-add-one-more-quick-ack-after-after-ecn-events.patch
@@ -0,0 +1,48 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Eric Dumazet
+Date: Wed, 27 Jun 2018 08:47:21 -0700
+Subject: tcp: add one more quick ack after ECN events
+
+From: Eric Dumazet
+
+[ Upstream commit 15ecbe94a45ef88491ca459b26efdd02f91edb6d ]
+
+Larry Brakmo's proposal ( https://patchwork.ozlabs.org/patch/935233/
+tcp: force cwnd at least 2 in tcp_cwnd_reduction) made us rethink our
+recent patch removing ~16 quick acks after ECN events.
+
+tcp_enter_quickack_mode(sk, 1) makes sure one immediate ack is sent,
+but in the case where the sender cwnd was lowered to 1, we do not want
+to have a delayed ack for the next packet we will receive.
+
+Fixes: 522040ea5fdd ("tcp: do not aggressively quick ack after ECN events")
+Signed-off-by: Eric Dumazet
+Reported-by: Neal Cardwell
+Cc: Lawrence Brakmo
+Acked-by: Neal Cardwell
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_input.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -239,7 +239,7 @@ static void __tcp_ecn_check_ce(struct so
+ 	 * it is probably a retransmit.
+ 	 */
+ 	if (tp->ecn_flags & TCP_ECN_SEEN)
+-		tcp_enter_quickack_mode(sk, 1);
++		tcp_enter_quickack_mode(sk, 2);
+ 	break;
+ 	case INET_ECN_CE:
+ 		if (tcp_ca_needs_ecn(sk))
+@@ -247,7 +247,7 @@ static void __tcp_ecn_check_ce(struct so
+ 
+ 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+ 			/* Better not delay acks, sender can have a very low cwnd */
+-			tcp_enter_quickack_mode(sk, 1);
++			tcp_enter_quickack_mode(sk, 2);
+ 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ 		}
+ 		tp->ecn_flags |= TCP_ECN_SEEN;
diff --git a/queue-4.4/tcp-do-not-aggressively-quick-ack-after-ecn-events.patch b/queue-4.4/tcp-do-not-aggressively-quick-ack-after-ecn-events.patch
new file mode 100644
index 00000000000..349a03538d7
--- /dev/null
+++ b/queue-4.4/tcp-do-not-aggressively-quick-ack-after-ecn-events.patch
@@ -0,0 +1,50 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Eric Dumazet
+Date: Mon, 21 May 2018 15:08:57 -0700
+Subject: tcp: do not aggressively quick ack after ECN events
+
+From: Eric Dumazet
+
+[ Upstream commit 522040ea5fdd1c33bbf75e1d7c7c0422b96a94ef ]
+
+ECN signals currently force TCP to enter quickack mode for up to 16
+(TCP_MAX_QUICKACKS) following incoming packets.
+
+We believe this is not needed, and only sending one immediate ack for
+the current packet should be enough.
+
+This should reduce the extra load noticed in DCTCP environments after
+congestion events.
+
+This is part 2 of our effort to reduce pure ACK packets.
+
+Signed-off-by: Eric Dumazet
+Acked-by: Soheil Hassas Yeganeh
+Acked-by: Yuchung Cheng
+Acked-by: Neal Cardwell
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_input.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -237,7 +237,7 @@ static void __tcp_ecn_check_ce(struct tc
+ 	 * it is probably a retransmit.
+ 	 */
+ 	if (tp->ecn_flags & TCP_ECN_SEEN)
+-		tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
++		tcp_enter_quickack_mode((struct sock *)tp, 1);
+ 	break;
+ 	case INET_ECN_CE:
+ 		if (tcp_ca_needs_ecn((struct sock *)tp))
+@@ -245,7 +245,7 @@ static void __tcp_ecn_check_ce(struct tc
+ 
+ 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+ 			/* Better not delay acks, sender can have a very low cwnd */
+-			tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
++			tcp_enter_quickack_mode((struct sock *)tp, 1);
+ 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ 		}
+ 		tp->ecn_flags |= TCP_ECN_SEEN;
diff --git a/queue-4.4/tcp-do-not-force-quickack-when-receiving-out-of-order-packets.patch b/queue-4.4/tcp-do-not-force-quickack-when-receiving-out-of-order-packets.patch
new file mode 100644
index 00000000000..e53d03debe9
--- /dev/null
+++ b/queue-4.4/tcp-do-not-force-quickack-when-receiving-out-of-order-packets.patch
@@ -0,0 +1,37 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Eric Dumazet
+Date: Thu, 17 May 2018 14:47:25 -0700
+Subject: tcp: do not force quickack when receiving out-of-order packets
+
+From: Eric Dumazet
+
+[ Upstream commit a3893637e1eb0ef5eb1bbc52b3a8d2dfa317a35d ]
+
+As explained in commit 9f9843a751d0 ("tcp: properly handle stretch
+acks in slow start"), TCP stacks have to consider how many packets
+are acknowledged in one single ACK, because of GRO, but also
+because of ACK compression or losses.
+
+We plan to add SACK compression in the following patch; we must
+therefore not call tcp_enter_quickack_mode().
+
+Signed-off-by: Eric Dumazet
+Acked-by: Neal Cardwell
+Acked-by: Soheil Hassas Yeganeh
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_input.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -4649,8 +4649,6 @@ drop:
+ 	if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
+ 		goto out_of_window;
+ 
+-	tcp_enter_quickack_mode(sk);
+-
+ 	if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+ 		/* Partial packet, seq < rcv_next < end_seq */
+ 		SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
diff --git a/queue-4.4/tcp-refactor-tcp_ecn_check_ce-to-remove-sk-type-cast.patch b/queue-4.4/tcp-refactor-tcp_ecn_check_ce-to-remove-sk-type-cast.patch
new file mode 100644
index 00000000000..55cf493380e
--- /dev/null
+++ b/queue-4.4/tcp-refactor-tcp_ecn_check_ce-to-remove-sk-type-cast.patch
@@ -0,0 +1,97 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Yousuk Seung
+Date: Mon, 4 Jun 2018 15:29:51 -0700
+Subject: tcp: refactor tcp_ecn_check_ce to remove sk type cast
+
+From: Yousuk Seung
+
+[ Upstream commit f4c9f85f3b2cb7669830cd04d0be61192a4d2436 ]
+
+Refactor tcp_ecn_check_ce and __tcp_ecn_check_ce to accept struct sock*
+instead of tcp_sock* to clean up type casts. This is a pure refactor
+patch.
+
+Signed-off-by: Yousuk Seung
+Signed-off-by: Neal Cardwell
+Signed-off-by: Yuchung Cheng
+Signed-off-by: Eric Dumazet
+Acked-by: Soheil Hassas Yeganeh
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_input.c |   26 ++++++++++++++------------
+ 1 file changed, 14 insertions(+), 12 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -228,8 +228,10 @@ static void tcp_ecn_withdraw_cwr(struct
+ 	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+ }
+ 
+-static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
++static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
+ {
++	struct tcp_sock *tp = tcp_sk(sk);
++
+ 	switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
+ 	case INET_ECN_NOT_ECT:
+ 		/* Funny extension: if ECT is not set on a segment,
+ 		 * it is probably a retransmit.
+ 		 */
+ 		if (tp->ecn_flags & TCP_ECN_SEEN)
+-			tcp_enter_quickack_mode((struct sock *)tp, 1);
++			tcp_enter_quickack_mode(sk, 1);
+ 		break;
+ 	case INET_ECN_CE:
+-		if (tcp_ca_needs_ecn((struct sock *)tp))
+-			tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_IS_CE);
++		if (tcp_ca_needs_ecn(sk))
++			tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
+ 
+ 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+ 			/* Better not delay acks, sender can have a very low cwnd */
+-			tcp_enter_quickack_mode((struct sock *)tp, 1);
++			tcp_enter_quickack_mode(sk, 1);
+ 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ 		}
+ 		tp->ecn_flags |= TCP_ECN_SEEN;
+ 		break;
+ 	default:
+-		if (tcp_ca_needs_ecn((struct sock *)tp))
+-			tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_NO_CE);
++		if (tcp_ca_needs_ecn(sk))
++			tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
+ 		tp->ecn_flags |= TCP_ECN_SEEN;
+ 		break;
+ 	}
+ }
+ 
+-static void tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
++static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
+ {
+-	if (tp->ecn_flags & TCP_ECN_OK)
+-		__tcp_ecn_check_ce(tp, skb);
++	if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
++		__tcp_ecn_check_ce(sk, skb);
+ }
+ 
+ static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
+@@ -675,7 +677,7 @@ static void tcp_event_data_recv(struct s
+ 	}
+ 	icsk->icsk_ack.lrcvtime = now;
+ 
+-	tcp_ecn_check_ce(tp, skb);
++	tcp_ecn_check_ce(sk, skb);
+ 
+ 	if (skb->len >= 128)
+ 		tcp_grow_window(sk, skb);
+@@ -4366,7 +4368,7 @@ static void tcp_data_queue_ofo(struct so
+ 	struct sk_buff *skb1;
+ 	u32 seq, end_seq;
+ 
+-	tcp_ecn_check_ce(tp, skb);
++	tcp_ecn_check_ce(sk, skb);
+ 
+ 	if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
+ 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
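
The refactor above works because tcp_sock embeds the common socket
structure as its (transitively) first member, so a sock pointer and a
tcp_sock pointer refer to the same object. A reduced sketch of the
accessor pattern, with bare stand-in structs rather than the kernel
definitions:

	struct sock {
		int state;			/* stand-in for the real fields */
	};

	struct tcp_sock {
		struct sock sk;			/* must remain the first member */
		unsigned int ecn_flags;
	};

	static inline struct tcp_sock *tcp_sk(struct sock *sk)
	{
		/* A pointer to a struct, suitably converted, points to
		 * its initial member (C11 6.7.2.1), so no offset
		 * adjustment is needed.
		 */
		return (struct tcp_sock *)sk;
	}

Passing sk everywhere and deriving tp via tcp_sk() lets the callers
drop the (struct sock *)tp casts removed in the diff above.
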
diff --git a/queue-4.4/xen-netfront-wait-xenbus-state-change-when-load-module-manually.patch b/queue-4.4/xen-netfront-wait-xenbus-state-change-when-load-module-manually.patch
new file mode 100644
index 00000000000..961708a8855
--- /dev/null
+++ b/queue-4.4/xen-netfront-wait-xenbus-state-change-when-load-module-manually.patch
@@ -0,0 +1,67 @@
+From foo@baz Fri Aug  3 21:25:07 CEST 2018
+From: Xiao Liang
+Date: Fri, 27 Jul 2018 17:56:08 +0800
+Subject: xen-netfront: wait for xenbus state change when loading the module manually
+
+From: Xiao Liang
+
+[ Upstream commit 822fb18a82abaf4ee7058793d95d340f5dab7bfc ]
+
+When loading the module manually, after xenbus_switch_state is called
+to initialize the state of the netfront device, the driver state may
+not change fast enough, which can lead to no device being created on
+recent kernels. This patch adds a wait to make sure xenbus knows the
+driver is not in the closed/unknown state.
+
+Current state:
+[vm]# ethtool eth0
+Settings for eth0:
+	Link detected: yes
+[vm]# modprobe -r xen_netfront
+[vm]# modprobe xen_netfront
+[vm]# ethtool eth0
+Settings for eth0:
+Cannot get device settings: No such device
+Cannot get wake-on-lan settings: No such device
+Cannot get message level: No such device
+Cannot get link status: No such device
+No data available
+
+With the patch installed:
+[vm]# ethtool eth0
+Settings for eth0:
+	Link detected: yes
+[vm]# modprobe -r xen_netfront
+[vm]# modprobe xen_netfront
+[vm]# ethtool eth0
+Settings for eth0:
+	Link detected: yes
+
+Signed-off-by: Xiao Liang
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/xen-netfront.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/net/xen-netfront.c
++++ b/drivers/net/xen-netfront.c
+@@ -86,6 +86,7 @@ struct netfront_cb {
+ /* IRQ name is queue name with "-tx" or "-rx" appended */
+ #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
+ 
++static DECLARE_WAIT_QUEUE_HEAD(module_load_q);
+ static DECLARE_WAIT_QUEUE_HEAD(module_unload_q);
+ 
+ struct netfront_stats {
+@@ -1335,6 +1336,11 @@ static struct net_device *xennet_create_
+ 	netif_carrier_off(netdev);
+ 
+ 	xenbus_switch_state(dev, XenbusStateInitialising);
++	wait_event(module_load_q,
++		   xenbus_read_driver_state(dev->otherend) !=
++		   XenbusStateClosed &&
++		   xenbus_read_driver_state(dev->otherend) !=
++		   XenbusStateUnknown);
+ 	return netdev;
+ 
+ exit: