--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Arjun Vynipadath <arjun@chelsio.com>
+Date: Wed, 25 Jul 2018 19:39:52 +0530
+Subject: cxgb4: Added missing break in ndo_udp_tunnel_{add/del}
+
+From: Arjun Vynipadath <arjun@chelsio.com>
+
+[ Upstream commit 942a656f1f228f06a37adad0e6c347773cfe7bd6 ]
+
+Break statements were missing for the Geneve case in
+ndo_udp_tunnel_{add/del}, so raw MAC matchall entries were not being
+added.
+
+Fixes: c746fc0e8b2d ("cxgb4: add geneve offload support for T6")
+Signed-off-by: Arjun Vynipadath <arjun@chelsio.com>
+Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+@@ -3066,6 +3066,7 @@ static void cxgb_del_udp_tunnel(struct n
+
+ adapter->geneve_port = 0;
+ t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A, 0);
++ break;
+ default:
+ return;
+ }
+@@ -3151,6 +3152,7 @@ static void cxgb_add_udp_tunnel(struct n
+
+ t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A,
+ GENEVE_V(be16_to_cpu(ti->port)) | GENEVE_EN_F);
++ break;
+ default:
+ return;
+ }
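
Why the missing break matters: in C a case label without a break falls
through into the next label, so the Geneve arm of the switch fell into
the default "return" before the post-switch work ran. A minimal
userspace sketch of the pattern (names are illustrative, not taken from
the driver):

    #include <stdio.h>

    enum tunnel_type { TUNNEL_VXLAN, TUNNEL_GENEVE, TUNNEL_OTHER };

    static void add_tunnel(enum tunnel_type type)
    {
            switch (type) {
            case TUNNEL_VXLAN:
                    printf("program VXLAN UDP port\n");
                    break;
            case TUNNEL_GENEVE:
                    printf("program Geneve UDP port\n");
                    break;  /* without this break we fall into "default:" and return */
            default:
                    return;
            }
            /* This is the work that a fallthrough into default would skip. */
            printf("add raw MAC matchall entry\n");
    }

    int main(void)
    {
            add_tunnel(TUNNEL_GENEVE);
            return 0;
    }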
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Date: Fri, 27 Jul 2018 18:15:46 +0200
+Subject: ipv4: remove BUG_ON() from fib_compute_spec_dst
+
+From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+
+[ Upstream commit 9fc12023d6f51551d6ca9ed7e02ecc19d79caf17 ]
+
+Remove the BUG_ON() from the fib_compute_spec_dst routine and check the
+in_dev pointer during flowi4 data structure initialization.
+fib_compute_spec_dst can run concurrently with device removal, where the
+device's ip_ptr net_device pointer is set to NULL. This can happen if
+userspace enables pkt info on a UDP rx socket and the device is removed
+while traffic is flowing.
+
+Fixes: 35ebf65e851c ("ipv4: Create and use fib_compute_spec_dst() helper")
+Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_frontend.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -292,19 +292,19 @@ __be32 fib_compute_spec_dst(struct sk_bu
+ return ip_hdr(skb)->daddr;
+
+ in_dev = __in_dev_get_rcu(dev);
+- BUG_ON(!in_dev);
+
+ net = dev_net(dev);
+
+ scope = RT_SCOPE_UNIVERSE;
+ if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
++ bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev);
+ struct flowi4 fl4 = {
+ .flowi4_iif = LOOPBACK_IFINDEX,
+ .flowi4_oif = l3mdev_master_ifindex_rcu(dev),
+ .daddr = ip_hdr(skb)->saddr,
+ .flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
+ .flowi4_scope = scope,
+- .flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0,
++ .flowi4_mark = vmark ? skb->mark : 0,
+ };
+ if (!fib_lookup(net, &fl4, &res, 0))
+ return FIB_RES_PREFSRC(net, res);
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Gal Pressman <pressmangal@gmail.com>
+Date: Thu, 26 Jul 2018 23:40:33 +0300
+Subject: net: ena: Fix use of uninitialized DMA address bits field
+
+From: Gal Pressman <pressmangal@gmail.com>
+
+[ Upstream commit 101f0cd4f2216d32f1b8a75a2154cf3997484ee2 ]
+
+UBSAN triggers the following undefined behaviour warnings:
+[...]
+[ 13.236124] UBSAN: Undefined behaviour in drivers/net/ethernet/amazon/ena/ena_eth_com.c:468:22
+[ 13.240043] shift exponent 64 is too large for 64-bit type 'long long unsigned int'
+[...]
+[ 13.744769] UBSAN: Undefined behaviour in drivers/net/ethernet/amazon/ena/ena_eth_com.c:373:4
+[ 13.748694] shift exponent 64 is too large for 64-bit type 'long long unsigned int'
+[...]
+
+When splitting the address into high and low parts, GENMASK_ULL is used
+to generate a bitmask from the dma_addr_bits field of io_sq (in
+ena_com_prepare_tx and ena_com_add_single_rx_desc).
+The problem is that dma_addr_bits is never initialized with a proper
+value (it is only cleared in ena_com_create_io_queue).
+Assign dma_addr_bits the correct value stored in ena_dev when
+initializing the SQ.
+
+Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)")
+Signed-off-by: Gal Pressman <pressmangal@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_com.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/amazon/ena/ena_com.c
++++ b/drivers/net/ethernet/amazon/ena/ena_com.c
+@@ -333,6 +333,7 @@ static int ena_com_init_io_sq(struct ena
+
+ memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr));
+
++ io_sq->dma_addr_bits = ena_dev->dma_addr_bits;
+ io_sq->desc_entry_size =
+ (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ?
+ sizeof(struct ena_eth_io_tx_desc) :
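
The UBSAN splats above come down to a basic C rule: shifting a 64-bit
value by 64 or more bits is undefined behaviour, and the uninitialized
(zero) dma_addr_bits made the driver's GENMASK_ULL computation hit
exactly that. A standalone sketch of the rule, not driver code:

    #include <stdio.h>
    #include <stdint.h>

    /* Build a mask of the low 'bits' bits without ever shifting by 64. */
    static uint64_t low_mask(unsigned int bits)
    {
            if (bits == 0)
                    return 0;
            if (bits >= 64)
                    return ~0ULL;
            return (1ULL << bits) - 1;      /* shift count is now 1..63 */
    }

    int main(void)
    {
            /* A naive "~0ULL >> (64 - bits)" would shift by 64 for bits == 0,
             * and "1ULL << bits" would shift by 64 for bits == 64; both are UB. */
            printf("mask(0)  = %#llx\n", (unsigned long long)low_mask(0));
            printf("mask(48) = %#llx\n", (unsigned long long)low_mask(48));
            printf("mask(64) = %#llx\n", (unsigned long long)low_mask(64));
            return 0;
    }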
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: tangpengpeng <tangpengpeng@higon.com>
+Date: Thu, 26 Jul 2018 14:45:16 +0800
+Subject: net: fix amd-xgbe flow-control issue
+
+From: tangpengpeng <tangpengpeng@higon.com>
+
+[ Upstream commit 7f3fc7ddf719cd6faaf787722c511f6918ac6aab ]
+
+If we enable or disable xgbe flow control via ethtool, it doesn't work,
+because the pause parameters are not yet updated when the hardware is
+configured. Adjust the assignment order so the new values are in place
+before the flow-control configuration is applied.
+
+Fixes: c1ce2f77366b ("amd-xgbe: Fix flow control setting logic")
+Signed-off-by: tangpengpeng <tangpengpeng@higon.com>
+Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+@@ -1128,14 +1128,14 @@ static void xgbe_phy_adjust_link(struct
+
+ if (pdata->tx_pause != pdata->phy.tx_pause) {
+ new_state = 1;
+- pdata->hw_if.config_tx_flow_control(pdata);
+ pdata->tx_pause = pdata->phy.tx_pause;
++ pdata->hw_if.config_tx_flow_control(pdata);
+ }
+
+ if (pdata->rx_pause != pdata->phy.rx_pause) {
+ new_state = 1;
+- pdata->hw_if.config_rx_flow_control(pdata);
+ pdata->rx_pause = pdata->phy.rx_pause;
++ pdata->hw_if.config_rx_flow_control(pdata);
+ }
+
+ /* Speed support */
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Stefan Wahren <stefan.wahren@i2se.com>
+Date: Sat, 28 Jul 2018 09:52:10 +0200
+Subject: net: lan78xx: fix rx handling before first packet is send
+
+From: Stefan Wahren <stefan.wahren@i2se.com>
+
+[ Upstream commit 136f55f660192ce04af091642efc75d85e017364 ]
+
+Until the bh tasklet has been scheduled at least once, no packets from
+the rx path will be handled. Since the tx path also schedules the same
+tasklet, this situation only persists until the first packet
+transmission. Fix this issue by scheduling the tasklet after link reset.
+
+Link: https://github.com/raspberrypi/linux/issues/2617
+Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet")
+Suggested-by: Floris Bos <bos@je-eigen-domein.nl>
+Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/lan78xx.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -1216,6 +1216,8 @@ static int lan78xx_link_reset(struct lan
+ mod_timer(&dev->stat_monitor,
+ jiffies + STAT_UPDATE_TIMER);
+ }
++
++ tasklet_schedule(&dev->bh);
+ }
+
+ return ret;
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Anton Vasilyev <vasilyev@ispras.ru>
+Date: Fri, 27 Jul 2018 18:57:47 +0300
+Subject: net: mdio-mux: bcm-iproc: fix wrong getter and setter pair
+
+From: Anton Vasilyev <vasilyev@ispras.ru>
+
+[ Upstream commit b0753408aadf32c7ece9e6b765017881e54af833 ]
+
+mdio_mux_iproc_probe() uses platform_set_drvdata() to store the md
+pointer in the device, whereas mdio_mux_iproc_remove() retrieves the md
+pointer with dev_get_platdata(&pdev->dev). This leads to the wrong
+resources being released.
+
+Replace the getter with platform_get_drvdata().
+
+Fixes: 98bc865a1ec8 ("net: mdio-mux: Add MDIO mux driver for iProc SoCs")
+Signed-off-by: Anton Vasilyev <vasilyev@ispras.ru>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/mdio-mux-bcm-iproc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/phy/mdio-mux-bcm-iproc.c
++++ b/drivers/net/phy/mdio-mux-bcm-iproc.c
+@@ -218,7 +218,7 @@ out:
+
+ static int mdio_mux_iproc_remove(struct platform_device *pdev)
+ {
+- struct iproc_mdiomux_desc *md = dev_get_platdata(&pdev->dev);
++ struct iproc_mdiomux_desc *md = platform_get_drvdata(pdev);
+
+ mdio_mux_uninit(md->mux_handle);
+ mdiobus_unregister(md->mii_bus);
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Tariq Toukan <tariqt@mellanox.com>
+Date: Tue, 24 Jul 2018 14:12:20 +0300
+Subject: net: rollback orig value on failure of dev_qdisc_change_tx_queue_len
+
+From: Tariq Toukan <tariqt@mellanox.com>
+
+[ Upstream commit 7effaf06c3cdef6855e127886c7405b9ab62f90d ]
+
+Fix dev_change_tx_queue_len so it rolls back to the original value upon
+a failure in dev_qdisc_change_tx_queue_len.
+This is already done for notifier failures, so share the code.
+
+In case of a failure in dev_qdisc_change_tx_queue_len, some tx queues
+would still be of the new length when they should be reverted. The
+revert is currently not done and is marked with a TODO label in
+dev_qdisc_change_tx_queue_len; it still needs a proper solution. Even
+so, it is better not to apply the newly requested value.
+
+Fixes: 48bfd55e7e41 ("net_sched: plug in qdisc ops change_tx_queue_len")
+Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
+Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
+Reported-by: Ran Rozenstein <ranro@mellanox.com>
+Cc: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c | 17 ++++++++++-------
+ 1 file changed, 10 insertions(+), 7 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -7113,16 +7113,19 @@ int dev_change_tx_queue_len(struct net_d
+ dev->tx_queue_len = new_len;
+ res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
+ res = notifier_to_errno(res);
+- if (res) {
+- netdev_err(dev,
+- "refused to change device tx_queue_len\n");
+- dev->tx_queue_len = orig_len;
+- return res;
+- }
+- return dev_qdisc_change_tx_queue_len(dev);
++ if (res)
++ goto err_rollback;
++ res = dev_qdisc_change_tx_queue_len(dev);
++ if (res)
++ goto err_rollback;
+ }
+
+ return 0;
++
++err_rollback:
++ netdev_err(dev, "refused to change device tx_queue_len\n");
++ dev->tx_queue_len = orig_len;
++ return res;
+ }
+
+ /**
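
The reshuffled error handling above is the usual kernel pattern of a
single rollback label shared by every failure point. A schematic
userspace sketch of that pattern (generic, not the actual net/core
code):

    #include <stdio.h>

    static int notify_ok(int v)  { (void)v; return 0; }
    static int apply_fail(int v) { (void)v; return -1; }

    /* Change *value, restoring the original on any failure. */
    static int change_value(int *value, int new_val,
                            int (*notify)(int), int (*apply)(int))
    {
            int orig = *value;
            int err;

            *value = new_val;

            err = notify(new_val);          /* step 1: notifier chain */
            if (err)
                    goto err_rollback;

            err = apply(new_val);           /* step 2: apply to the queues */
            if (err)
                    goto err_rollback;

            return 0;

    err_rollback:
            fprintf(stderr, "refused to change value\n");
            *value = orig;                  /* both failure points share one rollback */
            return err;
    }

    int main(void)
    {
            int v = 1000;

            change_value(&v, 500, notify_ok, apply_fail);
            printf("value = %d\n", v);      /* still 1000: rolled back */
            return 0;
    }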
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+Date: Thu, 26 Jul 2018 15:05:37 +0300
+Subject: NET: stmmac: align DMA stuff to largest cache line length
+
+From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+
+[ Upstream commit 9939a46d90c6c76f4533d534dbadfa7b39dc6acc ]
+
+As of today the STMMAC_ALIGN macro (which is used to align DMA buffers)
+relies on the L1 cache line length (L1_CACHE_BYTES).
+This isn't correct on systems with several cache levels whose L1 cache
+line is smaller than the L2 line: one cache line can end up shared
+between a DMA buffer and other data, and that data can be lost when the
+DMA buffer is invalidated before a DMA transaction.
+
+Fix that by using SMP_CACHE_BYTES instead of L1_CACHE_BYTES for the
+alignment.
+
+Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -51,7 +51,7 @@
+ #include <linux/of_mdio.h>
+ #include "dwmac1000.h"
+
+-#define STMMAC_ALIGN(x) L1_CACHE_ALIGN(x)
++#define STMMAC_ALIGN(x) __ALIGN_KERNEL(x, SMP_CACHE_BYTES)
+ #define TSO_MAX_BUFF_SIZE (SZ_16K - 1)
+
+ /* Module parameters */
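
The change above only swaps the alignment constant; the align-up
arithmetic itself is a plain round-up to a power-of-two boundary. A
small sketch of that arithmetic, with example line sizes rather than any
platform's real values:

    #include <stdio.h>
    #include <stddef.h>

    /* Round x up to the next multiple of 'align' (a power of two);
     * essentially the same arithmetic as the kernel's __ALIGN_KERNEL(). */
    #define ALIGN_UP(x, align)  (((x) + (align) - 1) & ~((size_t)(align) - 1))

    int main(void)
    {
            size_t buf = 1500;      /* example DMA buffer size */

            /* Aligning only to a 32-byte L1 line can still share a 64-byte
             * outer-level line with unrelated data; aligning to the largest
             * line in the system (SMP_CACHE_BYTES) avoids that. */
            printf("32-byte aligned: %zu\n", ALIGN_UP(buf, 32));    /* 1504 */
            printf("64-byte aligned: %zu\n", ALIGN_UP(buf, 64));    /* 1536 */
            return 0;
    }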
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+Date: Wed, 25 Jul 2018 15:39:27 -0700
+Subject: netdevsim: don't leak devlink resources
+
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+
+[ Upstream commit c259b4fb33ee6e7667bf1d34bf0803b7c5fdbdce ]
+
+Devlink resources registered with devlink_resource_register() have
+to be unregistered.
+
+Fixes: 37923ed6b8ce ("netdevsim: Add simple FIB resource controller via devlink")
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/netdevsim/devlink.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/netdevsim/devlink.c
++++ b/drivers/net/netdevsim/devlink.c
+@@ -206,6 +206,7 @@ void nsim_devlink_teardown(struct netdev
+ struct net *net = nsim_to_net(ns);
+ bool *reg_devlink = net_generic(net, nsim_devlink_id);
+
++ devlink_resources_unregister(ns->devlink, NULL);
+ devlink_unregister(ns->devlink);
+ devlink_free(ns->devlink);
+ ns->devlink = NULL;
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Dmitry Safonov <dima@arista.com>
+Date: Fri, 27 Jul 2018 16:54:44 +0100
+Subject: netlink: Do not subscribe to non-existent groups
+
+From: Dmitry Safonov <dima@arista.com>
+
+[ Upstream commit 7acf9d4237c46894e0fa0492dd96314a41742e84 ]
+
+Make the ABI stricter about subscribing to groups > ngroups.
+The code doesn't check for that, which looks bogus: one can subscribe
+to a non-existent group.
+It is still possible to bind() to all possible groups with (-1).
+
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Steffen Klassert <steffen.klassert@secunet.com>
+Cc: netdev@vger.kernel.org
+Signed-off-by: Dmitry Safonov <dima@arista.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -1008,6 +1008,7 @@ static int netlink_bind(struct socket *s
+ if (err)
+ return err;
+ }
++ groups &= (1UL << nlk->ngroups) - 1;
+
+ bound = nlk->bound;
+ if (bound) {
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Dmitry Safonov <dima@arista.com>
+Date: Mon, 30 Jul 2018 18:32:36 +0100
+Subject: netlink: Don't shift with UB on nlk->ngroups
+
+From: Dmitry Safonov <dima@arista.com>
+
+[ Upstream commit 61f4b23769f0cc72ae62c9a81cf08f0397d40da8 ]
+
+On i386, nlk->ngroups might be 32 or 0, which leads to undefined
+behaviour in the shift and results in a hang during boot.
+Check for 0 ngroups and shift in an unsigned long long type.
+
+Fixes: 7acf9d4237c4 ("netlink: Do not subscribe to non-existent groups").
+Reported-by: kernel test robot <rong.a.chen@intel.com>
+Signed-off-by: Dmitry Safonov <dima@arista.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -1008,7 +1008,11 @@ static int netlink_bind(struct socket *s
+ if (err)
+ return err;
+ }
+- groups &= (1UL << nlk->ngroups) - 1;
++
++ if (nlk->ngroups == 0)
++ groups = 0;
++ else
++ groups &= (1ULL << nlk->ngroups) - 1;
+
+ bound = nlk->bound;
+ if (bound) {
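
Why the follow-up above is needed: on a 32-bit architecture unsigned
long is 32 bits wide, so "1UL << 32" (all 32 groups) shifts by the full
type width, which is undefined behaviour. Shifting in a 64-bit type and
special-casing zero groups keeps the mask computation defined. A
userspace sketch of the safe pattern (illustrative only):

    #include <stdio.h>

    /* Drop subscription bits above 'ngroups' without an undefined shift.
     * With a 32-bit unsigned long, "1UL << 32" is UB, so shift in a
     * 64-bit type; callers here never pass more than 32 groups. */
    static unsigned long mask_groups(unsigned long groups, unsigned int ngroups)
    {
            if (ngroups == 0)
                    return 0;
            return groups & ((1ULL << ngroups) - 1);
    }

    int main(void)
    {
            /* Userspace asked for every group (-1); only 32 exist. */
            printf("%#lx\n", mask_groups(~0UL, 32));
            printf("%#lx\n", mask_groups(~0UL, 0));
            return 0;
    }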
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Avinash Repaka <avinash.repaka@oracle.com>
+Date: Tue, 24 Jul 2018 20:31:58 -0700
+Subject: RDS: RDMA: Fix the NULL-ptr deref in rds_ib_get_mr
+
+From: Avinash Repaka <avinash.repaka@oracle.com>
+
+[ Upstream commit 9e630bcb7701f94dbd729fe57d37c089c763ad9f ]
+
+Registration of a memory region (MR) through FRMR/fastreg (unlike FMR)
+needs a connection/qp. With a proxy qp, this dependency on the
+connection will be removed, but that needs more infrastructure patches,
+which are a work in progress.
+
+As an intermediate fix, get_mr returns EOPNOTSUPP when the connection
+details are not populated. MR registration through sendmsg() will
+continue to work even with fast registration, since the connection in
+that case is formed up front.
+
+This patch fixes the following crash:
+kasan: GPF could be caused by NULL-ptr deref or user memory access
+general protection fault: 0000 [#1] SMP KASAN
+Modules linked in:
+CPU: 1 PID: 4244 Comm: syzkaller468044 Not tainted 4.16.0-rc6+ #361
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
+Google 01/01/2011
+RIP: 0010:rds_ib_get_mr+0x5c/0x230 net/rds/ib_rdma.c:544
+RSP: 0018:ffff8801b059f890 EFLAGS: 00010202
+RAX: dffffc0000000000 RBX: ffff8801b07e1300 RCX: ffffffff8562d96e
+RDX: 000000000000000d RSI: 0000000000000001 RDI: 0000000000000068
+RBP: ffff8801b059f8b8 R08: ffffed0036274244 R09: ffff8801b13a1200
+R10: 0000000000000004 R11: ffffed0036274243 R12: ffff8801b13a1200
+R13: 0000000000000001 R14: ffff8801ca09fa9c R15: 0000000000000000
+FS: 00007f4d050af700(0000) GS:ffff8801db300000(0000)
+knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f4d050aee78 CR3: 00000001b0d9b006 CR4: 00000000001606e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ __rds_rdma_map+0x710/0x1050 net/rds/rdma.c:271
+ rds_get_mr_for_dest+0x1d4/0x2c0 net/rds/rdma.c:357
+ rds_setsockopt+0x6cc/0x980 net/rds/af_rds.c:347
+ SYSC_setsockopt net/socket.c:1849 [inline]
+ SyS_setsockopt+0x189/0x360 net/socket.c:1828
+ do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
+ entry_SYSCALL_64_after_hwframe+0x42/0xb7
+RIP: 0033:0x4456d9
+RSP: 002b:00007f4d050aedb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036
+RAX: ffffffffffffffda RBX: 00000000006dac3c RCX: 00000000004456d9
+RDX: 0000000000000007 RSI: 0000000000000114 RDI: 0000000000000004
+RBP: 00000000006dac38 R08: 00000000000000a0 R09: 0000000000000000
+R10: 0000000020000380 R11: 0000000000000246 R12: 0000000000000000
+R13: 00007fffbfb36d6f R14: 00007f4d050af9c0 R15: 0000000000000005
+Code: fa 48 c1 ea 03 80 3c 02 00 0f 85 cc 01 00 00 4c 8b bb 80 04 00 00
+48
+b8 00 00 00 00 00 fc ff df 49 8d 7f 68 48 89 fa 48 c1 ea 03 <80> 3c 02
+00 0f
+85 9c 01 00 00 4d 8b 7f 68 48 b8 00 00 00 00 00
+RIP: rds_ib_get_mr+0x5c/0x230 net/rds/ib_rdma.c:544 RSP:
+ffff8801b059f890
+---[ end trace 7e1cea13b85473b0 ]---
+
+Reported-by: syzbot+b51c77ef956678a65834@syzkaller.appspotmail.com
+Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
+Signed-off-by: Avinash Repaka <avinash.repaka@oracle.com>
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rds/ib_frmr.c | 5 +++++
+ net/rds/ib_mr.h | 3 ++-
+ net/rds/ib_rdma.c | 21 +++++++++++++--------
+ net/rds/rdma.c | 13 ++++++++-----
+ net/rds/rds.h | 5 ++++-
+ net/rds/send.c | 12 +++++++-----
+ 6 files changed, 39 insertions(+), 20 deletions(-)
+
+--- a/net/rds/ib_frmr.c
++++ b/net/rds/ib_frmr.c
+@@ -344,6 +344,11 @@ struct rds_ib_mr *rds_ib_reg_frmr(struct
+ struct rds_ib_frmr *frmr;
+ int ret;
+
++ if (!ic) {
++ /* TODO: Add FRWR support for RDS_GET_MR using proxy qp*/
++ return ERR_PTR(-EOPNOTSUPP);
++ }
++
+ do {
+ if (ibmr)
+ rds_ib_free_frmr(ibmr, true);
+--- a/net/rds/ib_mr.h
++++ b/net/rds/ib_mr.h
+@@ -115,7 +115,8 @@ void rds_ib_get_mr_info(struct rds_ib_de
+ struct rds_info_rdma_connection *iinfo);
+ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
+ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
+- struct rds_sock *rs, u32 *key_ret);
++ struct rds_sock *rs, u32 *key_ret,
++ struct rds_connection *conn);
+ void rds_ib_sync_mr(void *trans_private, int dir);
+ void rds_ib_free_mr(void *trans_private, int invalidate);
+ void rds_ib_flush_mrs(void);
+--- a/net/rds/ib_rdma.c
++++ b/net/rds/ib_rdma.c
+@@ -537,11 +537,12 @@ void rds_ib_flush_mrs(void)
+ }
+
+ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
+- struct rds_sock *rs, u32 *key_ret)
++ struct rds_sock *rs, u32 *key_ret,
++ struct rds_connection *conn)
+ {
+ struct rds_ib_device *rds_ibdev;
+ struct rds_ib_mr *ibmr = NULL;
+- struct rds_ib_connection *ic = rs->rs_conn->c_transport_data;
++ struct rds_ib_connection *ic = NULL;
+ int ret;
+
+ rds_ibdev = rds_ib_get_device(rs->rs_bound_addr);
+@@ -550,6 +551,9 @@ void *rds_ib_get_mr(struct scatterlist *
+ goto out;
+ }
+
++ if (conn)
++ ic = conn->c_transport_data;
++
+ if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) {
+ ret = -ENODEV;
+ goto out;
+@@ -559,17 +563,18 @@ void *rds_ib_get_mr(struct scatterlist *
+ ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret);
+ else
+ ibmr = rds_ib_reg_fmr(rds_ibdev, sg, nents, key_ret);
+- if (ibmr)
+- rds_ibdev = NULL;
+-
+- out:
+- if (!ibmr)
++ if (IS_ERR(ibmr)) {
++ ret = PTR_ERR(ibmr);
+ pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret);
++ } else {
++ return ibmr;
++ }
+
++ out:
+ if (rds_ibdev)
+ rds_ib_dev_put(rds_ibdev);
+
+- return ibmr;
++ return ERR_PTR(ret);
+ }
+
+ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
+--- a/net/rds/rdma.c
++++ b/net/rds/rdma.c
+@@ -170,7 +170,8 @@ static int rds_pin_pages(unsigned long u
+ }
+
+ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
+- u64 *cookie_ret, struct rds_mr **mr_ret)
++ u64 *cookie_ret, struct rds_mr **mr_ret,
++ struct rds_conn_path *cp)
+ {
+ struct rds_mr *mr = NULL, *found;
+ unsigned int nr_pages;
+@@ -269,7 +270,8 @@ static int __rds_rdma_map(struct rds_soc
+ * Note that dma_map() implies that pending writes are
+ * flushed to RAM, so no dma_sync is needed here. */
+ trans_private = rs->rs_transport->get_mr(sg, nents, rs,
+- &mr->r_key);
++ &mr->r_key,
++ cp ? cp->cp_conn : NULL);
+
+ if (IS_ERR(trans_private)) {
+ for (i = 0 ; i < nents; i++)
+@@ -330,7 +332,7 @@ int rds_get_mr(struct rds_sock *rs, char
+ sizeof(struct rds_get_mr_args)))
+ return -EFAULT;
+
+- return __rds_rdma_map(rs, &args, NULL, NULL);
++ return __rds_rdma_map(rs, &args, NULL, NULL, NULL);
+ }
+
+ int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen)
+@@ -354,7 +356,7 @@ int rds_get_mr_for_dest(struct rds_sock
+ new_args.cookie_addr = args.cookie_addr;
+ new_args.flags = args.flags;
+
+- return __rds_rdma_map(rs, &new_args, NULL, NULL);
++ return __rds_rdma_map(rs, &new_args, NULL, NULL, NULL);
+ }
+
+ /*
+@@ -782,7 +784,8 @@ int rds_cmsg_rdma_map(struct rds_sock *r
+ rm->m_rdma_cookie != 0)
+ return -EINVAL;
+
+- return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.op_rdma_mr);
++ return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie,
++ &rm->rdma.op_rdma_mr, rm->m_conn_path);
+ }
+
+ /*
+--- a/net/rds/rds.h
++++ b/net/rds/rds.h
+@@ -464,6 +464,8 @@ struct rds_message {
+ struct scatterlist *op_sg;
+ } data;
+ };
++
++ struct rds_conn_path *m_conn_path;
+ };
+
+ /*
+@@ -544,7 +546,8 @@ struct rds_transport {
+ unsigned int avail);
+ void (*exit)(void);
+ void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg,
+- struct rds_sock *rs, u32 *key_ret);
++ struct rds_sock *rs, u32 *key_ret,
++ struct rds_connection *conn);
+ void (*sync_mr)(void *trans_private, int direction);
+ void (*free_mr)(void *trans_private, int invalidate);
+ void (*flush_mrs)(void);
+--- a/net/rds/send.c
++++ b/net/rds/send.c
+@@ -1169,6 +1169,13 @@ int rds_sendmsg(struct socket *sock, str
+ rs->rs_conn = conn;
+ }
+
++ if (conn->c_trans->t_mp_capable)
++ cpath = &conn->c_path[rds_send_mprds_hash(rs, conn)];
++ else
++ cpath = &conn->c_path[0];
++
++ rm->m_conn_path = cpath;
++
+ /* Parse any control messages the user may have included. */
+ ret = rds_cmsg_send(rs, rm, msg, &allocated_mr);
+ if (ret) {
+@@ -1192,11 +1199,6 @@ int rds_sendmsg(struct socket *sock, str
+ goto out;
+ }
+
+- if (conn->c_trans->t_mp_capable)
+- cpath = &conn->c_path[rds_send_mprds_hash(rs, conn)];
+- else
+- cpath = &conn->c_path[0];
+-
+ if (rds_destroy_pending(conn)) {
+ ret = -EAGAIN;
+ goto out;
net-dsa-qca8k-enable-rxmac-when-bringing-up-a-port.patch
net-dsa-qca8k-add-qca8334-binding-documentation.patch
net-dsa-qca8k-allow-overwriting-cpu-port-setting.patch
+ipv4-remove-bug_on-from-fib_compute_spec_dst.patch
+netdevsim-don-t-leak-devlink-resources.patch
+net-ena-fix-use-of-uninitialized-dma-address-bits-field.patch
+net-fix-amd-xgbe-flow-control-issue.patch
+net-lan78xx-fix-rx-handling-before-first-packet-is-send.patch
+net-mdio-mux-bcm-iproc-fix-wrong-getter-and-setter-pair.patch
+net-stmmac-align-dma-stuff-to-largest-cache-line-length.patch
+rds-rdma-fix-the-null-ptr-deref-in-rds_ib_get_mr.patch
+tcp_bbr-fix-bw-probing-to-raise-in-flight-data-for-very-small-bdps.patch
+virtio_net-fix-incosistent-received-bytes-counter.patch
+xen-netfront-wait-xenbus-state-change-when-load-module-manually.patch
+cxgb4-added-missing-break-in-ndo_udp_tunnel_-add-del.patch
+net-rollback-orig-value-on-failure-of-dev_qdisc_change_tx_queue_len.patch
+netlink-do-not-subscribe-to-non-existent-groups.patch
+netlink-don-t-shift-with-ub-on-nlk-ngroups.patch
+tcp-do-not-force-quickack-when-receiving-out-of-order-packets.patch
+tcp-add-max_quickacks-param-to-tcp_incr_quickack-and-tcp_enter_quickack_mode.patch
+tcp-do-not-aggressively-quick-ack-after-ecn-events.patch
+tcp-refactor-tcp_ecn_check_ce-to-remove-sk-type-cast.patch
+tcp-add-one-more-quick-ack-after-after-ecn-events.patch
+tcp-ack-immediately-when-a-cwr-packet-arrives.patch
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Lawrence Brakmo <brakmo@fb.com>
+Date: Mon, 23 Jul 2018 17:49:39 -0700
+Subject: tcp: ack immediately when a cwr packet arrives
+
+From: Lawrence Brakmo <brakmo@fb.com>
+
+[ Upstream commit 9aee40006190a3cda9a4d2dbae71e92617c8c362 ]
+
+We observed high 99th and 99.9th percentile latencies when doing RPCs
+with DCTCP. The problem is triggered when the last packet of a request
+arrives CE marked. The reply will carry the ECE mark, causing TCP to
+shrink its cwnd to 1 (because there are no packets in flight). When the
+first packet of the next request arrives, its ACK is sometimes delayed
+even though it is CWR marked, adding up to 40ms to the RPC latency.
+
+This patch ensures that arriving CWR-marked data packets are acked
+immediately.
+
+Packetdrill script to reproduce the problem:
+
+0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0.000 setsockopt(3, SOL_TCP, TCP_CONGESTION, "dctcp", 5) = 0
+0.000 bind(3, ..., ...) = 0
+0.000 listen(3, 1) = 0
+
+0.100 < [ect0] SEW 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0.100 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+0.110 < [ect0] . 1:1(0) ack 1 win 257
+0.200 accept(3, ..., ...) = 4
+
+0.200 < [ect0] . 1:1001(1000) ack 1 win 257
+0.200 > [ect01] . 1:1(0) ack 1001
+
+0.200 write(4, ..., 1) = 1
+0.200 > [ect01] P. 1:2(1) ack 1001
+
+0.200 < [ect0] . 1001:2001(1000) ack 2 win 257
+0.200 write(4, ..., 1) = 1
+0.200 > [ect01] P. 2:3(1) ack 2001
+
+0.200 < [ect0] . 2001:3001(1000) ack 3 win 257
+0.200 < [ect0] . 3001:4001(1000) ack 3 win 257
+0.200 > [ect01] . 3:3(0) ack 4001
+
+0.210 < [ce] P. 4001:4501(500) ack 3 win 257
+
++0.001 read(4, ..., 4500) = 4500
++0 write(4, ..., 1) = 1
++0 > [ect01] PE. 3:4(1) ack 4501
+
++0.010 < [ect0] W. 4501:5501(1000) ack 4 win 257
+// Previously the ACK sequence below would be 4501, causing a long RTO
++0.040~+0.045 > [ect01] . 4:4(0) ack 5501 // delayed ack
+
++0.311 < [ect0] . 5501:6501(1000) ack 4 win 257 // More data
++0 > [ect01] . 4:4(0) ack 6501 // now acks everything
+
++0.500 < F. 9501:9501(0) ack 4 win 257
+
+Modified based on comments by Neal Cardwell <ncardwell@google.com>
+
+Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -227,8 +227,15 @@ static void tcp_ecn_queue_cwr(struct tcp
+
+ static void tcp_ecn_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
+ {
+- if (tcp_hdr(skb)->cwr)
++ if (tcp_hdr(skb)->cwr) {
+ tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
++
++ /* If the sender is telling us it has entered CWR, then its
++ * cwnd may be very low (even just 1 packet), so we should ACK
++ * immediately.
++ */
++ tcp_enter_quickack_mode((struct sock *)tp, 2);
++ }
+ }
+
+ static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 21 May 2018 15:08:56 -0700
+Subject: tcp: add max_quickacks param to tcp_incr_quickack and tcp_enter_quickack_mode
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9a9c9b51e54618861420093ae6e9b50a961914c5 ]
+
+We want to add finer control of the number of ACK packets sent after
+ECN events.
+
+This patch does not change current behavior; it only enables the
+following change.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h | 2 +-
+ net/ipv4/tcp_dctcp.c | 4 ++--
+ net/ipv4/tcp_input.c | 24 +++++++++++++-----------
+ 3 files changed, 16 insertions(+), 14 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -342,7 +342,7 @@ ssize_t tcp_splice_read(struct socket *s
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags);
+
+-void tcp_enter_quickack_mode(struct sock *sk);
++void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
+ static inline void tcp_dec_quickack_mode(struct sock *sk,
+ const unsigned int pkts)
+ {
+--- a/net/ipv4/tcp_dctcp.c
++++ b/net/ipv4/tcp_dctcp.c
+@@ -138,7 +138,7 @@ static void dctcp_ce_state_0_to_1(struct
+ */
+ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+ __tcp_send_ack(sk, ca->prior_rcv_nxt);
+- tcp_enter_quickack_mode(sk);
++ tcp_enter_quickack_mode(sk, 1);
+ }
+
+ ca->prior_rcv_nxt = tp->rcv_nxt;
+@@ -159,7 +159,7 @@ static void dctcp_ce_state_1_to_0(struct
+ */
+ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+ __tcp_send_ack(sk, ca->prior_rcv_nxt);
+- tcp_enter_quickack_mode(sk);
++ tcp_enter_quickack_mode(sk, 1);
+ }
+
+ ca->prior_rcv_nxt = tp->rcv_nxt;
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -184,21 +184,23 @@ static void tcp_measure_rcv_mss(struct s
+ }
+ }
+
+-static void tcp_incr_quickack(struct sock *sk)
++static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
+ {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
+
+ if (quickacks == 0)
+ quickacks = 2;
++ quickacks = min(quickacks, max_quickacks);
+ if (quickacks > icsk->icsk_ack.quick)
+- icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
++ icsk->icsk_ack.quick = quickacks;
+ }
+
+-void tcp_enter_quickack_mode(struct sock *sk)
++void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+ {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+- tcp_incr_quickack(sk);
++
++ tcp_incr_quickack(sk, max_quickacks);
+ icsk->icsk_ack.pingpong = 0;
+ icsk->icsk_ack.ato = TCP_ATO_MIN;
+ }
+@@ -243,7 +245,7 @@ static void __tcp_ecn_check_ce(struct tc
+ * it is probably a retransmit.
+ */
+ if (tp->ecn_flags & TCP_ECN_SEEN)
+- tcp_enter_quickack_mode((struct sock *)tp);
++ tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
+ break;
+ case INET_ECN_CE:
+ if (tcp_ca_needs_ecn((struct sock *)tp))
+@@ -251,7 +253,7 @@ static void __tcp_ecn_check_ce(struct tc
+
+ if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+ /* Better not delay acks, sender can have a very low cwnd */
+- tcp_enter_quickack_mode((struct sock *)tp);
++ tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
+ tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ }
+ tp->ecn_flags |= TCP_ECN_SEEN;
+@@ -666,7 +668,7 @@ static void tcp_event_data_recv(struct s
+ /* The _first_ data packet received, initialize
+ * delayed ACK engine.
+ */
+- tcp_incr_quickack(sk);
++ tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
+ icsk->icsk_ack.ato = TCP_ATO_MIN;
+ } else {
+ int m = now - icsk->icsk_ack.lrcvtime;
+@@ -682,7 +684,7 @@ static void tcp_event_data_recv(struct s
+ /* Too long gap. Apparently sender failed to
+ * restart window, so that we send ACKs quickly.
+ */
+- tcp_incr_quickack(sk);
++ tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
+ sk_mem_reclaim(sk);
+ }
+ }
+@@ -4136,7 +4138,7 @@ static void tcp_send_dupack(struct sock
+ if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
+ before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
+- tcp_enter_quickack_mode(sk);
++ tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
+
+ if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+ u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+@@ -4667,7 +4669,7 @@ queue_and_out:
+ tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+
+ out_of_window:
+- tcp_enter_quickack_mode(sk);
++ tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
+ inet_csk_schedule_ack(sk);
+ drop:
+ tcp_drop(sk, skb);
+@@ -5744,7 +5746,7 @@ static int tcp_rcv_synsent_state_process
+ * to stand against the temptation 8) --ANK
+ */
+ inet_csk_schedule_ack(sk);
+- tcp_enter_quickack_mode(sk);
++ tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+ TCP_DELACK_MAX, TCP_RTO_MAX);
+
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 27 Jun 2018 08:47:21 -0700
+Subject: tcp: add one more quick ack after after ECN events
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 15ecbe94a45ef88491ca459b26efdd02f91edb6d ]
+
+Larry Brakmo's proposal (https://patchwork.ozlabs.org/patch/935233/
+"tcp: force cwnd at least 2 in tcp_cwnd_reduction") made us rethink
+our recent patch removing ~16 quick acks after ECN events.
+
+tcp_enter_quickack_mode(sk, 1) makes sure one immediate ack is sent,
+but in case the sender's cwnd was lowered to 1, we do not want a
+delayed ack for the next packet we will receive.
+
+Fixes: 522040ea5fdd ("tcp: do not aggressively quick ack after ECN events")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Neal Cardwell <ncardwell@google.com>
+Cc: Lawrence Brakmo <brakmo@fb.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -247,7 +247,7 @@ static void __tcp_ecn_check_ce(struct so
+ * it is probably a retransmit.
+ */
+ if (tp->ecn_flags & TCP_ECN_SEEN)
+- tcp_enter_quickack_mode(sk, 1);
++ tcp_enter_quickack_mode(sk, 2);
+ break;
+ case INET_ECN_CE:
+ if (tcp_ca_needs_ecn(sk))
+@@ -255,7 +255,7 @@ static void __tcp_ecn_check_ce(struct so
+
+ if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+ /* Better not delay acks, sender can have a very low cwnd */
+- tcp_enter_quickack_mode(sk, 1);
++ tcp_enter_quickack_mode(sk, 2);
+ tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ }
+ tp->ecn_flags |= TCP_ECN_SEEN;
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 21 May 2018 15:08:57 -0700
+Subject: tcp: do not aggressively quick ack after ECN events
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 522040ea5fdd1c33bbf75e1d7c7c0422b96a94ef ]
+
+ECN signals currently force TCP to enter quickack mode for
+up to 16 (TCP_MAX_QUICKACKS) following incoming packets.
+
+We believe this is not needed, and only sending one immediate ack
+for the current packet should be enough.
+
+This should reduce the extra load noticed in DCTCP environments,
+after congestion events.
+
+This is part 2 of our effort to reduce pure ACK packets.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -245,7 +245,7 @@ static void __tcp_ecn_check_ce(struct tc
+ * it is probably a retransmit.
+ */
+ if (tp->ecn_flags & TCP_ECN_SEEN)
+- tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
++ tcp_enter_quickack_mode((struct sock *)tp, 1);
+ break;
+ case INET_ECN_CE:
+ if (tcp_ca_needs_ecn((struct sock *)tp))
+@@ -253,7 +253,7 @@ static void __tcp_ecn_check_ce(struct tc
+
+ if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+ /* Better not delay acks, sender can have a very low cwnd */
+- tcp_enter_quickack_mode((struct sock *)tp, TCP_MAX_QUICKACKS);
++ tcp_enter_quickack_mode((struct sock *)tp, 1);
+ tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ }
+ tp->ecn_flags |= TCP_ECN_SEEN;
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 17 May 2018 14:47:25 -0700
+Subject: tcp: do not force quickack when receiving out-of-order packets
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a3893637e1eb0ef5eb1bbc52b3a8d2dfa317a35d ]
+
+As explained in commit 9f9843a751d0 ("tcp: properly handle stretch
+acks in slow start"), TCP stacks have to consider how many packets
+are acknowledged in one single ACK, because of GRO, but also
+because of ACK compression or losses.
+
+We plan to add SACK compression in the following patch; we must
+therefore not call tcp_enter_quickack_mode().
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -4678,8 +4678,6 @@ drop:
+ if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
+ goto out_of_window;
+
+- tcp_enter_quickack_mode(sk);
+-
+ if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+ /* Partial packet, seq < rcv_next < end_seq */
+ SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Yousuk Seung <ysseung@google.com>
+Date: Mon, 4 Jun 2018 15:29:51 -0700
+Subject: tcp: refactor tcp_ecn_check_ce to remove sk type cast
+
+From: Yousuk Seung <ysseung@google.com>
+
+[ Upstream commit f4c9f85f3b2cb7669830cd04d0be61192a4d2436 ]
+
+Refactor tcp_ecn_check_ce and __tcp_ecn_check_ce to accept struct sock*
+instead of tcp_sock* to clean up type casts. This is a pure refactor
+patch.
+
+Signed-off-by: Yousuk Seung <ysseung@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 26 ++++++++++++++------------
+ 1 file changed, 14 insertions(+), 12 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -236,8 +236,10 @@ static void tcp_ecn_withdraw_cwr(struct
+ tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+ }
+
+-static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
++static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
+ {
++ struct tcp_sock *tp = tcp_sk(sk);
++
+ switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
+ case INET_ECN_NOT_ECT:
+ /* Funny extension: if ECT is not set on a segment,
+@@ -245,31 +247,31 @@ static void __tcp_ecn_check_ce(struct tc
+ * it is probably a retransmit.
+ */
+ if (tp->ecn_flags & TCP_ECN_SEEN)
+- tcp_enter_quickack_mode((struct sock *)tp, 1);
++ tcp_enter_quickack_mode(sk, 1);
+ break;
+ case INET_ECN_CE:
+- if (tcp_ca_needs_ecn((struct sock *)tp))
+- tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_IS_CE);
++ if (tcp_ca_needs_ecn(sk))
++ tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
+
+ if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+ /* Better not delay acks, sender can have a very low cwnd */
+- tcp_enter_quickack_mode((struct sock *)tp, 1);
++ tcp_enter_quickack_mode(sk, 1);
+ tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ }
+ tp->ecn_flags |= TCP_ECN_SEEN;
+ break;
+ default:
+- if (tcp_ca_needs_ecn((struct sock *)tp))
+- tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_NO_CE);
++ if (tcp_ca_needs_ecn(sk))
++ tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
+ tp->ecn_flags |= TCP_ECN_SEEN;
+ break;
+ }
+ }
+
+-static void tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
++static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
+ {
+- if (tp->ecn_flags & TCP_ECN_OK)
+- __tcp_ecn_check_ce(tp, skb);
++ if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
++ __tcp_ecn_check_ce(sk, skb);
+ }
+
+ static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
+@@ -690,7 +692,7 @@ static void tcp_event_data_recv(struct s
+ }
+ icsk->icsk_ack.lrcvtime = now;
+
+- tcp_ecn_check_ce(tp, skb);
++ tcp_ecn_check_ce(sk, skb);
+
+ if (skb->len >= 128)
+ tcp_grow_window(sk, skb);
+@@ -4406,7 +4408,7 @@ static void tcp_data_queue_ofo(struct so
+ u32 seq, end_seq;
+ bool fragstolen;
+
+- tcp_ecn_check_ce(tp, skb);
++ tcp_ecn_check_ce(sk, skb);
+
+ if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Neal Cardwell <ncardwell@google.com>
+Date: Fri, 27 Jul 2018 17:19:12 -0400
+Subject: tcp_bbr: fix bw probing to raise in-flight data for very small BDPs
+
+From: Neal Cardwell <ncardwell@google.com>
+
+[ Upstream commit 383d470936c05554219094a4d364d964cb324827 ]
+
+For some very small BDPs (with just a few packets) there was a
+quantization effect where the target number of packets in flight
+during the super-unity-gain (1.25x) phase of gain cycling was
+implicitly truncated to a number of packets no larger than the normal
+unity-gain (1.0x) phase of gain cycling. This meant that in multi-flow
+scenarios some flows could get stuck with a lower bandwidth, because
+they did not push enough packets inflight to discover that there was
+more bandwidth available. This was really only an issue in multi-flow
+LAN scenarios, where RTTs and BDPs are low enough for this to be an
+issue.
+
+This fix ensures that gain cycling can raise inflight for small BDPs
+by ensuring that in PROBE_BW mode target inflight values with a
+super-unity gain are always greater than inflight values with a gain
+<= 1. Importantly, this applies whether the inflight value is
+calculated for use as a cwnd value, or as a target inflight value for
+the end of the super-unity phase in bbr_is_next_cycle_phase() (both
+need to be bigger to ensure we can probe with more packets in flight
+reliably).
+
+This is a candidate fix for stable releases.
+
+Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Acked-by: Priyaranjan Jha <priyarjha@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_bbr.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -354,6 +354,10 @@ static u32 bbr_target_cwnd(struct sock *
+ /* Reduce delayed ACKs by rounding up cwnd to the next even number. */
+ cwnd = (cwnd + 1) & ~1U;
+
++ /* Ensure gain cycling gets inflight above BDP even for small BDPs. */
++ if (bbr->mode == BBR_PROBE_BW && gain > BBR_UNIT)
++ cwnd += 2;
++
+ return cwnd;
+ }
+
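
The quantization effect described above is plain integer truncation:
with only a handful of packets in the BDP, multiplying by the 1.25x
probing gain and truncating can land on the same integer as the 1.0x
target, so probing never actually puts more packets in flight. A tiny
arithmetic sketch (simplifying the gain to 5/4; BBR's real code scales
by BBR_UNIT but truncates the same way):

    #include <stdio.h>

    int main(void)
    {
            /* Target inflight = bdp * gain, in whole packets. */
            for (unsigned int bdp = 1; bdp <= 5; bdp++) {
                    unsigned int unity = bdp;              /* 1.0x gain  */
                    unsigned int probe = bdp * 5 / 4;      /* 1.25x gain */

                    printf("bdp=%u  unity=%u  probe=%u%s\n", bdp, unity, probe,
                           probe == unity ? "  <- probing adds nothing" : "");
            }
            /* Adding 2 to the probe target (as the fix does in PROBE_BW when
             * gain > 1) guarantees it exceeds the unity target. */
            return 0;
    }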
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Date: Mon, 23 Jul 2018 23:36:04 +0900
+Subject: virtio_net: Fix incosistent received bytes counter
+
+From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+
+[ Upstream commit ecbc42ca5d665e9238a4cdb595024d2e6cf87f2d ]
+
+When received packets are dropped in the virtio_net driver, the received
+packets counter is incremented but the bytes counter is not.
+As a result, if for instance we drop all packets via XDP, only the
+packet counter advances while the bytes counter stays 0, which looks
+inconsistent.
+IMHO received packets/bytes should be counted whenever packets are
+produced by the hypervisor, as common NICs on physical machines do.
+So fix the bytes counter.
+
+Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 41 +++++++++++++++++++++++------------------
+ 1 file changed, 23 insertions(+), 18 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -551,7 +551,8 @@ static struct sk_buff *receive_small(str
+ struct receive_queue *rq,
+ void *buf, void *ctx,
+ unsigned int len,
+- unsigned int *xdp_xmit)
++ unsigned int *xdp_xmit,
++ unsigned int *rbytes)
+ {
+ struct sk_buff *skb;
+ struct bpf_prog *xdp_prog;
+@@ -567,6 +568,7 @@ static struct sk_buff *receive_small(str
+ int err;
+
+ len -= vi->hdr_len;
++ *rbytes += len;
+
+ rcu_read_lock();
+ xdp_prog = rcu_dereference(rq->xdp_prog);
+@@ -666,11 +668,13 @@ static struct sk_buff *receive_big(struc
+ struct virtnet_info *vi,
+ struct receive_queue *rq,
+ void *buf,
+- unsigned int len)
++ unsigned int len,
++ unsigned int *rbytes)
+ {
+ struct page *page = buf;
+ struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
+
++ *rbytes += len - vi->hdr_len;
+ if (unlikely(!skb))
+ goto err;
+
+@@ -688,7 +692,8 @@ static struct sk_buff *receive_mergeable
+ void *buf,
+ void *ctx,
+ unsigned int len,
+- unsigned int *xdp_xmit)
++ unsigned int *xdp_xmit,
++ unsigned int *rbytes)
+ {
+ struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
+ u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
+@@ -702,6 +707,7 @@ static struct sk_buff *receive_mergeable
+ int err;
+
+ head_skb = NULL;
++ *rbytes += len - vi->hdr_len;
+
+ rcu_read_lock();
+ xdp_prog = rcu_dereference(rq->xdp_prog);
+@@ -831,6 +837,7 @@ static struct sk_buff *receive_mergeable
+ goto err_buf;
+ }
+
++ *rbytes += len;
+ page = virt_to_head_page(buf);
+
+ truesize = mergeable_ctx_to_truesize(ctx);
+@@ -886,6 +893,7 @@ err_skb:
+ dev->stats.rx_length_errors++;
+ break;
+ }
++ *rbytes += len;
+ page = virt_to_head_page(buf);
+ put_page(page);
+ }
+@@ -896,14 +904,13 @@ xdp_xmit:
+ return NULL;
+ }
+
+-static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
+- void *buf, unsigned int len, void **ctx,
+- unsigned int *xdp_xmit)
++static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
++ void *buf, unsigned int len, void **ctx,
++ unsigned int *xdp_xmit, unsigned int *rbytes)
+ {
+ struct net_device *dev = vi->dev;
+ struct sk_buff *skb;
+ struct virtio_net_hdr_mrg_rxbuf *hdr;
+- int ret;
+
+ if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
+ pr_debug("%s: short packet %i\n", dev->name, len);
+@@ -915,23 +922,22 @@ static int receive_buf(struct virtnet_in
+ } else {
+ put_page(virt_to_head_page(buf));
+ }
+- return 0;
++ return;
+ }
+
+ if (vi->mergeable_rx_bufs)
+- skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit);
++ skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
++ rbytes);
+ else if (vi->big_packets)
+- skb = receive_big(dev, vi, rq, buf, len);
++ skb = receive_big(dev, vi, rq, buf, len, rbytes);
+ else
+- skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit);
++ skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, rbytes);
+
+ if (unlikely(!skb))
+- return 0;
++ return;
+
+ hdr = skb_vnet_hdr(skb);
+
+- ret = skb->len;
+-
+ if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+@@ -948,12 +954,11 @@ static int receive_buf(struct virtnet_in
+ ntohs(skb->protocol), skb->len, skb->pkt_type);
+
+ napi_gro_receive(&rq->napi, skb);
+- return ret;
++ return;
+
+ frame_err:
+ dev->stats.rx_frame_errors++;
+ dev_kfree_skb(skb);
+- return 0;
+ }
+
+ /* Unlike mergeable buffers, all buffers are allocated to the
+@@ -1203,13 +1208,13 @@ static int virtnet_receive(struct receiv
+
+ while (received < budget &&
+ (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
+- bytes += receive_buf(vi, rq, buf, len, ctx, xdp_xmit);
++ receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &bytes);
+ received++;
+ }
+ } else {
+ while (received < budget &&
+ (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
+- bytes += receive_buf(vi, rq, buf, len, NULL, xdp_xmit);
++ receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &bytes);
+ received++;
+ }
+ }
--- /dev/null
+From foo@baz Wed Aug 1 08:19:18 CEST 2018
+From: Xiao Liang <xiliang@redhat.com>
+Date: Fri, 27 Jul 2018 17:56:08 +0800
+Subject: xen-netfront: wait xenbus state change when load module manually
+
+From: Xiao Liang <xiliang@redhat.com>
+
+[ Upstream commit 822fb18a82abaf4ee7058793d95d340f5dab7bfc ]
+
+When loading the module manually, after calling xenbus_switch_state to
+initialize the state of the netfront device, the driver state may not
+change fast enough, which can result in no device being created on
+recent kernels. This patch adds a wait to make sure xenbus knows the
+driver is not in the closed/unknown state.
+
+Current state:
+[vm]# ethtool eth0
+Settings for eth0:
+ Link detected: yes
+[vm]# modprobe -r xen_netfront
+[vm]# modprobe xen_netfront
+[vm]# ethtool eth0
+Settings for eth0:
+Cannot get device settings: No such device
+Cannot get wake-on-lan settings: No such device
+Cannot get message level: No such device
+Cannot get link status: No such device
+No data available
+
+With the patch installed.
+[vm]# ethtool eth0
+Settings for eth0:
+ Link detected: yes
+[vm]# modprobe -r xen_netfront
+[vm]# modprobe xen_netfront
+[vm]# ethtool eth0
+Settings for eth0:
+ Link detected: yes
+
+Signed-off-by: Xiao Liang <xiliang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/xen-netfront.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/net/xen-netfront.c
++++ b/drivers/net/xen-netfront.c
+@@ -87,6 +87,7 @@ struct netfront_cb {
+ /* IRQ name is queue name with "-tx" or "-rx" appended */
+ #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
+
++static DECLARE_WAIT_QUEUE_HEAD(module_load_q);
+ static DECLARE_WAIT_QUEUE_HEAD(module_unload_q);
+
+ struct netfront_stats {
+@@ -1330,6 +1331,11 @@ static struct net_device *xennet_create_
+ netif_carrier_off(netdev);
+
+ xenbus_switch_state(dev, XenbusStateInitialising);
++ wait_event(module_load_q,
++ xenbus_read_driver_state(dev->otherend) !=
++ XenbusStateClosed &&
++ xenbus_read_driver_state(dev->otherend) !=
++ XenbusStateUnknown);
+ return netdev;
+
+ exit: