--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 14 May 2018 15:32:03 -0700
+Subject: hv_netvsc: Add initialization of tx_table in netvsc_device_add()
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+[ Commit 6b0cbe315868d613123cf387052ccda5f09d49ea upstream. ]
+
+tx_table is part of the private data of kernel net_device. It is only
+zero-ed out when allocating net_device.
+
+We may recreate netvsc_device w/o recreating net_device, so the private
+netdev data, including tx_table, are not zeroed. It may contain channel
+numbers for the older netvsc_device.
+
+This patch adds initialization of tx_table each time we recreate
+netvsc_device.
+
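+For context, tx_table is consulted on every transmit to map a flow hash to
+a queue; a simplified sketch of that lookup (based on the driver's
+netvsc_get_tx_queue()):
+
+	q_idx = ndc->tx_table[netvsc_get_hash(skb, ndc) &
+			      (VRSS_SEND_TAB_SIZE - 1)];
+
+A stale entry left over from an older netvsc_device with more channels can
+therefore point at a queue the recreated device no longer has, which is why
+the table is cleared here.
+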
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -1252,6 +1252,9 @@ struct netvsc_device *netvsc_device_add(
+ if (!net_device)
+ return ERR_PTR(-ENOMEM);
+
++ for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
++ net_device_ctx->tx_table[i] = 0;
++
+ net_device->ring_size = ring_size;
+
+ /* Because the device uses NAPI, all the interrupt batching and
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:09 -0700
+Subject: hv_netvsc: avoid retry on send during shutdown
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit 12f69661a49446840d742d8feb593ace022d9f66 upstream. ]
+
+Change the initialization order so that the device is ready to transmit
+(ie connect vsp is completed) before setting the internal reference
+to the device with RCU.
+
+This avoids any races on initialization and prevents retry issues
+on shutdown.
+
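+A minimal sketch of the publish-after-init ordering this establishes (the
+names are the driver's, the code shape is simplified):
+
+	/* finish setup first: open the channel, connect to the VSP, ... */
+	ret = netvsc_connect_vsp(device, net_device, device_info);
+	if (ret != 0)
+		goto close;
+
+	/* only then publish the pointer, so rcu_dereference() readers in
+	 * the send path never observe a half-initialized device
+	 */
+	rcu_assign_pointer(net_device_ctx->nvdev, net_device);
+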
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c | 24 +++++++-----------------
+ 1 file changed, 7 insertions(+), 17 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -850,13 +850,6 @@ int netvsc_send(struct net_device *ndev,
+ if (unlikely(!net_device || net_device->destroy))
+ return -ENODEV;
+
+- /* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get
+- * here before the negotiation with the host is finished and
+- * send_section_map may not be allocated yet.
+- */
+- if (unlikely(!net_device->send_section_map))
+- return -EAGAIN;
+-
+ nvchan = &net_device->chan_table[packet->q_idx];
+ packet->send_buf_index = NETVSC_INVALID_INDEX;
+ packet->cp_partial = false;
+@@ -864,10 +857,8 @@ int netvsc_send(struct net_device *ndev,
+ /* Send control message directly without accessing msd (Multi-Send
+ * Data) field which may be changed during data packet processing.
+ */
+- if (!skb) {
+- cur_send = packet;
+- goto send_now;
+- }
++ if (!skb)
++ return netvsc_send_pkt(device, packet, net_device, pb, skb);
+
+ /* batch packets in send buffer if possible */
+ msdp = &nvchan->msd;
+@@ -951,7 +942,6 @@ int netvsc_send(struct net_device *ndev,
+ }
+ }
+
+-send_now:
+ if (cur_send)
+ ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);
+
+@@ -1308,11 +1298,6 @@ struct netvsc_device *netvsc_device_add(
+
+ napi_enable(&net_device->chan_table[0].napi);
+
+- /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
+- * populated.
+- */
+- rcu_assign_pointer(net_device_ctx->nvdev, net_device);
+-
+ /* Connect with the NetVsp */
+ ret = netvsc_connect_vsp(device, net_device, device_info);
+ if (ret != 0) {
+@@ -1321,6 +1306,11 @@ struct netvsc_device *netvsc_device_add(
+ goto close;
+ }
+
++ /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
++ * populated.
++ */
++ rcu_assign_pointer(net_device_ctx->nvdev, net_device);
++
+ return net_device;
+
+ close:
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:12 -0700
+Subject: hv_netvsc: cancel subchannel setup before halting device
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit a7483ec0267c69b34e818738da60b392623da94b upstream. ]
+
+Block setup of multiple channels earlier in the teardown
+process. This avoids possible races between halt and subchannel
+initialization.
+
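+A minimal sketch of the resulting ordering (mirroring the change below):
+
+	/* stop any pending subchannel setup before halting the device */
+	cancel_work_sync(&net_dev->subchan_work);
+
+	rndis_filter_halt_device(rndis_dev);
+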
+Suggested-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/rndis_filter.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -1340,6 +1340,9 @@ void rndis_filter_device_remove(struct h
+ {
+ struct rndis_device *rndis_dev = net_dev->extension;
+
++ /* Don't try and setup sub channels if about to halt */
++ cancel_work_sync(&net_dev->subchan_work);
++
+ /* Halt and release the rndis device */
+ rndis_filter_halt_device(rndis_dev);
+
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:17 -0700
+Subject: hv_netvsc: change GPAD teardown order on older versions
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit 0ef58b0a05c127762f975c3dfe8b922e4aa87a29 upstream. ]
+
+On older versions of Windows, the host ignores messages after
+vmbus channel is closed.
+
+Work around this by doing what Windows does and sending the teardown
+before close on older versions of the NVSP protocol.
+
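+A sketch of the resulting order (mirroring the change below):
+
+	if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4)
+		netvsc_teardown_gpadl(device, net_device);	/* older hosts: before close */
+
+	vmbus_close(device->channel);
+
+	if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4)
+		netvsc_teardown_gpadl(device, net_device);	/* newer hosts: after close */
+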
+Reported-by: Mohammed Gamal <mgamal@redhat.com>
+Fixes: 0cf737808ae7 ("hv_netvsc: netvsc_teardown_gpadl() split")
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -570,10 +570,15 @@ void netvsc_device_remove(struct hv_devi
+ */
+ netdev_dbg(ndev, "net device safe to remove\n");
+
++ /* older versions require that buffer be revoked before close */
++ if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4)
++ netvsc_teardown_gpadl(device, net_device);
++
+ /* Now, we can close the channel safely */
+ vmbus_close(device->channel);
+
+- netvsc_teardown_gpadl(device, net_device);
++ if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4)
++ netvsc_teardown_gpadl(device, net_device);
+
+ /* Release all resources */
+ free_netvsc_device_rcu(net_device);
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:18 -0700
+Subject: hv_netvsc: common detach logic
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit 7b2ee50c0cd513a176a26a71f2989facdd75bfea upstream. ]
+
+Make a common function for detaching the internals of the device
+during changes to MTU and RSS. Make sure no more packets
+are transmitted and all packets have been received before
+doing device teardown.
+
+Change the wait logic to be common and use usleep_range().
+
+Change the transmit enabling logic so that transmit queues are disabled
+while the lower device is being changed, and enabled only after the
+sub-channels are set up. This avoids an issue where a packet could be
+sent while a subchannel was not yet initialized.
+
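+The common wait helper follows a bounded-retry pattern; roughly (a sketch,
+with the ring-drain check reduced to a placeholder):
+
+	for (;;) {
+		if (rings_are_empty(nvdev))	/* placeholder for the drain check */
+			return 0;
+
+		if (++retry > RETRY_MAX)	/* RETRY_MAX is 2000, >10 sec total */
+			return -ETIMEDOUT;
+
+		usleep_range(RETRY_US_LO, RETRY_US_HI);	/* 5-10 ms per retry */
+	}
+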
+Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug")
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/hyperv_net.h | 1
+ drivers/net/hyperv/netvsc.c | 21 +-
+ drivers/net/hyperv/netvsc_drv.c | 280 +++++++++++++++++++++-----------------
+ drivers/net/hyperv/rndis_filter.c | 15 --
+ 4 files changed, 175 insertions(+), 142 deletions(-)
+
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -208,7 +208,6 @@ void netvsc_channel_cb(void *context);
+ int netvsc_poll(struct napi_struct *napi, int budget);
+
+ void rndis_set_subchannel(struct work_struct *w);
+-bool rndis_filter_opened(const struct netvsc_device *nvdev);
+ int rndis_filter_open(struct netvsc_device *nvdev);
+ int rndis_filter_close(struct netvsc_device *nvdev);
+ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -554,8 +554,6 @@ void netvsc_device_remove(struct hv_devi
+ = rtnl_dereference(net_device_ctx->nvdev);
+ int i;
+
+- cancel_work_sync(&net_device->subchan_work);
+-
+ netvsc_revoke_buf(device, net_device);
+
+ RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
+@@ -644,13 +642,18 @@ static void netvsc_send_tx_complete(stru
+ queue_sends =
+ atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);
+
+- if (net_device->destroy && queue_sends == 0)
+- wake_up(&net_device->wait_drain);
+-
+- if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
+- (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
+- queue_sends < 1))
+- netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
++ if (unlikely(net_device->destroy)) {
++ if (queue_sends == 0)
++ wake_up(&net_device->wait_drain);
++ } else {
++ struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);
++
++ if (netif_tx_queue_stopped(txq) &&
++ (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
++ queue_sends < 1)) {
++ netif_tx_wake_queue(txq);
++ }
++ }
+ }
+
+ static void netvsc_send_completion(struct netvsc_device *net_device,
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -45,7 +45,10 @@
+
+ #include "hyperv_net.h"
+
+-#define RING_SIZE_MIN 64
++#define RING_SIZE_MIN 64
++#define RETRY_US_LO 5000
++#define RETRY_US_HI 10000
++#define RETRY_MAX 2000 /* >10 sec */
+
+ #define LINKCHANGE_INT (2 * HZ)
+ #define VF_TAKEOVER_INT (HZ / 10)
+@@ -89,10 +92,8 @@ static int netvsc_open(struct net_device
+ }
+
+ rdev = nvdev->extension;
+- if (!rdev->link_state) {
++ if (!rdev->link_state)
+ netif_carrier_on(net);
+- netif_tx_wake_all_queues(net);
+- }
+
+ if (vf_netdev) {
+ /* Setting synthetic device up transparently sets
+@@ -108,36 +109,25 @@ static int netvsc_open(struct net_device
+ return 0;
+ }
+
+-static int netvsc_close(struct net_device *net)
++static int netvsc_wait_until_empty(struct netvsc_device *nvdev)
+ {
+- struct net_device_context *net_device_ctx = netdev_priv(net);
+- struct net_device *vf_netdev
+- = rtnl_dereference(net_device_ctx->vf_netdev);
+- struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
+- int ret = 0;
+- u32 aread, i, msec = 10, retry = 0, retry_max = 20;
+- struct vmbus_channel *chn;
+-
+- netif_tx_disable(net);
+-
+- /* No need to close rndis filter if it is removed already */
+- if (!nvdev)
+- goto out;
+-
+- ret = rndis_filter_close(nvdev);
+- if (ret != 0) {
+- netdev_err(net, "unable to close device (ret %d).\n", ret);
+- return ret;
+- }
++ unsigned int retry = 0;
++ int i;
+
+ /* Ensure pending bytes in ring are read */
+- while (true) {
+- aread = 0;
++ for (;;) {
++ u32 aread = 0;
++
+ for (i = 0; i < nvdev->num_chn; i++) {
+- chn = nvdev->chan_table[i].channel;
++ struct vmbus_channel *chn
++ = nvdev->chan_table[i].channel;
++
+ if (!chn)
+ continue;
+
++ /* make sure receive not running now */
++ napi_synchronize(&nvdev->chan_table[i].napi);
++
+ aread = hv_get_bytes_to_read(&chn->inbound);
+ if (aread)
+ break;
+@@ -147,22 +137,40 @@ static int netvsc_close(struct net_devic
+ break;
+ }
+
+- retry++;
+- if (retry > retry_max || aread == 0)
+- break;
++ if (aread == 0)
++ return 0;
+
+- msleep(msec);
++ if (++retry > RETRY_MAX)
++ return -ETIMEDOUT;
+
+- if (msec < 1000)
+- msec *= 2;
++ usleep_range(RETRY_US_LO, RETRY_US_HI);
+ }
++}
+
+- if (aread) {
+- netdev_err(net, "Ring buffer not empty after closing rndis\n");
+- ret = -ETIMEDOUT;
++static int netvsc_close(struct net_device *net)
++{
++ struct net_device_context *net_device_ctx = netdev_priv(net);
++ struct net_device *vf_netdev
++ = rtnl_dereference(net_device_ctx->vf_netdev);
++ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
++ int ret;
++
++ netif_tx_disable(net);
++
++ /* No need to close rndis filter if it is removed already */
++ if (!nvdev)
++ return 0;
++
++ ret = rndis_filter_close(nvdev);
++ if (ret != 0) {
++ netdev_err(net, "unable to close device (ret %d).\n", ret);
++ return ret;
+ }
+
+-out:
++ ret = netvsc_wait_until_empty(nvdev);
++ if (ret)
++ netdev_err(net, "Ring buffer not empty after closing rndis\n");
++
+ if (vf_netdev)
+ dev_close(vf_netdev);
+
+@@ -820,16 +828,81 @@ static void netvsc_get_channels(struct n
+ }
+ }
+
++static int netvsc_detach(struct net_device *ndev,
++ struct netvsc_device *nvdev)
++{
++ struct net_device_context *ndev_ctx = netdev_priv(ndev);
++ struct hv_device *hdev = ndev_ctx->device_ctx;
++ int ret;
++
++ /* Don't try continuing to try and setup sub channels */
++ if (cancel_work_sync(&nvdev->subchan_work))
++ nvdev->num_chn = 1;
++
++ /* If device was up (receiving) then shutdown */
++ if (netif_running(ndev)) {
++ netif_tx_disable(ndev);
++
++ ret = rndis_filter_close(nvdev);
++ if (ret) {
++ netdev_err(ndev,
++ "unable to close device (ret %d).\n", ret);
++ return ret;
++ }
++
++ ret = netvsc_wait_until_empty(nvdev);
++ if (ret) {
++ netdev_err(ndev,
++ "Ring buffer not empty after closing rndis\n");
++ return ret;
++ }
++ }
++
++ netif_device_detach(ndev);
++
++ rndis_filter_device_remove(hdev, nvdev);
++
++ return 0;
++}
++
++static int netvsc_attach(struct net_device *ndev,
++ struct netvsc_device_info *dev_info)
++{
++ struct net_device_context *ndev_ctx = netdev_priv(ndev);
++ struct hv_device *hdev = ndev_ctx->device_ctx;
++ struct netvsc_device *nvdev;
++ struct rndis_device *rdev;
++ int ret;
++
++ nvdev = rndis_filter_device_add(hdev, dev_info);
++ if (IS_ERR(nvdev))
++ return PTR_ERR(nvdev);
++
++ /* Note: enable and attach happen when sub-channels setup */
++
++ netif_carrier_off(ndev);
++
++ if (netif_running(ndev)) {
++ ret = rndis_filter_open(nvdev);
++ if (ret)
++ return ret;
++
++ rdev = nvdev->extension;
++ if (!rdev->link_state)
++ netif_carrier_on(ndev);
++ }
++
++ return 0;
++}
++
+ static int netvsc_set_channels(struct net_device *net,
+ struct ethtool_channels *channels)
+ {
+ struct net_device_context *net_device_ctx = netdev_priv(net);
+- struct hv_device *dev = net_device_ctx->device_ctx;
+ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
+ unsigned int orig, count = channels->combined_count;
+ struct netvsc_device_info device_info;
+- bool was_opened;
+- int ret = 0;
++ int ret;
+
+ /* We do not support separate count for rx, tx, or other */
+ if (count == 0 ||
+@@ -846,9 +919,6 @@ static int netvsc_set_channels(struct ne
+ return -EINVAL;
+
+ orig = nvdev->num_chn;
+- was_opened = rndis_filter_opened(nvdev);
+- if (was_opened)
+- rndis_filter_close(nvdev);
+
+ memset(&device_info, 0, sizeof(device_info));
+ device_info.num_chn = count;
+@@ -858,28 +928,17 @@ static int netvsc_set_channels(struct ne
+ device_info.recv_sections = nvdev->recv_section_cnt;
+ device_info.recv_section_size = nvdev->recv_section_size;
+
+- rndis_filter_device_remove(dev, nvdev);
++ ret = netvsc_detach(net, nvdev);
++ if (ret)
++ return ret;
+
+- nvdev = rndis_filter_device_add(dev, &device_info);
+- if (IS_ERR(nvdev)) {
+- ret = PTR_ERR(nvdev);
++ ret = netvsc_attach(net, &device_info);
++ if (ret) {
+ device_info.num_chn = orig;
+- nvdev = rndis_filter_device_add(dev, &device_info);
+-
+- if (IS_ERR(nvdev)) {
+- netdev_err(net, "restoring channel setting failed: %ld\n",
+- PTR_ERR(nvdev));
+- return ret;
+- }
++ if (netvsc_attach(net, &device_info))
++ netdev_err(net, "restoring channel setting failed\n");
+ }
+
+- if (was_opened)
+- rndis_filter_open(nvdev);
+-
+- /* We may have missed link change notifications */
+- net_device_ctx->last_reconfig = 0;
+- schedule_delayed_work(&net_device_ctx->dwork, 0);
+-
+ return ret;
+ }
+
+@@ -946,10 +1005,8 @@ static int netvsc_change_mtu(struct net_
+ struct net_device_context *ndevctx = netdev_priv(ndev);
+ struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+- struct hv_device *hdev = ndevctx->device_ctx;
+ int orig_mtu = ndev->mtu;
+ struct netvsc_device_info device_info;
+- bool was_opened;
+ int ret = 0;
+
+ if (!nvdev || nvdev->destroy)
+@@ -962,11 +1019,6 @@ static int netvsc_change_mtu(struct net_
+ return ret;
+ }
+
+- netif_device_detach(ndev);
+- was_opened = rndis_filter_opened(nvdev);
+- if (was_opened)
+- rndis_filter_close(nvdev);
+-
+ memset(&device_info, 0, sizeof(device_info));
+ device_info.ring_size = ring_size;
+ device_info.num_chn = nvdev->num_chn;
+@@ -975,35 +1027,27 @@ static int netvsc_change_mtu(struct net_
+ device_info.recv_sections = nvdev->recv_section_cnt;
+ device_info.recv_section_size = nvdev->recv_section_size;
+
+- rndis_filter_device_remove(hdev, nvdev);
++ ret = netvsc_detach(ndev, nvdev);
++ if (ret)
++ goto rollback_vf;
+
+ ndev->mtu = mtu;
+
+- nvdev = rndis_filter_device_add(hdev, &device_info);
+- if (IS_ERR(nvdev)) {
+- ret = PTR_ERR(nvdev);
+-
+- /* Attempt rollback to original MTU */
+- ndev->mtu = orig_mtu;
+- nvdev = rndis_filter_device_add(hdev, &device_info);
+-
+- if (vf_netdev)
+- dev_set_mtu(vf_netdev, orig_mtu);
+-
+- if (IS_ERR(nvdev)) {
+- netdev_err(ndev, "restoring mtu failed: %ld\n",
+- PTR_ERR(nvdev));
+- return ret;
+- }
+- }
+-
+- if (was_opened)
+- rndis_filter_open(nvdev);
++ ret = netvsc_attach(ndev, &device_info);
++ if (ret)
++ goto rollback;
+
+- netif_device_attach(ndev);
++ return 0;
+
+- /* We may have missed link change notifications */
+- schedule_delayed_work(&ndevctx->dwork, 0);
++rollback:
++ /* Attempt rollback to original MTU */
++ ndev->mtu = orig_mtu;
++
++ if (netvsc_attach(ndev, &device_info))
++ netdev_err(ndev, "restoring mtu failed\n");
++rollback_vf:
++ if (vf_netdev)
++ dev_set_mtu(vf_netdev, orig_mtu);
+
+ return ret;
+ }
+@@ -1469,11 +1513,9 @@ static int netvsc_set_ringparam(struct n
+ {
+ struct net_device_context *ndevctx = netdev_priv(ndev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+- struct hv_device *hdev = ndevctx->device_ctx;
+ struct netvsc_device_info device_info;
+ struct ethtool_ringparam orig;
+ u32 new_tx, new_rx;
+- bool was_opened;
+ int ret = 0;
+
+ if (!nvdev || nvdev->destroy)
+@@ -1499,34 +1541,18 @@ static int netvsc_set_ringparam(struct n
+ device_info.recv_sections = new_rx;
+ device_info.recv_section_size = nvdev->recv_section_size;
+
+- netif_device_detach(ndev);
+- was_opened = rndis_filter_opened(nvdev);
+- if (was_opened)
+- rndis_filter_close(nvdev);
+-
+- rndis_filter_device_remove(hdev, nvdev);
+-
+- nvdev = rndis_filter_device_add(hdev, &device_info);
+- if (IS_ERR(nvdev)) {
+- ret = PTR_ERR(nvdev);
++ ret = netvsc_detach(ndev, nvdev);
++ if (ret)
++ return ret;
+
++ ret = netvsc_attach(ndev, &device_info);
++ if (ret) {
+ device_info.send_sections = orig.tx_pending;
+ device_info.recv_sections = orig.rx_pending;
+- nvdev = rndis_filter_device_add(hdev, &device_info);
+- if (IS_ERR(nvdev)) {
+- netdev_err(ndev, "restoring ringparam failed: %ld\n",
+- PTR_ERR(nvdev));
+- return ret;
+- }
+- }
+
+- if (was_opened)
+- rndis_filter_open(nvdev);
+- netif_device_attach(ndev);
+-
+- /* We may have missed link change notifications */
+- ndevctx->last_reconfig = 0;
+- schedule_delayed_work(&ndevctx->dwork, 0);
++ if (netvsc_attach(ndev, &device_info))
++ netdev_err(ndev, "restoring ringparam failed");
++ }
+
+ return ret;
+ }
+@@ -2003,8 +2029,8 @@ no_net:
+ static int netvsc_remove(struct hv_device *dev)
+ {
+ struct net_device_context *ndev_ctx;
+- struct net_device *vf_netdev;
+- struct net_device *net;
++ struct net_device *vf_netdev, *net;
++ struct netvsc_device *nvdev;
+
+ net = hv_get_drvdata(dev);
+ if (net == NULL) {
+@@ -2014,10 +2040,14 @@ static int netvsc_remove(struct hv_devic
+
+ ndev_ctx = netdev_priv(net);
+
+- netif_device_detach(net);
+-
+ cancel_delayed_work_sync(&ndev_ctx->dwork);
+
++ rcu_read_lock();
++ nvdev = rcu_dereference(ndev_ctx->nvdev);
++
++ if (nvdev)
++ cancel_work_sync(&nvdev->subchan_work);
++
+ /*
+ * Call to the vsc driver to let it know that the device is being
+ * removed. Also blocks mtu and channel changes.
+@@ -2027,11 +2057,13 @@ static int netvsc_remove(struct hv_devic
+ if (vf_netdev)
+ netvsc_unregister_vf(vf_netdev);
+
++ if (nvdev)
++ rndis_filter_device_remove(dev, nvdev);
++
+ unregister_netdevice(net);
+
+- rndis_filter_device_remove(dev,
+- rtnl_dereference(ndev_ctx->nvdev));
+ rtnl_unlock();
++ rcu_read_unlock();
+
+ hv_set_drvdata(dev, NULL);
+
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -1112,6 +1112,7 @@ void rndis_set_subchannel(struct work_st
+ for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
+ ndev_ctx->tx_table[i] = i % nvdev->num_chn;
+
++ netif_device_attach(ndev);
+ rtnl_unlock();
+ return;
+
+@@ -1122,6 +1123,8 @@ failed:
+
+ nvdev->max_chn = 1;
+ nvdev->num_chn = 1;
++
++ netif_device_attach(ndev);
+ unlock:
+ rtnl_unlock();
+ }
+@@ -1324,6 +1327,10 @@ out:
+ net_device->num_chn = 1;
+ }
+
++ /* No sub channels, device is ready */
++ if (net_device->num_chn == 1)
++ netif_device_attach(net);
++
+ return net_device;
+
+ err_dev_remv:
+@@ -1336,9 +1343,6 @@ void rndis_filter_device_remove(struct h
+ {
+ struct rndis_device *rndis_dev = net_dev->extension;
+
+- /* Don't try and setup sub channels if about to halt */
+- cancel_work_sync(&net_dev->subchan_work);
+-
+ /* Halt and release the rndis device */
+ rndis_filter_halt_device(rndis_dev);
+
+@@ -1368,8 +1372,3 @@ int rndis_filter_close(struct netvsc_dev
+
+ return rndis_filter_close_device(nvdev->extension);
+ }
+-
+-bool rndis_filter_opened(const struct netvsc_device *nvdev)
+-{
+- return atomic_read(&nvdev->open_cnt) > 0;
+-}
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:14 -0700
+Subject: hv_netvsc: defer queue selection to VF
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit b3bf5666a51068ad5ddd89a76ed877101ef3bc16 upstream. ]
+
+When a VF is used for accelerated networking it will likely have
+more queues (and a different policy) than the synthetic NIC.
+This patch defers the queue policy to the VF so that all the
+queues can be used. This impacts workloads like locally generated UDP.
+
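+In outline (a simplified sketch of the change below), queue selection now
+asks the VF first and records its answer:
+
+	if (vf_ops->ndo_select_queue)
+		txq = vf_ops->ndo_select_queue(vf_netdev, skb,
+					       accel_priv, fallback);
+	else
+		txq = fallback(vf_netdev, skb);
+
+	/* remember the VF's choice for the slave transmit path */
+	qdisc_skb_cb(skb)->slave_dev_queue_mapping = txq;
+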
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc_drv.c | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -283,8 +283,19 @@ static u16 netvsc_select_queue(struct ne
+ rcu_read_lock();
+ vf_netdev = rcu_dereference(ndc->vf_netdev);
+ if (vf_netdev) {
+- txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
+- qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
++ const struct net_device_ops *vf_ops = vf_netdev->netdev_ops;
++
++ if (vf_ops->ndo_select_queue)
++ txq = vf_ops->ndo_select_queue(vf_netdev, skb,
++ accel_priv, fallback);
++ else
++ txq = fallback(vf_netdev, skb);
++
++ /* Record the queue selected by VF so that it can be
++ * used for common case where VF has more queues than
++ * the synthetic device.
++ */
++ qdisc_skb_cb(skb)->slave_dev_queue_mapping = txq;
+ } else {
+ txq = netvsc_pick_tx(ndev, skb);
+ }
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:15 -0700
+Subject: hv_netvsc: disable NAPI before channel close
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit 8348e0460ab1473f06c8b824699dd2eed3c1979d upstream. ]
+
+This makes sure that no CPU is still processing packets when
+the channel is closed.
+
+Fixes: 76bb5db5c749 ("netvsc: fix use after free on module removal")
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -567,6 +567,10 @@ void netvsc_device_remove(struct hv_devi
+
+ RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
+
++ /* And disassociate NAPI context from device */
++ for (i = 0; i < net_device->num_chn; i++)
++ netif_napi_del(&net_device->chan_table[i].napi);
++
+ /*
+ * At this point, no one should be accessing net_device
+ * except in here
+@@ -578,10 +582,6 @@ void netvsc_device_remove(struct hv_devi
+
+ netvsc_teardown_gpadl(device, net_device);
+
+- /* And dissassociate NAPI context from device */
+- for (i = 0; i < net_device->num_chn; i++)
+- netif_napi_del(&net_device->chan_table[i].napi);
+-
+ /* Release all resources */
+ free_netvsc_device_rcu(net_device);
+ }
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:07 -0700
+Subject: hv_netvsc: empty current transmit aggregation if flow blocked
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit cfd8afd986cdb59ea9adac873c5082498a1eb7c0 upstream. ]
+
+If the transmit queue is known to be full, then don't keep aggregating
+data. The cp_partial flag, which indicates that the current
+aggregation buffer is full, can be folded in to avoid more
+conditionals.
+
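+The resulting batching condition can be summarized as (a sketch of the
+check introduced below):
+
+	/* keep aggregating only if the stack says more data is coming,
+	 * this is not a mixed partial-copy send, and the queue is not
+	 * flow blocked
+	 */
+	xmit_more = skb->xmit_more &&
+		    !packet->cp_partial &&
+		    !netif_xmit_stopped(netdev_get_tx_queue(ndev, packet->q_idx));
+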
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/hyperv_net.h | 2 +-
+ drivers/net/hyperv/netvsc.c | 36 +++++++++++++++++++++---------------
+ drivers/net/hyperv/netvsc_drv.c | 2 +-
+ drivers/net/hyperv/rndis_filter.c | 3 +--
+ 4 files changed, 24 insertions(+), 19 deletions(-)
+
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -192,7 +192,7 @@ struct netvsc_device *netvsc_device_add(
+ const struct netvsc_device_info *info);
+ int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx);
+ void netvsc_device_remove(struct hv_device *device);
+-int netvsc_send(struct net_device_context *ndc,
++int netvsc_send(struct net_device *net,
+ struct hv_netvsc_packet *packet,
+ struct rndis_message *rndis_msg,
+ struct hv_page_buffer *page_buffer,
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -700,13 +700,13 @@ static u32 netvsc_get_next_send_section(
+ return NETVSC_INVALID_INDEX;
+ }
+
+-static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
+- unsigned int section_index,
+- u32 pend_size,
+- struct hv_netvsc_packet *packet,
+- struct rndis_message *rndis_msg,
+- struct hv_page_buffer *pb,
+- struct sk_buff *skb)
++static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
++ unsigned int section_index,
++ u32 pend_size,
++ struct hv_netvsc_packet *packet,
++ struct rndis_message *rndis_msg,
++ struct hv_page_buffer *pb,
++ bool xmit_more)
+ {
+ char *start = net_device->send_buf;
+ char *dest = start + (section_index * net_device->send_section_size)
+@@ -719,7 +719,8 @@ static u32 netvsc_copy_to_send_buf(struc
+ packet->page_buf_cnt;
+
+ /* Add padding */
+- if (skb->xmit_more && remain && !packet->cp_partial) {
++ remain = packet->total_data_buflen & (net_device->pkt_align - 1);
++ if (xmit_more && remain) {
+ padding = net_device->pkt_align - remain;
+ rndis_msg->msg_len += padding;
+ packet->total_data_buflen += padding;
+@@ -739,8 +740,6 @@ static u32 netvsc_copy_to_send_buf(struc
+ memset(dest, 0, padding);
+ msg_size += padding;
+ }
+-
+- return msg_size;
+ }
+
+ static inline int netvsc_send_pkt(
+@@ -828,12 +827,13 @@ static inline void move_pkt_msd(struct h
+ }
+
+ /* RCU already held by caller */
+-int netvsc_send(struct net_device_context *ndev_ctx,
++int netvsc_send(struct net_device *ndev,
+ struct hv_netvsc_packet *packet,
+ struct rndis_message *rndis_msg,
+ struct hv_page_buffer *pb,
+ struct sk_buff *skb)
+ {
++ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+ struct netvsc_device *net_device
+ = rcu_dereference_bh(ndev_ctx->nvdev);
+ struct hv_device *device = ndev_ctx->device_ctx;
+@@ -844,8 +844,7 @@ int netvsc_send(struct net_device_contex
+ struct multi_send_data *msdp;
+ struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
+ struct sk_buff *msd_skb = NULL;
+- bool try_batch;
+- bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
++ bool try_batch, xmit_more;
+
+ /* If device is rescinded, return error and packet will get dropped. */
+ if (unlikely(!net_device || net_device->destroy))
+@@ -896,10 +895,17 @@ int netvsc_send(struct net_device_contex
+ }
+ }
+
++ /* Keep aggregating only if stack says more data is coming
++ * and not doing mixed modes send and not flow blocked
++ */
++ xmit_more = skb->xmit_more &&
++ !packet->cp_partial &&
++ !netif_xmit_stopped(netdev_get_tx_queue(ndev, packet->q_idx));
++
+ if (section_index != NETVSC_INVALID_INDEX) {
+ netvsc_copy_to_send_buf(net_device,
+ section_index, msd_len,
+- packet, rndis_msg, pb, skb);
++ packet, rndis_msg, pb, xmit_more);
+
+ packet->send_buf_index = section_index;
+
+@@ -919,7 +925,7 @@ int netvsc_send(struct net_device_contex
+ if (msdp->skb)
+ dev_consume_skb_any(msdp->skb);
+
+- if (xmit_more && !packet->cp_partial) {
++ if (xmit_more) {
+ msdp->skb = skb;
+ msdp->pkt = packet;
+ msdp->count++;
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -614,7 +614,7 @@ static int netvsc_start_xmit(struct sk_b
+ /* timestamp packet in software */
+ skb_tx_timestamp(skb);
+
+- ret = netvsc_send(net_device_ctx, packet, rndis_msg, pb, skb);
++ ret = netvsc_send(net, packet, rndis_msg, pb, skb);
+ if (likely(ret == 0))
+ return NETDEV_TX_OK;
+
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -217,7 +217,6 @@ static int rndis_filter_send_request(str
+ struct hv_netvsc_packet *packet;
+ struct hv_page_buffer page_buf[2];
+ struct hv_page_buffer *pb = page_buf;
+- struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
+ int ret;
+
+ /* Setup the packet to send it */
+@@ -245,7 +244,7 @@ static int rndis_filter_send_request(str
+ }
+
+ rcu_read_lock_bh();
+- ret = netvsc_send(net_device_ctx, packet, NULL, pb, NULL);
++ ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL);
+ rcu_read_unlock_bh();
+
+ return ret;
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Mohammed Gamal <mgamal@redhat.com>
+Date: Mon, 14 May 2018 15:32:21 -0700
+Subject: hv_netvsc: Ensure correct teardown message sequence order
+
+From: Mohammed Gamal <mgamal@redhat.com>
+
+[ Commit a56d99d714665591fed8527b90eef21530ea61e0 upstream. ]
+
+Prior to commit 0cf737808ae7 ("hv_netvsc: netvsc_teardown_gpadl() split")
+the call sequence in netvsc_device_remove() was as follows (as
+implemented in netvsc_destroy_buf()):
+1- Send NVSP_MSG1_TYPE_REVOKE_RECV_BUF message
+2- Teardown receive buffer GPADL
+3- Send NVSP_MSG1_TYPE_REVOKE_SEND_BUF message
+4- Teardown send buffer GPADL
+5- Close vmbus
+
+This didn't work for WS2016 hosts. Commit 0cf737808ae7
+("hv_netvsc: netvsc_teardown_gpadl() split") rearranged the
+teardown sequence as follows:
+1- Send NVSP_MSG1_TYPE_REVOKE_RECV_BUF message
+2- Send NVSP_MSG1_TYPE_REVOKE_SEND_BUF message
+3- Close vmbus
+4- Teardown receive buffer GPADL
+5- Teardown send buffer GPADL
+
+That worked well for WS2016 hosts, but it prevented guests on older hosts from
+shutting down after changing network settings. Commit 0ef58b0a05c1
+("hv_netvsc: change GPAD teardown order on older versions") ensured the
+following message sequence for older hosts
+1- Send NVSP_MSG1_TYPE_REVOKE_RECV_BUF message
+2- Send NVSP_MSG1_TYPE_REVOKE_SEND_BUF message
+3- Teardown receive buffer GPADL
+4- Teardown send buffer GPADL
+5- Close vmbus
+
+However, with this sequence, calling `ip link set eth0 mtu 1000` hangs and the
+process becomes uninterruptible. On further analysis it turns out that on tearing
+down the receive buffer GPADL the kernel waits indefinitely
+in vmbus_teardown_gpadl() for a completion to be signaled.
+
+Here is a snippet of where this occurs:
+int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
+{
+ struct vmbus_channel_gpadl_teardown *msg;
+ struct vmbus_channel_msginfo *info;
+ unsigned long flags;
+ int ret;
+
+ info = kmalloc(sizeof(*info) +
+ sizeof(struct vmbus_channel_gpadl_teardown), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ init_completion(&info->waitevent);
+ info->waiting_channel = channel;
+[....]
+ ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_gpadl_teardown),
+ true);
+
+ if (ret)
+ goto post_msg_err;
+
+ wait_for_completion(&info->waitevent);
+[....]
+}
+
+The completion is signaled from vmbus_ongpadl_torndown(), which gets called when
+the corresponding message is received from the host; that message apparently never
+arrives in this case.
+This patch works around the issue by restoring the first-mentioned message sequence
+for older hosts.
+
+Fixes: 0ef58b0a05c1 ("hv_netvsc: change GPAD teardown order on older versions")
+Signed-off-by: Mohammed Gamal <mgamal@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -571,8 +571,17 @@ void netvsc_device_remove(struct hv_devi
+ = rtnl_dereference(net_device_ctx->nvdev);
+ int i;
+
++ /*
++ * Revoke receive buffer. If host is pre-Win2016 then tear down
++ * receive buffer GPADL. Do the same for send buffer.
++ */
+ netvsc_revoke_recv_buf(device, net_device);
++ if (vmbus_proto_version < VERSION_WIN10)
++ netvsc_teardown_recv_gpadl(device, net_device);
++
+ netvsc_revoke_send_buf(device, net_device);
++ if (vmbus_proto_version < VERSION_WIN10)
++ netvsc_teardown_send_gpadl(device, net_device);
+
+ RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
+
+@@ -586,15 +595,13 @@ void netvsc_device_remove(struct hv_devi
+ */
+ netdev_dbg(ndev, "net device safe to remove\n");
+
+- /* older versions require that buffer be revoked before close */
+- if (vmbus_proto_version < VERSION_WIN10) {
+- netvsc_teardown_recv_gpadl(device, net_device);
+- netvsc_teardown_send_gpadl(device, net_device);
+- }
+-
+ /* Now, we can close the channel safely */
+ vmbus_close(device->channel);
+
++ /*
++ * If host is Win2016 or higher then we do the GPADL tear down
++ * here after VMBus is closed.
++ */
+ if (vmbus_proto_version >= VERSION_WIN10) {
+ netvsc_teardown_recv_gpadl(device, net_device);
+ netvsc_teardown_send_gpadl(device, net_device);
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:11 -0700
+Subject: hv_netvsc: fix error unwind handling if vmbus_open fails
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit fcfb4a00d1e514e8313277a01ef919de1113025b upstream. ]
+
+Need to delete NAPI association if vmbus_open fails.
+
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -1288,7 +1288,6 @@ struct netvsc_device *netvsc_device_add(
+ net_device->chan_table);
+
+ if (ret != 0) {
+- netif_napi_del(&net_device->chan_table[0].napi);
+ netdev_err(ndev, "unable to open channel: %d\n", ret);
+ goto cleanup;
+ }
+@@ -1321,6 +1320,7 @@ close:
+ vmbus_close(device->channel);
+
+ cleanup:
++ netif_napi_del(&net_device->chan_table[0].napi);
+ free_netvsc_device(&net_device->rcu);
+
+ return ERR_PTR(ret);
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Mohammed Gamal <mgamal@redhat.com>
+Date: Mon, 14 May 2018 15:32:22 -0700
+Subject: hv_netvsc: Fix net device attach on older Windows hosts
+
+From: Mohammed Gamal <mgamal@redhat.com>
+
+[ Commit 55be9f25be1ca5bda75c39808fc77e42691bc07f upstream. ]
+
+On older Windows hosts the net_device instance is returned to
+the caller of rndis_filter_device_add() without having the presence
+bit set first. This would cause any subsequent calls to network device
+operations (e.g. MTU change, channel change) to fail after the device
+is detached once, returning -ENODEV.
+
+Instead of returning the device instance, we take the exit path where
+we call netif_device_attach().
+
+Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic")
+Signed-off-by: Mohammed Gamal <mgamal@redhat.com>
+Reviewed-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/rndis_filter.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -1276,7 +1276,7 @@ struct netvsc_device *rndis_filter_devic
+ rndis_device->link_state ? "down" : "up");
+
+ if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
+- return net_device;
++ goto out;
+
+ rndis_filter_query_link_speed(rndis_device, net_device);
+
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:13 -0700
+Subject: hv_netvsc: fix race in napi poll when rescheduling
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit d64e38ae690e3337db0d38d9b149a193a1646c4b upstream. ]
+
+There is a race between napi_reschedule and re-enabling interrupts
+which could lead to missed host interrupts. This occurs when
+interrupts are re-enabled (hv_end_read) and the vmbus irq callback
+(netvsc_channel_cb) has already scheduled NAPI.
+
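+The fix follows the usual NAPI re-arm idiom: after completing the poll and
+re-enabling channel interrupts, reschedule only if napi_schedule_prep()
+confirms no other context already did. A simplified sketch of that shape
+(the hv_* calls are the driver's ring-buffer interrupt controls):
+
+	if (work_done < budget &&
+	    napi_complete_done(napi, work_done) &&
+	    hv_end_read(&channel->inbound) &&	/* re-enable; non-zero if more data */
+	    napi_schedule_prep(napi)) {		/* not already scheduled elsewhere */
+		hv_begin_read(&channel->inbound);
+		__napi_schedule(napi);
+	}
+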
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -1205,9 +1205,10 @@ int netvsc_poll(struct napi_struct *napi
+ if (send_recv_completions(ndev, net_device, nvchan) == 0 &&
+ work_done < budget &&
+ napi_complete_done(napi, work_done) &&
+- hv_end_read(&channel->inbound)) {
++ hv_end_read(&channel->inbound) &&
++ napi_schedule_prep(napi)) {
+ hv_begin_read(&channel->inbound);
+- napi_reschedule(napi);
++ __napi_schedule(napi);
+ }
+
+ /* Driver may overshoot since multiple packets per descriptor */
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 14 May 2018 15:32:00 -0700
+Subject: hv_netvsc: Fix the real number of queues of non-vRSS cases
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+[ Commit 6450f8f269a9271985e4a8c13920b7e4cf21c0f3 upstream. ]
+
+For older hosts without multi-channel (vRSS) support, and some error
+cases, we still need to set the real number of queues to one.
+This patch adds this missing setting.
+
+Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug")
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc_drv.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -1932,6 +1932,12 @@ static int netvsc_probe(struct hv_device
+ /* We always need headroom for rndis header */
+ net->needed_headroom = RNDIS_AND_PPI_SIZE;
+
++ /* Initialize the number of queues to be 1, we may change it if more
++ * channels are offered later.
++ */
++ netif_set_real_num_tx_queues(net, 1);
++ netif_set_real_num_rx_queues(net, 1);
++
+ /* Notify the netvsc driver of the new device */
+ memset(&device_info, 0, sizeof(device_info));
+ device_info.ring_size = ring_size;
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Mon, 14 May 2018 15:32:05 -0700
+Subject: hv_netvsc: netvsc_teardown_gpadl() split
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+[ Commit 0cf737808ae7cb25e952be619db46b9147a92f46 upstream. ]
+
+It was found that in some cases the host refuses to tear down the GPADL
+for send/receive buffers (probably when some work with these buffers is
+scheduled or ongoing). Change the teardown logic to be:
+1) Send NVSP_MSG1_TYPE_REVOKE_* messages
+2) Close the channel
+3) Teardown GPADLs.
+This seems to work reliably.
+
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c | 69 ++++++++++++++++++++++----------------------
+ 1 file changed, 36 insertions(+), 33 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -100,12 +100,11 @@ static void free_netvsc_device_rcu(struc
+ call_rcu(&nvdev->rcu, free_netvsc_device);
+ }
+
+-static void netvsc_destroy_buf(struct hv_device *device)
++static void netvsc_revoke_buf(struct hv_device *device,
++ struct netvsc_device *net_device)
+ {
+ struct nvsp_message *revoke_packet;
+ struct net_device *ndev = hv_get_drvdata(device);
+- struct net_device_context *ndc = netdev_priv(ndev);
+- struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev);
+ int ret;
+
+ /*
+@@ -148,28 +147,6 @@ static void netvsc_destroy_buf(struct hv
+ net_device->recv_section_cnt = 0;
+ }
+
+- /* Teardown the gpadl on the vsp end */
+- if (net_device->recv_buf_gpadl_handle) {
+- ret = vmbus_teardown_gpadl(device->channel,
+- net_device->recv_buf_gpadl_handle);
+-
+- /* If we failed here, we might as well return and have a leak
+- * rather than continue and a bugchk
+- */
+- if (ret != 0) {
+- netdev_err(ndev,
+- "unable to teardown receive buffer's gpadl\n");
+- return;
+- }
+- net_device->recv_buf_gpadl_handle = 0;
+- }
+-
+- if (net_device->recv_buf) {
+- /* Free up the receive buffer */
+- vfree(net_device->recv_buf);
+- net_device->recv_buf = NULL;
+- }
+-
+ /* Deal with the send buffer we may have setup.
+ * If we got a send section size, it means we received a
+ * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
+@@ -210,7 +187,35 @@ static void netvsc_destroy_buf(struct hv
+ }
+ net_device->send_section_cnt = 0;
+ }
+- /* Teardown the gpadl on the vsp end */
++}
++
++static void netvsc_teardown_gpadl(struct hv_device *device,
++ struct netvsc_device *net_device)
++{
++ struct net_device *ndev = hv_get_drvdata(device);
++ int ret;
++
++ if (net_device->recv_buf_gpadl_handle) {
++ ret = vmbus_teardown_gpadl(device->channel,
++ net_device->recv_buf_gpadl_handle);
++
++ /* If we failed here, we might as well return and have a leak
++ * rather than continue and a bugchk
++ */
++ if (ret != 0) {
++ netdev_err(ndev,
++ "unable to teardown receive buffer's gpadl\n");
++ return;
++ }
++ net_device->recv_buf_gpadl_handle = 0;
++ }
++
++ if (net_device->recv_buf) {
++ /* Free up the receive buffer */
++ vfree(net_device->recv_buf);
++ net_device->recv_buf = NULL;
++ }
++
+ if (net_device->send_buf_gpadl_handle) {
+ ret = vmbus_teardown_gpadl(device->channel,
+ net_device->send_buf_gpadl_handle);
+@@ -425,7 +430,8 @@ static int netvsc_init_buf(struct hv_dev
+ goto exit;
+
+ cleanup:
+- netvsc_destroy_buf(device);
++ netvsc_revoke_buf(device, net_device);
++ netvsc_teardown_gpadl(device, net_device);
+
+ exit:
+ return ret;
+@@ -544,11 +550,6 @@ cleanup:
+ return ret;
+ }
+
+-static void netvsc_disconnect_vsp(struct hv_device *device)
+-{
+- netvsc_destroy_buf(device);
+-}
+-
+ /*
+ * netvsc_device_remove - Callback when the root bus device is removed
+ */
+@@ -562,7 +563,7 @@ void netvsc_device_remove(struct hv_devi
+
+ cancel_work_sync(&net_device->subchan_work);
+
+- netvsc_disconnect_vsp(device);
++ netvsc_revoke_buf(device, net_device);
+
+ RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
+
+@@ -575,6 +576,8 @@ void netvsc_device_remove(struct hv_devi
+ /* Now, we can close the channel safely */
+ vmbus_close(device->channel);
+
++ netvsc_teardown_gpadl(device, net_device);
++
+ /* And dissassociate NAPI context from device */
+ for (i = 0; i < net_device->num_chn; i++)
+ netif_napi_del(&net_device->chan_table[i].napi);
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:10 -0700
+Subject: hv_netvsc: only wake transmit queue if link is up
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit f4950e4586dfc957e0a28226eeb992ddc049b5a2 upstream. ]
+
+Don't wake transmit queues if link is not up yet.
+
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc_drv.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -88,12 +88,11 @@ static int netvsc_open(struct net_device
+ return ret;
+ }
+
+- netif_tx_wake_all_queues(net);
+-
+ rdev = nvdev->extension;
+-
+- if (!rdev->link_state)
++ if (!rdev->link_state) {
+ netif_carrier_on(net);
++ netif_tx_wake_all_queues(net);
++ }
+
+ if (vf_netdev) {
+ /* Setting synthetic device up transparently sets
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Mon, 14 May 2018 15:32:06 -0700
+Subject: hv_netvsc: preserve hw_features on mtu/channels/ringparam changes
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+[ Commit aefd80e874e98a864915df5b7d90824a4340b450 upstream. ]
+
+rndis_filter_device_add() is called both from netvsc_probe() when we
+initially create the device and from set channels/mtu/ringparam
+routines where we basically remove the device and add it back.
+
+hw_features is reset in rndis_filter_device_add() and filled with
+host data. However, we lose all additional flags which are set outside
+of the driver, e.g. register_netdevice() adds NETIF_F_SOFT_FEATURES and
+many others.
+
+Unfortunately, rndis_{query_hwcaps(), _set_offload_params()}
+calls cannot be avoided on every RNDIS reset: the host expects us to set
+required features explicitly. Moreover, in theory hardware capabilities
+can change and we need to reflect the change in hw_features.
+
+Reset net->hw_features bits according to host data in
+rndis_netdev_set_hwcaps() and clear the corresponding feature bits
+from net->features in case some features went missing (this will likely
+never happen in real life, but let's be consistent).
+
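+For reference, the mask update below relies on `~MASK | hw_features` being
+evaluated before the `&=`. With illustrative values only (suppose the host
+stopped offering TSO6 when the device is re-added):
+
+	/* NETVSC_SUPPORTED_HW_FEATURES contains TSO and TSO6, among others */
+	features    = NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_SG;
+	hw_features = NETIF_F_TSO;	/* after re-querying the host */
+
+	features &= ~NETVSC_SUPPORTED_HW_FEATURES | hw_features;
+	/* result: NETIF_F_TSO | NETIF_F_SG -- TSO6 dropped, SG untouched */
+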
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/hyperv_net.h | 4 +
+ drivers/net/hyperv/netvsc_drv.c | 2
+ drivers/net/hyperv/rndis_filter.c | 136 +++++++++++++++++++++-----------------
+ 3 files changed, 83 insertions(+), 59 deletions(-)
+
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -659,6 +659,10 @@ struct nvsp_message {
+ #define NETVSC_RECEIVE_BUFFER_ID 0xcafe
+ #define NETVSC_SEND_BUFFER_ID 0
+
++#define NETVSC_SUPPORTED_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | \
++ NETIF_F_TSO | NETIF_F_IPV6_CSUM | \
++ NETIF_F_TSO6)
++
+ #define VRSS_SEND_TAB_SIZE 16 /* must be power of 2 */
+ #define VRSS_CHANNEL_MAX 64
+ #define VRSS_CHANNEL_DEFAULT 8
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -1956,7 +1956,7 @@ static int netvsc_probe(struct hv_device
+
+ memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
+
+- /* hw_features computed in rndis_filter_device_add */
++ /* hw_features computed in rndis_netdev_set_hwcaps() */
+ net->features = net->hw_features |
+ NETIF_F_HIGHDMA | NETIF_F_SG |
+ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -1131,69 +1131,20 @@ unlock:
+ rtnl_unlock();
+ }
+
+-struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
+- struct netvsc_device_info *device_info)
++static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
++ struct netvsc_device *nvdev)
+ {
+- struct net_device *net = hv_get_drvdata(dev);
++ struct net_device *net = rndis_device->ndev;
+ struct net_device_context *net_device_ctx = netdev_priv(net);
+- struct netvsc_device *net_device;
+- struct rndis_device *rndis_device;
+ struct ndis_offload hwcaps;
+ struct ndis_offload_params offloads;
+- struct ndis_recv_scale_cap rsscap;
+- u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
+ unsigned int gso_max_size = GSO_MAX_SIZE;
+- u32 mtu, size;
+- const struct cpumask *node_cpu_mask;
+- u32 num_possible_rss_qs;
+- int i, ret;
+-
+- rndis_device = get_rndis_device();
+- if (!rndis_device)
+- return ERR_PTR(-ENODEV);
+-
+- /*
+- * Let the inner driver handle this first to create the netvsc channel
+- * NOTE! Once the channel is created, we may get a receive callback
+- * (RndisFilterOnReceive()) before this call is completed
+- */
+- net_device = netvsc_device_add(dev, device_info);
+- if (IS_ERR(net_device)) {
+- kfree(rndis_device);
+- return net_device;
+- }
+-
+- /* Initialize the rndis device */
+- net_device->max_chn = 1;
+- net_device->num_chn = 1;
+-
+- net_device->extension = rndis_device;
+- rndis_device->ndev = net;
+-
+- /* Send the rndis initialization message */
+- ret = rndis_filter_init_device(rndis_device, net_device);
+- if (ret != 0)
+- goto err_dev_remv;
+-
+- /* Get the MTU from the host */
+- size = sizeof(u32);
+- ret = rndis_filter_query_device(rndis_device, net_device,
+- RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE,
+- &mtu, &size);
+- if (ret == 0 && size == sizeof(u32) && mtu < net->mtu)
+- net->mtu = mtu;
+-
+- /* Get the mac address */
+- ret = rndis_filter_query_device_mac(rndis_device, net_device);
+- if (ret != 0)
+- goto err_dev_remv;
+-
+- memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
++ int ret;
+
+ /* Find HW offload capabilities */
+- ret = rndis_query_hwcaps(rndis_device, net_device, &hwcaps);
++ ret = rndis_query_hwcaps(rndis_device, nvdev, &hwcaps);
+ if (ret != 0)
+- goto err_dev_remv;
++ return ret;
+
+ /* A value of zero means "no change"; now turn on what we want. */
+ memset(&offloads, 0, sizeof(struct ndis_offload_params));
+@@ -1201,8 +1152,12 @@ struct netvsc_device *rndis_filter_devic
+ /* Linux does not care about IP checksum, always does in kernel */
+ offloads.ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED;
+
++ /* Reset previously set hw_features flags */
++ net->hw_features &= ~NETVSC_SUPPORTED_HW_FEATURES;
++ net_device_ctx->tx_checksum_mask = 0;
++
+ /* Compute tx offload settings based on hw capabilities */
+- net->hw_features = NETIF_F_RXCSUM;
++ net->hw_features |= NETIF_F_RXCSUM;
+
+ if ((hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_ALL_TCP4) == NDIS_TXCSUM_ALL_TCP4) {
+ /* Can checksum TCP */
+@@ -1246,10 +1201,75 @@ struct netvsc_device *rndis_filter_devic
+ }
+ }
+
++ /* In case some hw_features disappeared we need to remove them from
++ * net->features list as they're no longer supported.
++ */
++ net->features &= ~NETVSC_SUPPORTED_HW_FEATURES | net->hw_features;
++
+ netif_set_gso_max_size(net, gso_max_size);
+
+- ret = rndis_filter_set_offload_params(net, net_device, &offloads);
+- if (ret)
++ ret = rndis_filter_set_offload_params(net, nvdev, &offloads);
++
++ return ret;
++}
++
++struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
++ struct netvsc_device_info *device_info)
++{
++ struct net_device *net = hv_get_drvdata(dev);
++ struct netvsc_device *net_device;
++ struct rndis_device *rndis_device;
++ struct ndis_recv_scale_cap rsscap;
++ u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
++ u32 mtu, size;
++ const struct cpumask *node_cpu_mask;
++ u32 num_possible_rss_qs;
++ int i, ret;
++
++ rndis_device = get_rndis_device();
++ if (!rndis_device)
++ return ERR_PTR(-ENODEV);
++
++ /* Let the inner driver handle this first to create the netvsc channel
++ * NOTE! Once the channel is created, we may get a receive callback
++ * (RndisFilterOnReceive()) before this call is completed
++ */
++ net_device = netvsc_device_add(dev, device_info);
++ if (IS_ERR(net_device)) {
++ kfree(rndis_device);
++ return net_device;
++ }
++
++ /* Initialize the rndis device */
++ net_device->max_chn = 1;
++ net_device->num_chn = 1;
++
++ net_device->extension = rndis_device;
++ rndis_device->ndev = net;
++
++ /* Send the rndis initialization message */
++ ret = rndis_filter_init_device(rndis_device, net_device);
++ if (ret != 0)
++ goto err_dev_remv;
++
++ /* Get the MTU from the host */
++ size = sizeof(u32);
++ ret = rndis_filter_query_device(rndis_device, net_device,
++ RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE,
++ &mtu, &size);
++ if (ret == 0 && size == sizeof(u32) && mtu < net->mtu)
++ net->mtu = mtu;
++
++ /* Get the mac address */
++ ret = rndis_filter_query_device_mac(rndis_device, net_device);
++ if (ret != 0)
++ goto err_dev_remv;
++
++ memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
++
++ /* Query and set hardware capabilities */
++ ret = rndis_netdev_set_hwcaps(rndis_device, net_device);
++ if (ret != 0)
+ goto err_dev_remv;
+
+ rndis_filter_query_device_link_status(rndis_device, net_device);
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 14 May 2018 15:32:01 -0700
+Subject: hv_netvsc: Rename ind_table to rx_table
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+[ Commit 47371300dfc269dd8d150e5b872bdbbda98ba809 upstream. ]
+
+Rename this variable because it is the Receive indirection
+table.
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/hyperv_net.h | 2 +-
+ drivers/net/hyperv/netvsc_drv.c | 4 ++--
+ drivers/net/hyperv/rndis_filter.c | 6 +++---
+ 3 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -179,7 +179,7 @@ struct rndis_device {
+
+ u8 hw_mac_adr[ETH_ALEN];
+ u8 rss_key[NETVSC_HASH_KEYLEN];
+- u16 ind_table[ITAB_NUM];
++ u16 rx_table[ITAB_NUM];
+ };
+
+
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -1378,7 +1378,7 @@ static int netvsc_get_rxfh(struct net_de
+ rndis_dev = ndev->extension;
+ if (indir) {
+ for (i = 0; i < ITAB_NUM; i++)
+- indir[i] = rndis_dev->ind_table[i];
++ indir[i] = rndis_dev->rx_table[i];
+ }
+
+ if (key)
+@@ -1408,7 +1408,7 @@ static int netvsc_set_rxfh(struct net_de
+ return -EINVAL;
+
+ for (i = 0; i < ITAB_NUM; i++)
+- rndis_dev->ind_table[i] = indir[i];
++ rndis_dev->rx_table[i] = indir[i];
+ }
+
+ if (!key) {
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -759,7 +759,7 @@ int rndis_filter_set_rss_param(struct rn
+ /* Set indirection table entries */
+ itab = (u32 *)(rssp + 1);
+ for (i = 0; i < ITAB_NUM; i++)
+- itab[i] = rdev->ind_table[i];
++ itab[i] = rdev->rx_table[i];
+
+ /* Set hask key values */
+ keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset);
+@@ -1284,8 +1284,8 @@ struct netvsc_device *rndis_filter_devic
+ net_device->num_chn = min(net_device->max_chn, device_info->num_chn);
+
+ for (i = 0; i < ITAB_NUM; i++)
+- rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i,
+- net_device->num_chn);
++ rndis_device->rx_table[i] = ethtool_rxfh_indir_default(
++ i, net_device->num_chn);
+
+ atomic_set(&net_device->open_chn, 1);
+ vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 14 May 2018 15:32:02 -0700
+Subject: hv_netvsc: Rename tx_send_table to tx_table
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+[ Commit 39e91cfbf6f5fb26ba64cc2e8874372baf1671e7 upstream. ]
+
+Simplify the variable name: rename tx_send_table to tx_table.
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/hyperv_net.h | 2 +-
+ drivers/net/hyperv/netvsc.c | 2 +-
+ drivers/net/hyperv/netvsc_drv.c | 4 ++--
+ 3 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -734,7 +734,7 @@ struct net_device_context {
+
+ u32 tx_checksum_mask;
+
+- u32 tx_send_table[VRSS_SEND_TAB_SIZE];
++ u32 tx_table[VRSS_SEND_TAB_SIZE];
+
+ /* Ethtool settings */
+ bool udp4_l4_hash;
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -1107,7 +1107,7 @@ static void netvsc_send_table(struct hv_
+ nvmsg->msg.v5_msg.send_table.offset);
+
+ for (i = 0; i < count; i++)
+- net_device_ctx->tx_send_table[i] = tab[i];
++ net_device_ctx->tx_table[i] = tab[i];
+ }
+
+ static void netvsc_send_vf(struct net_device_context *net_device_ctx,
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -234,8 +234,8 @@ static inline int netvsc_get_tx_queue(st
+ struct sock *sk = skb->sk;
+ int q_idx;
+
+- q_idx = ndc->tx_send_table[netvsc_get_hash(skb, ndc) &
+- (VRSS_SEND_TAB_SIZE - 1)];
++ q_idx = ndc->tx_table[netvsc_get_hash(skb, ndc) &
++ (VRSS_SEND_TAB_SIZE - 1)];
+
+ /* If queue index changed record the new value */
+ if (q_idx != old_idx &&
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 14 May 2018 15:32:04 -0700
+Subject: hv_netvsc: Set tx_table to equal weight after subchannels open
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+[ Commit a6fb6aa3cfa9047b62653dbcfc9bcde6e2272b41 upstream. ]
+
+In some cases, like internal vSwitch, the host doesn't provide
+send indirection table updates. This patch sets the table to be
+equal weight after subchannels are all open. Otherwise, all workload
+will be on one TX channel.
+
+As tested, this patch greatly increases the throughput over an
+internal vSwitch.
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/rndis_filter.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -1114,6 +1114,9 @@ void rndis_set_subchannel(struct work_st
+ netif_set_real_num_tx_queues(ndev, nvdev->num_chn);
+ netif_set_real_num_rx_queues(ndev, nvdev->num_chn);
+
++ for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
++ ndev_ctx->tx_table[i] = i % nvdev->num_chn;
++
+ rtnl_unlock();
+ return;
+
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Mohammed Gamal <mgamal@redhat.com>
+Date: Mon, 14 May 2018 15:32:20 -0700
+Subject: hv_netvsc: Split netvsc_revoke_buf() and netvsc_teardown_gpadl()
+
+From: Mohammed Gamal <mgamal@redhat.com>
+
+[ Commit 7992894c305eaf504d005529637ff8283d0a849d upstream. ]
+
+Split each of the functions into two for each of send/recv buffers.
+This will be needed in order to implement a fine-grained messaging
+sequence to the host so that we can accommodate the requirements of
+different Windows versions.
+
+Fixes: 0ef58b0a05c12 ("hv_netvsc: change GPAD teardown order on older versions")
+Signed-off-by: Mohammed Gamal <mgamal@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c | 46 ++++++++++++++++++++++++++++++++------------
+ 1 file changed, 34 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -105,11 +105,11 @@ static void free_netvsc_device_rcu(struc
+ call_rcu(&nvdev->rcu, free_netvsc_device);
+ }
+
+-static void netvsc_revoke_buf(struct hv_device *device,
+- struct netvsc_device *net_device)
++static void netvsc_revoke_recv_buf(struct hv_device *device,
++ struct netvsc_device *net_device)
+ {
+- struct nvsp_message *revoke_packet;
+ struct net_device *ndev = hv_get_drvdata(device);
++ struct nvsp_message *revoke_packet;
+ int ret;
+
+ /*
+@@ -151,6 +151,14 @@ static void netvsc_revoke_buf(struct hv_
+ }
+ net_device->recv_section_cnt = 0;
+ }
++}
++
++static void netvsc_revoke_send_buf(struct hv_device *device,
++ struct netvsc_device *net_device)
++{
++ struct net_device *ndev = hv_get_drvdata(device);
++ struct nvsp_message *revoke_packet;
++ int ret;
+
+ /* Deal with the send buffer we may have setup.
+ * If we got a send section size, it means we received a
+@@ -194,8 +202,8 @@ static void netvsc_revoke_buf(struct hv_
+ }
+ }
+
+-static void netvsc_teardown_gpadl(struct hv_device *device,
+- struct netvsc_device *net_device)
++static void netvsc_teardown_recv_gpadl(struct hv_device *device,
++ struct netvsc_device *net_device)
+ {
+ struct net_device *ndev = hv_get_drvdata(device);
+ int ret;
+@@ -214,6 +222,13 @@ static void netvsc_teardown_gpadl(struct
+ }
+ net_device->recv_buf_gpadl_handle = 0;
+ }
++}
++
++static void netvsc_teardown_send_gpadl(struct hv_device *device,
++ struct netvsc_device *net_device)
++{
++ struct net_device *ndev = hv_get_drvdata(device);
++ int ret;
+
+ if (net_device->send_buf_gpadl_handle) {
+ ret = vmbus_teardown_gpadl(device->channel,
+@@ -423,8 +438,10 @@ static int netvsc_init_buf(struct hv_dev
+ goto exit;
+
+ cleanup:
+- netvsc_revoke_buf(device, net_device);
+- netvsc_teardown_gpadl(device, net_device);
++ netvsc_revoke_recv_buf(device, net_device);
++ netvsc_revoke_send_buf(device, net_device);
++ netvsc_teardown_recv_gpadl(device, net_device);
++ netvsc_teardown_send_gpadl(device, net_device);
+
+ exit:
+ return ret;
+@@ -554,7 +571,8 @@ void netvsc_device_remove(struct hv_devi
+ = rtnl_dereference(net_device_ctx->nvdev);
+ int i;
+
+- netvsc_revoke_buf(device, net_device);
++ netvsc_revoke_recv_buf(device, net_device);
++ netvsc_revoke_send_buf(device, net_device);
+
+ RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
+
+@@ -569,14 +587,18 @@ void netvsc_device_remove(struct hv_devi
+ netdev_dbg(ndev, "net device safe to remove\n");
+
+ /* older versions require that buffer be revoked before close */
+- if (vmbus_proto_version < VERSION_WIN10)
+- netvsc_teardown_gpadl(device, net_device);
++ if (vmbus_proto_version < VERSION_WIN10) {
++ netvsc_teardown_recv_gpadl(device, net_device);
++ netvsc_teardown_send_gpadl(device, net_device);
++ }
+
+ /* Now, we can close the channel safely */
+ vmbus_close(device->channel);
+
+- if (vmbus_proto_version >= VERSION_WIN10)
+- netvsc_teardown_gpadl(device, net_device);
++ if (vmbus_proto_version >= VERSION_WIN10) {
++ netvsc_teardown_recv_gpadl(device, net_device);
++ netvsc_teardown_send_gpadl(device, net_device);
++ }
+
+ /* Release all resources */
+ free_netvsc_device_rcu(net_device);
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:16 -0700
+Subject: hv_netvsc: use RCU to fix concurrent rx and queue changes
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit 02400fcee2542ee334a2394e0d9f6efd969fe782 upstream. ]
+
+The receive processing may continue to happen while the
+internal network device state is in an RCU grace period.
+The internal RNDIS structure is associated with the
+internal netvsc_device structure; both have the same
+RCU lifetime.
+
+Defer freeing all associated parts until after grace
+period.
+
+Fixes: 0cf737808ae7 ("hv_netvsc: netvsc_teardown_gpadl() split")
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c | 17 ++++------------
+ drivers/net/hyperv/rndis_filter.c | 39 ++++++++++++++++----------------------
+ 2 files changed, 22 insertions(+), 34 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -89,6 +89,11 @@ static void free_netvsc_device(struct rc
+ = container_of(head, struct netvsc_device, rcu);
+ int i;
+
++ kfree(nvdev->extension);
++ vfree(nvdev->recv_buf);
++ vfree(nvdev->send_buf);
++ kfree(nvdev->send_section_map);
++
+ for (i = 0; i < VRSS_CHANNEL_MAX; i++)
+ vfree(nvdev->chan_table[i].mrc.slots);
+
+@@ -210,12 +215,6 @@ static void netvsc_teardown_gpadl(struct
+ net_device->recv_buf_gpadl_handle = 0;
+ }
+
+- if (net_device->recv_buf) {
+- /* Free up the receive buffer */
+- vfree(net_device->recv_buf);
+- net_device->recv_buf = NULL;
+- }
+-
+ if (net_device->send_buf_gpadl_handle) {
+ ret = vmbus_teardown_gpadl(device->channel,
+ net_device->send_buf_gpadl_handle);
+@@ -230,12 +229,6 @@ static void netvsc_teardown_gpadl(struct
+ }
+ net_device->send_buf_gpadl_handle = 0;
+ }
+- if (net_device->send_buf) {
+- /* Free up the send buffer */
+- vfree(net_device->send_buf);
+- net_device->send_buf = NULL;
+- }
+- kfree(net_device->send_section_map);
+ }
+
+ int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -266,13 +266,23 @@ static void rndis_set_link_state(struct
+ }
+ }
+
+-static void rndis_filter_receive_response(struct rndis_device *dev,
+- struct rndis_message *resp)
++static void rndis_filter_receive_response(struct net_device *ndev,
++ struct netvsc_device *nvdev,
++ const struct rndis_message *resp)
+ {
++ struct rndis_device *dev = nvdev->extension;
+ struct rndis_request *request = NULL;
+ bool found = false;
+ unsigned long flags;
+- struct net_device *ndev = dev->ndev;
++
++ /* This should never happen, it means control message
++ * response received after device removed.
++ */
++ if (dev->state == RNDIS_DEV_UNINITIALIZED) {
++ netdev_err(ndev,
++ "got rndis message uninitialized\n");
++ return;
++ }
+
+ spin_lock_irqsave(&dev->request_lock, flags);
+ list_for_each_entry(request, &dev->req_list, list_ent) {
+@@ -353,7 +363,7 @@ static inline void *rndis_get_ppi(struct
+ }
+
+ static int rndis_filter_receive_data(struct net_device *ndev,
+- struct rndis_device *dev,
++ struct netvsc_device *nvdev,
+ struct rndis_message *msg,
+ struct vmbus_channel *channel,
+ void *data, u32 data_buflen)
+@@ -373,7 +383,7 @@ static int rndis_filter_receive_data(str
+ * should be the data packet size plus the trailer padding size
+ */
+ if (unlikely(data_buflen < rndis_pkt->data_len)) {
+- netdev_err(dev->ndev, "rndis message buffer "
++ netdev_err(ndev, "rndis message buffer "
+ "overflow detected (got %u, min %u)"
+ "...dropping this message!\n",
+ data_buflen, rndis_pkt->data_len);
+@@ -401,34 +411,20 @@ int rndis_filter_receive(struct net_devi
+ void *data, u32 buflen)
+ {
+ struct net_device_context *net_device_ctx = netdev_priv(ndev);
+- struct rndis_device *rndis_dev = net_dev->extension;
+ struct rndis_message *rndis_msg = data;
+
+- /* Make sure the rndis device state is initialized */
+- if (unlikely(!rndis_dev)) {
+- netif_err(net_device_ctx, rx_err, ndev,
+- "got rndis message but no rndis device!\n");
+- return NVSP_STAT_FAIL;
+- }
+-
+- if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) {
+- netif_err(net_device_ctx, rx_err, ndev,
+- "got rndis message uninitialized\n");
+- return NVSP_STAT_FAIL;
+- }
+-
+ if (netif_msg_rx_status(net_device_ctx))
+ dump_rndis_message(dev, rndis_msg);
+
+ switch (rndis_msg->ndis_msg_type) {
+ case RNDIS_MSG_PACKET:
+- return rndis_filter_receive_data(ndev, rndis_dev, rndis_msg,
++ return rndis_filter_receive_data(ndev, net_dev, rndis_msg,
+ channel, data, buflen);
+ case RNDIS_MSG_INIT_C:
+ case RNDIS_MSG_QUERY_C:
+ case RNDIS_MSG_SET_C:
+ /* completion msgs */
+- rndis_filter_receive_response(rndis_dev, rndis_msg);
++ rndis_filter_receive_response(ndev, net_dev, rndis_msg);
+ break;
+
+ case RNDIS_MSG_INDICATE:
+@@ -1349,7 +1345,6 @@ void rndis_filter_device_remove(struct h
+ net_dev->extension = NULL;
+
+ netvsc_device_remove(dev);
+- kfree(rndis_dev);
+ }
+
+ int rndis_filter_open(struct netvsc_device *nvdev)
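+
+Background sketch (not part of the patch): the fix relies on the standard
+call_rcu() deferred-free pattern. Readers dereference the object under RCU
+protection, and the writer hands the final kfree() to call_rcu() so it only
+runs after a grace period, when no reader can still see the object. The
+demo_obj names below are invented for illustration.
+
+  #include <linux/kernel.h>
+  #include <linux/rcupdate.h>
+  #include <linux/slab.h>
+
+  struct demo_obj {
+          int value;
+          struct rcu_head rcu;
+  };
+
+  /* Runs only after a grace period has elapsed. */
+  static void demo_obj_free_rcu(struct rcu_head *head)
+  {
+          struct demo_obj *obj = container_of(head, struct demo_obj, rcu);
+
+          kfree(obj);
+  }
+
+  static void demo_obj_release(struct demo_obj *obj)
+  {
+          /* Defer the free instead of calling kfree() directly. */
+          call_rcu(&obj->rcu, demo_obj_free_rcu);
+  }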
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 14 May 2018 15:32:08 -0700
+Subject: hv_netvsc: Use the num_online_cpus() for channel limit
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+[ Commit 25a39f7f975c3c26a0052fbf9b59201c06744332 upstream. ]
+
+Since we no longer localize channel/CPU affiliation within one NUMA
+node, num_online_cpus() is used as the cap on the number of channels,
+instead of the number of processors in a NUMA node.
+
+This patch allows a bigger range for tuning the number of channels.
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/rndis_filter.c | 11 ++---------
+ 1 file changed, 2 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -1221,7 +1221,6 @@ struct netvsc_device *rndis_filter_devic
+ struct ndis_recv_scale_cap rsscap;
+ u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
+ u32 mtu, size;
+- const struct cpumask *node_cpu_mask;
+ u32 num_possible_rss_qs;
+ int i, ret;
+
+@@ -1290,14 +1289,8 @@ struct netvsc_device *rndis_filter_devic
+ if (ret || rsscap.num_recv_que < 2)
+ goto out;
+
+- /*
+- * We will limit the VRSS channels to the number CPUs in the NUMA node
+- * the primary channel is currently bound to.
+- *
+- * This also guarantees that num_possible_rss_qs <= num_online_cpus
+- */
+- node_cpu_mask = cpumask_of_node(cpu_to_node(dev->channel->target_cpu));
+- num_possible_rss_qs = min_t(u32, cpumask_weight(node_cpu_mask),
++ /* This guarantees that num_possible_rss_qs <= num_online_cpus */
++ num_possible_rss_qs = min_t(u32, num_online_cpus(),
+ rsscap.num_recv_que);
+
+ net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, num_possible_rss_qs);
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Mohammed Gamal <mgamal@redhat.com>
+Date: Mon, 14 May 2018 15:32:19 -0700
+Subject: hv_netvsc: Use Windows version instead of NVSP version on GPAD teardown
+
+From: Mohammed Gamal <mgamal@redhat.com>
+
+[ Commit 2afc5d61a7197de25a61f54ea4ecfb4cb62b1d42 upstream. ]
+
+When changing network interface settings, Windows guests
+older than WS2016 can no longer shut down. This was addressed
+by commit 0ef58b0a05c12 ("hv_netvsc: change GPAD teardown order
+on older versions"), however the issue also occurs on WS2012
+guests that share NVSP protocol versions with WS2016 guests.
+Hence we use Windows version directly to differentiate them.
+
+Fixes: 0ef58b0a05c12 ("hv_netvsc: change GPAD teardown order on older versions")
+Signed-off-by: Mohammed Gamal <mgamal@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -569,13 +569,13 @@ void netvsc_device_remove(struct hv_devi
+ netdev_dbg(ndev, "net device safe to remove\n");
+
+ /* older versions require that buffer be revoked before close */
+- if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4)
++ if (vmbus_proto_version < VERSION_WIN10)
+ netvsc_teardown_gpadl(device, net_device);
+
+ /* Now, we can close the channel safely */
+ vmbus_close(device->channel);
+
+- if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4)
++ if (vmbus_proto_version >= VERSION_WIN10)
+ netvsc_teardown_gpadl(device, net_device);
+
+ /* Release all resources */
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Amritha Nambiar <amritha.nambiar@intel.com>
+Date: Thu, 17 May 2018 14:50:44 -0700
+Subject: net: Fix a bug in removing queues from XPS map
+
+From: Amritha Nambiar <amritha.nambiar@intel.com>
+
+[ Upstream commit 6358d49ac23995fdfe157cc8747ab0f274d3954b ]
+
+While removing queues from the XPS map, the individual CPU ID
+alone was used to index the CPUs map; this should be changed to also
+factor in the traffic class mapping for the CPU-to-queue lookup.
+
+Fixes: 184c449f91fe ("net: Add support for XPS with QoS via traffic classes")
+Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
+Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2058,7 +2058,7 @@ static bool remove_xps_queue_cpu(struct
+ int i, j;
+
+ for (i = count, j = offset; i--; j++) {
+- if (!remove_xps_queue(dev_maps, cpu, j))
++ if (!remove_xps_queue(dev_maps, tci, j))
+ break;
+ }
+
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Tarick Bedeir <tarick@google.com>
+Date: Sun, 13 May 2018 16:38:45 -0700
+Subject: net/mlx4_core: Fix error handling in mlx4_init_port_info.
+
+From: Tarick Bedeir <tarick@google.com>
+
+[ Upstream commit 57f6f99fdad9984801cde05c1db68fe39b474a10 ]
+
+Avoid exiting the function with a lingering sysfs file (if the first
+call to device_create_file() fails while the second succeeds), and avoid
+calling devlink_port_unregister() twice.
+
+In other words, either mlx4_init_port_info() succeeds and returns zero, or
+it fails, returns non-zero, and requires no cleanup.
+
+Fixes: 096335b3f983 ("mlx4_core: Allow dynamic MTU configuration for IB ports")
+Signed-off-by: Tarick Bedeir <tarick@google.com>
+Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
+Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/main.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/main.c
++++ b/drivers/net/ethernet/mellanox/mlx4/main.c
+@@ -3007,6 +3007,7 @@ static int mlx4_init_port_info(struct ml
+ mlx4_err(dev, "Failed to create file for port %d\n", port);
+ devlink_port_unregister(&info->devlink_port);
+ info->port = -1;
++ return err;
+ }
+
+ sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
+@@ -3028,9 +3029,10 @@ static int mlx4_init_port_info(struct ml
+ &info->port_attr);
+ devlink_port_unregister(&info->devlink_port);
+ info->port = -1;
++ return err;
+ }
+
+- return err;
++ return 0;
+ }
+
+ static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Davide Caratti <dcaratti@redhat.com>
+Date: Wed, 16 May 2018 12:54:29 +0200
+Subject: net/sched: fix refcnt leak in the error path of tcf_vlan_init()
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+[ Upstream commit 5a4931ae0193f8a4a97e8260fd0df1d705d83299 ]
+
+Similarly to what was done with commit a52956dfc503 ("net sched actions:
+fix refcnt leak in skbmod"), fix the error path of tcf_vlan_init() to avoid
+refcnt leaks when a wrong value of TCA_VLAN_PUSH_VLAN_PROTOCOL is given.
+
+Fixes: 5026c9b1bafc ("net sched: vlan action fix late binding")
+CC: Roman Mashak <mrv@mojatatu.com>
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_vlan.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/sched/act_vlan.c
++++ b/net/sched/act_vlan.c
+@@ -154,6 +154,8 @@ static int tcf_vlan_init(struct net *net
+ case htons(ETH_P_8021AD):
+ break;
+ default:
++ if (exists)
++ tcf_idr_release(*a, bind);
+ return -EPROTONOSUPPORT;
+ }
+ } else {
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 18 May 2018 14:51:44 +0200
+Subject: net: sched: red: avoid hashing NULL child
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 44a63b137f7b6e4c7bd6c9cc21615941cb36509d ]
+
+Hangbin reported an Oops triggered by the syzkaller qdisc rules:
+
+ kasan: GPF could be caused by NULL-ptr deref or user memory access
+ general protection fault: 0000 [#1] SMP KASAN PTI
+ Modules linked in: sch_red
+ CPU: 0 PID: 28699 Comm: syz-executor5 Not tainted 4.17.0-rc4.kcov #1
+ Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
+ RIP: 0010:qdisc_hash_add+0x26/0xa0
+ RSP: 0018:ffff8800589cf470 EFLAGS: 00010203
+ RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff824ad971
+ RDX: 0000000000000007 RSI: ffffc9000ce9f000 RDI: 000000000000003c
+ RBP: 0000000000000001 R08: ffffed000b139ea2 R09: ffff8800589cf4f0
+ R10: ffff8800589cf50f R11: ffffed000b139ea2 R12: ffff880054019fc0
+ R13: ffff880054019fb4 R14: ffff88005c0af600 R15: ffff880054019fb0
+ FS: 00007fa6edcb1700(0000) GS:ffff88005ce00000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000020000740 CR3: 000000000fc16000 CR4: 00000000000006f0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+ red_change+0x2d2/0xed0 [sch_red]
+ qdisc_create+0x57e/0xef0
+ tc_modify_qdisc+0x47f/0x14e0
+ rtnetlink_rcv_msg+0x6a8/0x920
+ netlink_rcv_skb+0x2a2/0x3c0
+ netlink_unicast+0x511/0x740
+ netlink_sendmsg+0x825/0xc30
+ sock_sendmsg+0xc5/0x100
+ ___sys_sendmsg+0x778/0x8e0
+ __sys_sendmsg+0xf5/0x1b0
+ do_syscall_64+0xbd/0x3b0
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+ RIP: 0033:0x450869
+ RSP: 002b:00007fa6edcb0c48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+ RAX: ffffffffffffffda RBX: 00007fa6edcb16b4 RCX: 0000000000450869
+ RDX: 0000000000000000 RSI: 00000000200000c0 RDI: 0000000000000013
+ RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000
+ R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
+ R13: 0000000000008778 R14: 0000000000702838 R15: 00007fa6edcb1700
+ Code: e9 0b fe ff ff 0f 1f 44 00 00 55 53 48 89 fb 89 f5 e8 3f 07 f3 fe 48 8d 7b 3c 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 04 84 d2 75 51
+ RIP: qdisc_hash_add+0x26/0xa0 RSP: ffff8800589cf470
+
+When a red qdisc is updated with a 0 limit, the child qdisc is left
+unmodified and no additional scheduler is created in red_change(), so
+the 'child' local variable is rightfully NULL and must not be added
+to the hash table.
+
+This change addresses the above issue moving qdisc_hash_add() right
+after the child qdisc creation. It additionally removes unneeded checks
+for noop_qdisc.
+
+Reported-by: Hangbin Liu <liuhangbin@gmail.com>
+Fixes: 49b499718fa1 ("net: sched: make default fifo qdiscs appear in the dump")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_red.c | 5 +++--
+ net/sched/sch_tbf.c | 5 +++--
+ 2 files changed, 6 insertions(+), 4 deletions(-)
+
+--- a/net/sched/sch_red.c
++++ b/net/sched/sch_red.c
+@@ -191,10 +191,11 @@ static int red_change(struct Qdisc *sch,
+ child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
+ if (IS_ERR(child))
+ return PTR_ERR(child);
+- }
+
+- if (child != &noop_qdisc)
++ /* child is fifo, no need to check for noop_qdisc */
+ qdisc_hash_add(child, true);
++ }
++
+ sch_tree_lock(sch);
+ q->flags = ctl->flags;
+ q->limit = ctl->limit;
+--- a/net/sched/sch_tbf.c
++++ b/net/sched/sch_tbf.c
+@@ -388,6 +388,9 @@ static int tbf_change(struct Qdisc *sch,
+ err = PTR_ERR(child);
+ goto done;
+ }
++
++ /* child is fifo, no need to check for noop_qdisc */
++ qdisc_hash_add(child, true);
+ }
+
+ sch_tree_lock(sch);
+@@ -396,8 +399,6 @@ static int tbf_change(struct Qdisc *sch,
+ q->qdisc->qstats.backlog);
+ qdisc_destroy(q->qdisc);
+ q->qdisc = child;
+- if (child != &noop_qdisc)
+- qdisc_hash_add(child, true);
+ }
+ q->limit = qopt->limit;
+ if (tb[TCA_TBF_PBURST])
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Eric Biggers <ebiggers@google.com>
+Date: Sun, 13 May 2018 17:01:30 -0700
+Subject: net/smc: check for missing nlattrs in SMC_PNETID messages
+
+From: Eric Biggers <ebiggers@google.com>
+
+[ Upstream commit d49baa7e12ee70c0a7b821d088a770c94c02e494 ]
+
+It's possible to crash the kernel in several different ways by sending
+messages to the SMC_PNETID generic netlink family that are missing the
+expected attributes:
+
+- Missing SMC_PNETID_NAME => null pointer dereference when comparing
+ names.
+- Missing SMC_PNETID_ETHNAME => null pointer dereference accessing
+ smc_pnetentry::ndev.
+- Missing SMC_PNETID_IBNAME => null pointer dereference accessing
+ smc_pnetentry::smcibdev.
+- Missing SMC_PNETID_IBPORT => out of bounds array access to
+ smc_ib_device::pattr[-1].
+
+Fix it by validating that all expected attributes are present and that
+SMC_PNETID_IBPORT is nonzero.
+
+Reported-by: syzbot+5cd61039dc9b8bfa6e47@syzkaller.appspotmail.com
+Fixes: 6812baabf24d ("smc: establish pnet table management")
+Cc: <stable@vger.kernel.org> # v4.11+
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/smc/smc_pnet.c | 71 +++++++++++++++++++++++++++++------------------------
+ 1 file changed, 40 insertions(+), 31 deletions(-)
+
+--- a/net/smc/smc_pnet.c
++++ b/net/smc/smc_pnet.c
+@@ -245,40 +245,45 @@ out:
+ static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem,
+ struct nlattr *tb[])
+ {
+- char *string, *ibname = NULL;
+- int rc = 0;
++ char *string, *ibname;
++ int rc;
+
+ memset(pnetelem, 0, sizeof(*pnetelem));
+ INIT_LIST_HEAD(&pnetelem->list);
+- if (tb[SMC_PNETID_NAME]) {
+- string = (char *)nla_data(tb[SMC_PNETID_NAME]);
+- if (!smc_pnetid_valid(string, pnetelem->pnet_name)) {
+- rc = -EINVAL;
+- goto error;
+- }
+- }
+- if (tb[SMC_PNETID_ETHNAME]) {
+- string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
+- pnetelem->ndev = dev_get_by_name(net, string);
+- if (!pnetelem->ndev)
+- return -ENOENT;
+- }
+- if (tb[SMC_PNETID_IBNAME]) {
+- ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
+- ibname = strim(ibname);
+- pnetelem->smcibdev = smc_pnet_find_ib(ibname);
+- if (!pnetelem->smcibdev) {
+- rc = -ENOENT;
+- goto error;
+- }
+- }
+- if (tb[SMC_PNETID_IBPORT]) {
+- pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
+- if (pnetelem->ib_port > SMC_MAX_PORTS) {
+- rc = -EINVAL;
+- goto error;
+- }
+- }
++
++ rc = -EINVAL;
++ if (!tb[SMC_PNETID_NAME])
++ goto error;
++ string = (char *)nla_data(tb[SMC_PNETID_NAME]);
++ if (!smc_pnetid_valid(string, pnetelem->pnet_name))
++ goto error;
++
++ rc = -EINVAL;
++ if (!tb[SMC_PNETID_ETHNAME])
++ goto error;
++ rc = -ENOENT;
++ string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
++ pnetelem->ndev = dev_get_by_name(net, string);
++ if (!pnetelem->ndev)
++ goto error;
++
++ rc = -EINVAL;
++ if (!tb[SMC_PNETID_IBNAME])
++ goto error;
++ rc = -ENOENT;
++ ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
++ ibname = strim(ibname);
++ pnetelem->smcibdev = smc_pnet_find_ib(ibname);
++ if (!pnetelem->smcibdev)
++ goto error;
++
++ rc = -EINVAL;
++ if (!tb[SMC_PNETID_IBPORT])
++ goto error;
++ pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
++ if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS)
++ goto error;
++
+ return 0;
+
+ error:
+@@ -307,6 +312,8 @@ static int smc_pnet_get(struct sk_buff *
+ void *hdr;
+ int rc;
+
++ if (!info->attrs[SMC_PNETID_NAME])
++ return -EINVAL;
+ pnetelem = smc_pnet_find_pnetid(
+ (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
+ if (!pnetelem)
+@@ -359,6 +366,8 @@ static int smc_pnet_add(struct sk_buff *
+
+ static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
+ {
++ if (!info->attrs[SMC_PNETID_NAME])
++ return -EINVAL;
+ return smc_pnet_remove_by_pnetid(
+ (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
+ }
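+
+Background sketch (not part of the patch): the same "check before
+dereference" rule applies to any generic netlink handler, since every
+attribute comes from userspace and may simply be absent. The DEMO_ATTR_*
+constants and demo_cmd() below are invented for illustration.
+
+  #include <net/genetlink.h>
+  #include <net/netlink.h>
+
+  enum {
+          DEMO_ATTR_UNSPEC,
+          DEMO_ATTR_NAME,
+          DEMO_ATTR_PORT,
+          __DEMO_ATTR_MAX
+  };
+
+  static int demo_cmd(struct sk_buff *skb, struct genl_info *info)
+  {
+          u8 port;
+
+          /* Reject the request unless both required attributes are present. */
+          if (!info->attrs[DEMO_ATTR_NAME] || !info->attrs[DEMO_ATTR_PORT])
+                  return -EINVAL;
+
+          port = nla_get_u8(info->attrs[DEMO_ATTR_PORT]);
+          if (port < 1)   /* 1-based, mirroring the SMC_PNETID_IBPORT check */
+                  return -EINVAL;
+
+          pr_debug("demo: configuring port %u\n", port);
+          return 0;
+  }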
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Willem de Bruijn <willemb@google.com>
+Date: Thu, 17 May 2018 13:13:29 -0400
+Subject: net: test tailroom before appending to linear skb
+
+From: Willem de Bruijn <willemb@google.com>
+
+[ Upstream commit 113f99c3358564a0647d444c2ae34e8b1abfd5b9 ]
+
+Device features may change during transmission. In particular with
+corking, a device may toggle scatter-gather in between allocating
+and writing to an skb.
+
+Do not unconditionally assume that !NETIF_F_SG at write time implies
+that the same held at alloc time and thus the skb has sufficient
+tailroom.
+
+This issue predates git history.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_output.c | 3 ++-
+ net/ipv6/ip6_output.c | 3 ++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -1040,7 +1040,8 @@ alloc_new_skb:
+ if (copy > length)
+ copy = length;
+
+- if (!(rt->dst.dev->features&NETIF_F_SG)) {
++ if (!(rt->dst.dev->features&NETIF_F_SG) &&
++ skb_tailroom(skb) >= copy) {
+ unsigned int off;
+
+ off = skb->len;
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1488,7 +1488,8 @@ alloc_new_skb:
+ if (copy > length)
+ copy = length;
+
+- if (!(rt->dst.dev->features&NETIF_F_SG)) {
++ if (!(rt->dst.dev->features&NETIF_F_SG) &&
++ skb_tailroom(skb) >= copy) {
+ unsigned int off;
+
+ off = skb->len;
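+
+Background sketch (not part of the patch): the invariant being enforced is
+that data may be appended to the linear area only while tailroom remains;
+otherwise it must go into paged fragments. demo_append() below is an
+invented helper, not a kernel function.
+
+  #include <linux/netdevice.h>
+  #include <linux/skbuff.h>
+
+  static int demo_append(struct sk_buff *skb, const struct net_device *dev,
+                         const void *from, unsigned int copy)
+  {
+          /* Re-check both conditions at write time: device features may
+           * have changed since the skb was allocated and sized.
+           */
+          if (!(dev->features & NETIF_F_SG) && skb_tailroom(skb) >= copy) {
+                  skb_put_data(skb, from, copy);  /* grows the linear area */
+                  return 0;
+          }
+          return -EMSGSIZE;       /* caller falls back to fragments */
+  }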
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Willem de Bruijn <willemb@google.com>
+Date: Fri, 11 May 2018 13:24:25 -0400
+Subject: packet: in packet_snd start writing at link layer allocation
+
+From: Willem de Bruijn <willemb@google.com>
+
+[ Upstream commit b84bbaf7a6c8cca24f8acf25a2c8e46913a947ba ]
+
+Packet sockets allow construction of packets shorter than
+dev->hard_header_len to accommodate protocols with variable length
+link layer headers. These packets are padded to dev->hard_header_len,
+because some device drivers interpret that as a minimum packet size.
+
+packet_snd reserves dev->hard_header_len bytes on allocation.
+SOCK_DGRAM sockets call skb_push in dev_hard_header() to ensure that
+link layer headers are stored in the reserved range. SOCK_RAW sockets
+do the same in tpacket_snd, but not in packet_snd.
+
+Syzbot was able to send a zero byte packet to a device with a massive
+116B link layer header, causing padding to cross over into skb_shinfo.
+Fix this by writing from the start of the llheader reserved range also
+in the case of packet_snd/SOCK_RAW.
+
+Update skb_set_network_header to the new offset. This also corrects
+it for SOCK_DGRAM, where it incorrectly double counted reserve due to
+the skb_push in dev_hard_header.
+
+Fixes: 9ed988cd5915 ("packet: validate variable length ll headers")
+Reported-by: syzbot+71d74a5406d02057d559@syzkaller.appspotmail.com
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -2912,13 +2912,15 @@ static int packet_snd(struct socket *soc
+ if (skb == NULL)
+ goto out_unlock;
+
+- skb_set_network_header(skb, reserve);
++ skb_reset_network_header(skb);
+
+ err = -EINVAL;
+ if (sock->type == SOCK_DGRAM) {
+ offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
+ if (unlikely(offset < 0))
+ goto out_free;
++ } else if (reserve) {
++ skb_push(skb, reserve);
+ }
+
+ /* Returns -EFAULT on error */
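+
+Background sketch (not part of the patch): the headroom convention being
+restored. demo_build() below is invented for illustration and skips the
+msghdr plumbing of the real packet_snd().
+
+  #include <linux/netdevice.h>
+  #include <linux/skbuff.h>
+  #include <linux/string.h>
+
+  static struct sk_buff *demo_build(struct net_device *dev, const void *hdr,
+                                    const void *payload, unsigned int plen)
+  {
+          unsigned int hlen = dev->hard_header_len;
+          struct sk_buff *skb = alloc_skb(hlen + plen, GFP_KERNEL);
+
+          if (!skb)
+                  return NULL;
+
+          skb_reserve(skb, hlen);                 /* headroom for the header */
+          skb_reset_network_header(skb);          /* payload starts here */
+          skb_put_data(skb, payload, plen);
+
+          /* Write the link-layer header inside the reserved range:
+           * skb_push() moves skb->data back into the headroom.
+           */
+          memcpy(skb_push(skb, hlen), hdr, hlen);
+          return skb;
+  }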
--- /dev/null
+net-fix-a-bug-in-removing-queues-from-xps-map.patch
+net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch
+net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch
+net-sched-red-avoid-hashing-null-child.patch
+net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch
+net-test-tailroom-before-appending-to-linear-skb.patch
+packet-in-packet_snd-start-writing-at-link-layer-allocation.patch
+sock_diag-fix-use-after-free-read-in-__sk_free.patch
+tcp-purge-write-queue-in-tcp_connect_init.patch
+vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch
+vmxnet3-use-dma-memory-barriers-where-required.patch
+hv_netvsc-fix-the-real-number-of-queues-of-non-vrss-cases.patch
+hv_netvsc-rename-ind_table-to-rx_table.patch
+hv_netvsc-rename-tx_send_table-to-tx_table.patch
+hv_netvsc-add-initialization-of-tx_table-in-netvsc_device_add.patch
+hv_netvsc-set-tx_table-to-equal-weight-after-subchannels-open.patch
+hv_netvsc-netvsc_teardown_gpadl-split.patch
+hv_netvsc-preserve-hw_features-on-mtu-channels-ringparam-changes.patch
+hv_netvsc-empty-current-transmit-aggregation-if-flow-blocked.patch
+hv_netvsc-use-the-num_online_cpus-for-channel-limit.patch
+hv_netvsc-avoid-retry-on-send-during-shutdown.patch
+hv_netvsc-only-wake-transmit-queue-if-link-is-up.patch
+hv_netvsc-fix-error-unwind-handling-if-vmbus_open-fails.patch
+hv_netvsc-cancel-subchannel-setup-before-halting-device.patch
+hv_netvsc-fix-race-in-napi-poll-when-rescheduling.patch
+hv_netvsc-defer-queue-selection-to-vf.patch
+hv_netvsc-disable-napi-before-channel-close.patch
+hv_netvsc-use-rcu-to-fix-concurrent-rx-and-queue-changes.patch
+hv_netvsc-change-gpad-teardown-order-on-older-versions.patch
+hv_netvsc-common-detach-logic.patch
+hv_netvsc-use-windows-version-instead-of-nvsp-version-on-gpad-teardown.patch
+hv_netvsc-split-netvsc_revoke_buf-and-netvsc_teardown_gpadl.patch
+hv_netvsc-ensure-correct-teardown-message-sequence-order.patch
+hv_netvsc-fix-net-device-attach-on-older-windows-hosts.patch
+sparc-vio-use-put_device-instead-of-kfree.patch
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 18 May 2018 04:47:55 -0700
+Subject: sock_diag: fix use-after-free read in __sk_free
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9709020c86f6bf8439ca3effc58cfca49a5de192 ]
+
+We must not call sock_diag_has_destroy_listeners(sk) on a socket
+that has no reference on the net structure.
+
+BUG: KASAN: use-after-free in sock_diag_has_destroy_listeners include/linux/sock_diag.h:75 [inline]
+BUG: KASAN: use-after-free in __sk_free+0x329/0x340 net/core/sock.c:1609
+Read of size 8 at addr ffff88018a02e3a0 by task swapper/1/0
+
+CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.17.0-rc5+ #54
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x1b9/0x294 lib/dump_stack.c:113
+ print_address_description+0x6c/0x20b mm/kasan/report.c:256
+ kasan_report_error mm/kasan/report.c:354 [inline]
+ kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412
+ __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
+ sock_diag_has_destroy_listeners include/linux/sock_diag.h:75 [inline]
+ __sk_free+0x329/0x340 net/core/sock.c:1609
+ sk_free+0x42/0x50 net/core/sock.c:1623
+ sock_put include/net/sock.h:1664 [inline]
+ reqsk_free include/net/request_sock.h:116 [inline]
+ reqsk_put include/net/request_sock.h:124 [inline]
+ inet_csk_reqsk_queue_drop_and_put net/ipv4/inet_connection_sock.c:672 [inline]
+ reqsk_timer_handler+0xe27/0x10e0 net/ipv4/inet_connection_sock.c:739
+ call_timer_fn+0x230/0x940 kernel/time/timer.c:1326
+ expire_timers kernel/time/timer.c:1363 [inline]
+ __run_timers+0x79e/0xc50 kernel/time/timer.c:1666
+ run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692
+ __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285
+ invoke_softirq kernel/softirq.c:365 [inline]
+ irq_exit+0x1d1/0x200 kernel/softirq.c:405
+ exiting_irq arch/x86/include/asm/apic.h:525 [inline]
+ smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052
+ apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863
+ </IRQ>
+RIP: 0010:native_safe_halt+0x6/0x10 arch/x86/include/asm/irqflags.h:54
+RSP: 0018:ffff8801d9ae7c38 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff13
+RAX: dffffc0000000000 RBX: 1ffff1003b35cf8a RCX: 0000000000000000
+RDX: 1ffffffff11a30d0 RSI: 0000000000000001 RDI: ffffffff88d18680
+RBP: ffff8801d9ae7c38 R08: ffffed003b5e46c3 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001
+R13: ffff8801d9ae7cf0 R14: ffffffff897bef20 R15: 0000000000000000
+ arch_safe_halt arch/x86/include/asm/paravirt.h:94 [inline]
+ default_idle+0xc2/0x440 arch/x86/kernel/process.c:354
+ arch_cpu_idle+0x10/0x20 arch/x86/kernel/process.c:345
+ default_idle_call+0x6d/0x90 kernel/sched/idle.c:93
+ cpuidle_idle_call kernel/sched/idle.c:153 [inline]
+ do_idle+0x395/0x560 kernel/sched/idle.c:262
+ cpu_startup_entry+0x104/0x120 kernel/sched/idle.c:368
+ start_secondary+0x426/0x5b0 arch/x86/kernel/smpboot.c:269
+ secondary_startup_64+0xa5/0xb0 arch/x86/kernel/head_64.S:242
+
+Allocated by task 4557:
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:448
+ set_track mm/kasan/kasan.c:460 [inline]
+ kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553
+ kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490
+ kmem_cache_alloc+0x12e/0x760 mm/slab.c:3554
+ kmem_cache_zalloc include/linux/slab.h:691 [inline]
+ net_alloc net/core/net_namespace.c:383 [inline]
+ copy_net_ns+0x159/0x4c0 net/core/net_namespace.c:423
+ create_new_namespaces+0x69d/0x8f0 kernel/nsproxy.c:107
+ unshare_nsproxy_namespaces+0xc3/0x1f0 kernel/nsproxy.c:206
+ ksys_unshare+0x708/0xf90 kernel/fork.c:2408
+ __do_sys_unshare kernel/fork.c:2476 [inline]
+ __se_sys_unshare kernel/fork.c:2474 [inline]
+ __x64_sys_unshare+0x31/0x40 kernel/fork.c:2474
+ do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+Freed by task 69:
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:448
+ set_track mm/kasan/kasan.c:460 [inline]
+ __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521
+ kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
+ __cache_free mm/slab.c:3498 [inline]
+ kmem_cache_free+0x86/0x2d0 mm/slab.c:3756
+ net_free net/core/net_namespace.c:399 [inline]
+ net_drop_ns.part.14+0x11a/0x130 net/core/net_namespace.c:406
+ net_drop_ns net/core/net_namespace.c:405 [inline]
+ cleanup_net+0x6a1/0xb20 net/core/net_namespace.c:541
+ process_one_work+0xc1e/0x1b50 kernel/workqueue.c:2145
+ worker_thread+0x1cc/0x1440 kernel/workqueue.c:2279
+ kthread+0x345/0x410 kernel/kthread.c:240
+ ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412
+
+The buggy address belongs to the object at ffff88018a02c140
+ which belongs to the cache net_namespace of size 8832
+The buggy address is located 8800 bytes inside of
+ 8832-byte region [ffff88018a02c140, ffff88018a02e3c0)
+The buggy address belongs to the page:
+page:ffffea0006280b00 count:1 mapcount:0 mapping:ffff88018a02c140 index:0x0 compound_mapcount: 0
+flags: 0x2fffc0000008100(slab|head)
+raw: 02fffc0000008100 ffff88018a02c140 0000000000000000 0000000100000001
+raw: ffffea00062a1320 ffffea0006268020 ffff8801d9bdde40 0000000000000000
+page dumped because: kasan: bad access detected
+
+Fixes: b922622ec6ef ("sock_diag: don't broadcast kernel sockets")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Craig Gallek <kraig@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1595,7 +1595,7 @@ void sk_destruct(struct sock *sk)
+
+ static void __sk_free(struct sock *sk)
+ {
+- if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
++ if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
+ sock_diag_broadcast_destroy(sk);
+ else
+ sk_destruct(sk);
--- /dev/null
+From 00ad691ab140b54ab9f5de5e74cb994f552e8124 Mon Sep 17 00:00:00 2001
+From: Arvind Yadav <arvind.yadav.cs@gmail.com>
+Date: Wed, 25 Apr 2018 20:26:14 +0530
+Subject: sparc: vio: use put_device() instead of kfree()
+
+From: Arvind Yadav <arvind.yadav.cs@gmail.com>
+
+[ Upstream commit 00ad691ab140b54ab9f5de5e74cb994f552e8124 ]
+
+Never directly free @dev after calling device_register(), even
+if it returned an error. Always use put_device() to give up the
+reference initialized.
+
+Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/kernel/vio.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/sparc/kernel/vio.c
++++ b/arch/sparc/kernel/vio.c
+@@ -403,7 +403,7 @@ static struct vio_dev *vio_create_one(st
+ if (err) {
+ printk(KERN_ERR "VIO: Could not register device %s, err=%d\n",
+ dev_name(&vdev->dev), err);
+- kfree(vdev);
++ put_device(&vdev->dev);
+ return NULL;
+ }
+ if (vdev->dp)
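+
+Background sketch (not part of the patch): the general rule being applied.
+Once device_initialize()/device_register() has run, the device core holds
+a reference that must be dropped with put_device(), which ends up in the
+release() callback; kfree()ing the structure directly bypasses that
+reference counting. The demo_dev names below are invented for illustration.
+
+  #include <linux/device.h>
+  #include <linux/slab.h>
+
+  struct demo_dev {
+          struct device dev;
+  };
+
+  static void demo_release(struct device *dev)
+  {
+          kfree(container_of(dev, struct demo_dev, dev));
+  }
+
+  static struct demo_dev *demo_create(struct device *parent)
+  {
+          struct demo_dev *d = kzalloc(sizeof(*d), GFP_KERNEL);
+
+          if (!d)
+                  return NULL;
+
+          d->dev.parent = parent;
+          d->dev.release = demo_release;
+          dev_set_name(&d->dev, "demo0");
+
+          if (device_register(&d->dev)) {
+                  /* Not kfree(d): let the release() callback do it. */
+                  put_device(&d->dev);
+                  return NULL;
+          }
+          return d;
+  }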
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 14 May 2018 21:14:26 -0700
+Subject: tcp: purge write queue in tcp_connect_init()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7f582b248d0a86bae5788c548d7bb5bca6f7691a ]
+
+syzkaller found a reliable way to crash the host, hitting a BUG()
+in __tcp_retransmit_skb().
+
+Malicious MSG_FASTOPEN is the root cause. We need to purge the write
+queue in tcp_connect_init() at the point where we init snd_una/write_seq.
+
+This patch also replaces the BUG() with a less intrusive WARN_ON_ONCE().
+
+kernel BUG at net/ipv4/tcp_output.c:2837!
+invalid opcode: 0000 [#1] SMP KASAN
+Dumping ftrace buffer:
+ (ftrace buffer empty)
+Modules linked in:
+CPU: 0 PID: 5276 Comm: syz-executor0 Not tainted 4.17.0-rc3+ #51
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+RIP: 0010:__tcp_retransmit_skb+0x2992/0x2eb0 net/ipv4/tcp_output.c:2837
+RSP: 0000:ffff8801dae06ff8 EFLAGS: 00010206
+RAX: ffff8801b9fe61c0 RBX: 00000000ffc18a16 RCX: ffffffff864e1a49
+RDX: 0000000000000100 RSI: ffffffff864e2e12 RDI: 0000000000000005
+RBP: ffff8801dae073a0 R08: ffff8801b9fe61c0 R09: ffffed0039c40dd2
+R10: ffffed0039c40dd2 R11: ffff8801ce206e93 R12: 00000000421eeaad
+R13: ffff8801ce206d4e R14: ffff8801ce206cc0 R15: ffff8801cd4f4a80
+FS: 0000000000000000(0000) GS:ffff8801dae00000(0063) knlGS:00000000096bc900
+CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033
+CR2: 0000000020000000 CR3: 00000001c47b6000 CR4: 00000000001406f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <IRQ>
+ tcp_retransmit_skb+0x2e/0x250 net/ipv4/tcp_output.c:2923
+ tcp_retransmit_timer+0xc50/0x3060 net/ipv4/tcp_timer.c:488
+ tcp_write_timer_handler+0x339/0x960 net/ipv4/tcp_timer.c:573
+ tcp_write_timer+0x111/0x1d0 net/ipv4/tcp_timer.c:593
+ call_timer_fn+0x230/0x940 kernel/time/timer.c:1326
+ expire_timers kernel/time/timer.c:1363 [inline]
+ __run_timers+0x79e/0xc50 kernel/time/timer.c:1666
+ run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692
+ __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285
+ invoke_softirq kernel/softirq.c:365 [inline]
+ irq_exit+0x1d1/0x200 kernel/softirq.c:405
+ exiting_irq arch/x86/include/asm/apic.h:525 [inline]
+ smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052
+ apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863
+
+Fixes: cf60af03ca4e ("net-tcp: Fast Open client - sendmsg(MSG_FASTOPEN)")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2814,8 +2814,10 @@ int __tcp_retransmit_skb(struct sock *sk
+ return -EBUSY;
+
+ if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
+- if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+- BUG();
++ if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
++ WARN_ON_ONCE(1);
++ return -EINVAL;
++ }
+ if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
+ return -ENOMEM;
+ }
+@@ -3312,6 +3314,7 @@ static void tcp_connect_init(struct sock
+ sock_reset_flag(sk, SOCK_DONE);
+ tp->snd_wnd = 0;
+ tcp_init_wl(tp, 0);
++ tcp_write_queue_purge(sk);
+ tp->snd_una = tp->write_seq;
+ tp->snd_sml = tp->write_seq;
+ tp->snd_up = tp->write_seq;
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: "hpreg@vmware.com" <hpreg@vmware.com>
+Date: Mon, 14 May 2018 08:14:34 -0400
+Subject: vmxnet3: set the DMA mask before the first DMA map operation
+
+From: "hpreg@vmware.com" <hpreg@vmware.com>
+
+[ Upstream commit 61aeecea40afb2b89933e27cd4adb10fc2e75cfd ]
+
+The DMA mask must be set before, not after, the first DMA map operation, or
+the first DMA map operation could in theory fail on some systems.
+
+Fixes: b0eb57cb97e78 ("VMXNET3: Add support for virtual IOMMU")
+Signed-off-by: Regis Duchesne <hpreg@vmware.com>
+Acked-by: Ronak Doshi <doshir@vmware.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vmxnet3/vmxnet3_drv.c | 50 +++++++++++++++++++-------------------
+ 1 file changed, 25 insertions(+), 25 deletions(-)
+
+--- a/drivers/net/vmxnet3/vmxnet3_drv.c
++++ b/drivers/net/vmxnet3/vmxnet3_drv.c
+@@ -2675,7 +2675,7 @@ vmxnet3_set_mac_addr(struct net_device *
+ /* ==================== initialization and cleanup routines ============ */
+
+ static int
+-vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
++vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter)
+ {
+ int err;
+ unsigned long mmio_start, mmio_len;
+@@ -2687,30 +2687,12 @@ vmxnet3_alloc_pci_resources(struct vmxne
+ return err;
+ }
+
+- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
+- if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
+- dev_err(&pdev->dev,
+- "pci_set_consistent_dma_mask failed\n");
+- err = -EIO;
+- goto err_set_mask;
+- }
+- *dma64 = true;
+- } else {
+- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
+- dev_err(&pdev->dev,
+- "pci_set_dma_mask failed\n");
+- err = -EIO;
+- goto err_set_mask;
+- }
+- *dma64 = false;
+- }
+-
+ err = pci_request_selected_regions(pdev, (1 << 2) - 1,
+ vmxnet3_driver_name);
+ if (err) {
+ dev_err(&pdev->dev,
+ "Failed to request region for adapter: error %d\n", err);
+- goto err_set_mask;
++ goto err_enable_device;
+ }
+
+ pci_set_master(pdev);
+@@ -2738,7 +2720,7 @@ err_bar1:
+ iounmap(adapter->hw_addr0);
+ err_ioremap:
+ pci_release_selected_regions(pdev, (1 << 2) - 1);
+-err_set_mask:
++err_enable_device:
+ pci_disable_device(pdev);
+ return err;
+ }
+@@ -3243,7 +3225,7 @@ vmxnet3_probe_device(struct pci_dev *pde
+ #endif
+ };
+ int err;
+- bool dma64 = false; /* stupid gcc */
++ bool dma64;
+ u32 ver;
+ struct net_device *netdev;
+ struct vmxnet3_adapter *adapter;
+@@ -3289,6 +3271,24 @@ vmxnet3_probe_device(struct pci_dev *pde
+ adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE;
+ adapter->rx_ring2_size = VMXNET3_DEF_RX_RING2_SIZE;
+
++ if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
++ if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
++ dev_err(&pdev->dev,
++ "pci_set_consistent_dma_mask failed\n");
++ err = -EIO;
++ goto err_set_mask;
++ }
++ dma64 = true;
++ } else {
++ if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
++ dev_err(&pdev->dev,
++ "pci_set_dma_mask failed\n");
++ err = -EIO;
++ goto err_set_mask;
++ }
++ dma64 = false;
++ }
++
+ spin_lock_init(&adapter->cmd_lock);
+ adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
+ sizeof(struct vmxnet3_adapter),
+@@ -3296,7 +3296,7 @@ vmxnet3_probe_device(struct pci_dev *pde
+ if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) {
+ dev_err(&pdev->dev, "Failed to map dma\n");
+ err = -EFAULT;
+- goto err_dma_map;
++ goto err_set_mask;
+ }
+ adapter->shared = dma_alloc_coherent(
+ &adapter->pdev->dev,
+@@ -3347,7 +3347,7 @@ vmxnet3_probe_device(struct pci_dev *pde
+ }
+ #endif /* VMXNET3_RSS */
+
+- err = vmxnet3_alloc_pci_resources(adapter, &dma64);
++ err = vmxnet3_alloc_pci_resources(adapter);
+ if (err < 0)
+ goto err_alloc_pci;
+
+@@ -3493,7 +3493,7 @@ err_alloc_queue_desc:
+ err_alloc_shared:
+ dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
+ sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
+-err_dma_map:
++err_set_mask:
+ free_netdev(netdev);
+ return err;
+ }
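+
+Background sketch (not part of the patch): the ordering constraint shown in
+a generic PCI probe skeleton. demo_probe() and its error label are invented
+for illustration.
+
+  #include <linux/dma-mapping.h>
+  #include <linux/pci.h>
+
+  static int demo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+  {
+          dma_addr_t handle;
+          void *buf;
+          int err;
+
+          err = pci_enable_device(pdev);
+          if (err)
+                  return err;
+
+          /* 1. Declare how wide the device's DMA addresses may be ... */
+          err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+          if (err)
+                  err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+          if (err)
+                  goto err_disable;
+
+          /* 2. ... and only then create mappings that depend on that mask. */
+          buf = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &handle, GFP_KERNEL);
+          if (!buf) {
+                  err = -ENOMEM;
+                  goto err_disable;
+          }
+
+          pci_set_drvdata(pdev, buf);
+          return 0;
+
+  err_disable:
+          pci_disable_device(pdev);
+          return err;
+  }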
--- /dev/null
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: "hpreg@vmware.com" <hpreg@vmware.com>
+Date: Mon, 14 May 2018 08:14:49 -0400
+Subject: vmxnet3: use DMA memory barriers where required
+
+From: "hpreg@vmware.com" <hpreg@vmware.com>
+
+[ Upstream commit f3002c1374fb2367c9d8dbb28852791ef90d2bac ]
+
+The gen bits must be read first from (resp. written last to) DMA memory.
+The proper way to enforce this on Linux is to call dma_rmb() (resp.
+dma_wmb()).
+
+Signed-off-by: Regis Duchesne <hpreg@vmware.com>
+Acked-by: Ronak Doshi <doshir@vmware.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vmxnet3/vmxnet3_drv.c | 22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+--- a/drivers/net/vmxnet3/vmxnet3_drv.c
++++ b/drivers/net/vmxnet3/vmxnet3_drv.c
+@@ -369,6 +369,11 @@ vmxnet3_tq_tx_complete(struct vmxnet3_tx
+
+ gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
+ while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
++ /* Prevent any &gdesc->tcd field from being (speculatively)
++ * read before (&gdesc->tcd)->gen is read.
++ */
++ dma_rmb();
++
+ completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
+ &gdesc->tcd), tq, adapter->pdev,
+ adapter);
+@@ -1099,6 +1104,11 @@ vmxnet3_tq_xmit(struct sk_buff *skb, str
+ gdesc->txd.tci = skb_vlan_tag_get(skb);
+ }
+
++ /* Ensure that the write to (&gdesc->txd)->gen will be observed after
++ * all other writes to &gdesc->txd.
++ */
++ dma_wmb();
++
+ /* finally flips the GEN bit of the SOP desc. */
+ gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
+ VMXNET3_TXD_GEN);
+@@ -1286,6 +1296,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx
+ */
+ break;
+ }
++
++ /* Prevent any rcd field from being (speculatively) read before
++ * rcd->gen is read.
++ */
++ dma_rmb();
++
+ BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2 &&
+ rcd->rqID != rq->dataRingQid);
+ idx = rcd->rxdIdx;
+@@ -1515,6 +1531,12 @@ rcd_done:
+ ring->next2comp = idx;
+ num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
+ ring = rq->rx_ring + ring_idx;
++
++ /* Ensure that the writes to rxd->gen bits will be observed
++ * after all other writes to rxd objects.
++ */
++ dma_wmb();
++
+ while (num_to_alloc) {
+ vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
+ &rxCmdDesc);