git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 22 May 2018 18:12:36 +0000 (20:12 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 22 May 2018 18:12:36 +0000 (20:12 +0200)
added patches:
hv_netvsc-add-initialization-of-tx_table-in-netvsc_device_add.patch
hv_netvsc-avoid-retry-on-send-during-shutdown.patch
hv_netvsc-cancel-subchannel-setup-before-halting-device.patch
hv_netvsc-change-gpad-teardown-order-on-older-versions.patch
hv_netvsc-common-detach-logic.patch
hv_netvsc-defer-queue-selection-to-vf.patch
hv_netvsc-disable-napi-before-channel-close.patch
hv_netvsc-empty-current-transmit-aggregation-if-flow-blocked.patch
hv_netvsc-ensure-correct-teardown-message-sequence-order.patch
hv_netvsc-fix-error-unwind-handling-if-vmbus_open-fails.patch
hv_netvsc-fix-net-device-attach-on-older-windows-hosts.patch
hv_netvsc-fix-race-in-napi-poll-when-rescheduling.patch
hv_netvsc-fix-the-real-number-of-queues-of-non-vrss-cases.patch
hv_netvsc-netvsc_teardown_gpadl-split.patch
hv_netvsc-only-wake-transmit-queue-if-link-is-up.patch
hv_netvsc-preserve-hw_features-on-mtu-channels-ringparam-changes.patch
hv_netvsc-rename-ind_table-to-rx_table.patch
hv_netvsc-rename-tx_send_table-to-tx_table.patch
hv_netvsc-set-tx_table-to-equal-weight-after-subchannels-open.patch
hv_netvsc-split-netvsc_revoke_buf-and-netvsc_teardown_gpadl.patch
hv_netvsc-use-rcu-to-fix-concurrent-rx-and-queue-changes.patch
hv_netvsc-use-the-num_online_cpus-for-channel-limit.patch
hv_netvsc-use-windows-version-instead-of-nvsp-version-on-gpad-teardown.patch
net-fix-a-bug-in-removing-queues-from-xps-map.patch
net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch
net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch
net-sched-red-avoid-hashing-null-child.patch
net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch
net-test-tailroom-before-appending-to-linear-skb.patch
packet-in-packet_snd-start-writing-at-link-layer-allocation.patch
sock_diag-fix-use-after-free-read-in-__sk_free.patch
sparc-vio-use-put_device-instead-of-kfree.patch
tcp-purge-write-queue-in-tcp_connect_init.patch
vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch
vmxnet3-use-dma-memory-barriers-where-required.patch

36 files changed:
queue-4.14/hv_netvsc-add-initialization-of-tx_table-in-netvsc_device_add.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-avoid-retry-on-send-during-shutdown.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-cancel-subchannel-setup-before-halting-device.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-change-gpad-teardown-order-on-older-versions.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-common-detach-logic.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-defer-queue-selection-to-vf.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-disable-napi-before-channel-close.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-empty-current-transmit-aggregation-if-flow-blocked.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-ensure-correct-teardown-message-sequence-order.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-fix-error-unwind-handling-if-vmbus_open-fails.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-fix-net-device-attach-on-older-windows-hosts.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-fix-race-in-napi-poll-when-rescheduling.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-fix-the-real-number-of-queues-of-non-vrss-cases.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-netvsc_teardown_gpadl-split.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-only-wake-transmit-queue-if-link-is-up.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-preserve-hw_features-on-mtu-channels-ringparam-changes.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-rename-ind_table-to-rx_table.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-rename-tx_send_table-to-tx_table.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-set-tx_table-to-equal-weight-after-subchannels-open.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-split-netvsc_revoke_buf-and-netvsc_teardown_gpadl.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-use-rcu-to-fix-concurrent-rx-and-queue-changes.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-use-the-num_online_cpus-for-channel-limit.patch [new file with mode: 0644]
queue-4.14/hv_netvsc-use-windows-version-instead-of-nvsp-version-on-gpad-teardown.patch [new file with mode: 0644]
queue-4.14/net-fix-a-bug-in-removing-queues-from-xps-map.patch [new file with mode: 0644]
queue-4.14/net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch [new file with mode: 0644]
queue-4.14/net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch [new file with mode: 0644]
queue-4.14/net-sched-red-avoid-hashing-null-child.patch [new file with mode: 0644]
queue-4.14/net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch [new file with mode: 0644]
queue-4.14/net-test-tailroom-before-appending-to-linear-skb.patch [new file with mode: 0644]
queue-4.14/packet-in-packet_snd-start-writing-at-link-layer-allocation.patch [new file with mode: 0644]
queue-4.14/series [new file with mode: 0644]
queue-4.14/sock_diag-fix-use-after-free-read-in-__sk_free.patch [new file with mode: 0644]
queue-4.14/sparc-vio-use-put_device-instead-of-kfree.patch [new file with mode: 0644]
queue-4.14/tcp-purge-write-queue-in-tcp_connect_init.patch [new file with mode: 0644]
queue-4.14/vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch [new file with mode: 0644]
queue-4.14/vmxnet3-use-dma-memory-barriers-where-required.patch [new file with mode: 0644]

diff --git a/queue-4.14/hv_netvsc-add-initialization-of-tx_table-in-netvsc_device_add.patch b/queue-4.14/hv_netvsc-add-initialization-of-tx_table-in-netvsc_device_add.patch
new file mode 100644 (file)
index 0000000..4f4c832
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 14 May 2018 15:32:03 -0700
+Subject: hv_netvsc: Add initialization of tx_table in netvsc_device_add()
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+[ Commit 6b0cbe315868d613123cf387052ccda5f09d49ea upstream. ]
+
+tx_table is part of the private data of kernel net_device. It is only
+zero-ed out when allocating net_device.
+
+We may recreate netvsc_device w/o recreating net_device, so the private
+netdev data, including tx_table, are not zeroed. It may contain channel
+numbers for the older netvsc_device.
+
+This patch adds initialization of tx_table each time we recreate
+netvsc_device.
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -1252,6 +1252,9 @@ struct netvsc_device *netvsc_device_add(
+       if (!net_device)
+               return ERR_PTR(-ENOMEM);
++      for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
++              net_device_ctx->tx_table[i] = 0;
++
+       net_device->ring_size = ring_size;
+       /* Because the device uses NAPI, all the interrupt batching and
diff --git a/queue-4.14/hv_netvsc-avoid-retry-on-send-during-shutdown.patch b/queue-4.14/hv_netvsc-avoid-retry-on-send-during-shutdown.patch
new file mode 100644 (file)
index 0000000..87359d7
--- /dev/null
@@ -0,0 +1,84 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:09 -0700
+Subject: hv_netvsc: avoid retry on send during shutdown
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit 12f69661a49446840d742d8feb593ace022d9f66 upstream. ]
+
+Change the initialization order so that the device is ready to transmit
+(ie connect vsp is completed) before setting the internal reference
+to the device with RCU.
+
+This avoids any races on initialization and prevents retry issues
+on shutdown.
+
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c |   24 +++++++-----------------
+ 1 file changed, 7 insertions(+), 17 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -850,13 +850,6 @@ int netvsc_send(struct net_device *ndev,
+       if (unlikely(!net_device || net_device->destroy))
+               return -ENODEV;
+-      /* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get
+-       * here before the negotiation with the host is finished and
+-       * send_section_map may not be allocated yet.
+-       */
+-      if (unlikely(!net_device->send_section_map))
+-              return -EAGAIN;
+-
+       nvchan = &net_device->chan_table[packet->q_idx];
+       packet->send_buf_index = NETVSC_INVALID_INDEX;
+       packet->cp_partial = false;
+@@ -864,10 +857,8 @@ int netvsc_send(struct net_device *ndev,
+       /* Send control message directly without accessing msd (Multi-Send
+        * Data) field which may be changed during data packet processing.
+        */
+-      if (!skb) {
+-              cur_send = packet;
+-              goto send_now;
+-      }
++      if (!skb)
++              return netvsc_send_pkt(device, packet, net_device, pb, skb);
+       /* batch packets in send buffer if possible */
+       msdp = &nvchan->msd;
+@@ -951,7 +942,6 @@ int netvsc_send(struct net_device *ndev,
+               }
+       }
+-send_now:
+       if (cur_send)
+               ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);
+@@ -1308,11 +1298,6 @@ struct netvsc_device *netvsc_device_add(
+       napi_enable(&net_device->chan_table[0].napi);
+-      /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
+-       * populated.
+-       */
+-      rcu_assign_pointer(net_device_ctx->nvdev, net_device);
+-
+       /* Connect with the NetVsp */
+       ret = netvsc_connect_vsp(device, net_device, device_info);
+       if (ret != 0) {
+@@ -1321,6 +1306,11 @@ struct netvsc_device *netvsc_device_add(
+               goto close;
+       }
++      /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
++       * populated.
++       */
++      rcu_assign_pointer(net_device_ctx->nvdev, net_device);
++
+       return net_device;
+ close:
diff --git a/queue-4.14/hv_netvsc-cancel-subchannel-setup-before-halting-device.patch b/queue-4.14/hv_netvsc-cancel-subchannel-setup-before-halting-device.patch
new file mode 100644 (file)
index 0000000..b08e59a
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:12 -0700
+Subject: hv_netvsc: cancel subchannel setup before halting device
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit a7483ec0267c69b34e818738da60b392623da94b upstream. ]
+
+Block setup of multiple channels earlier in the teardown
+process. This avoids possible races between halt and subchannel
+initialization.
+
+Suggested-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/rndis_filter.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -1340,6 +1340,9 @@ void rndis_filter_device_remove(struct h
+ {
+       struct rndis_device *rndis_dev = net_dev->extension;
++      /* Don't try and setup sub channels if about to halt */
++      cancel_work_sync(&net_dev->subchan_work);
++
+       /* Halt and release the rndis device */
+       rndis_filter_halt_device(rndis_dev);
diff --git a/queue-4.14/hv_netvsc-change-gpad-teardown-order-on-older-versions.patch b/queue-4.14/hv_netvsc-change-gpad-teardown-order-on-older-versions.patch
new file mode 100644 (file)
index 0000000..85950a8
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:17 -0700
+Subject: hv_netvsc: change GPAD teardown order on older versions
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit 0ef58b0a05c127762f975c3dfe8b922e4aa87a29 upstream. ]
+
+On older versions of Windows, the host ignores messages after
+vmbus channel is closed.
+
+Workaround this by doing what Windows does and send the teardown
+before close on older versions of NVSP protocol.
+
+Reported-by: Mohammed Gamal <mgamal@redhat.com>
+Fixes: 0cf737808ae7 ("hv_netvsc: netvsc_teardown_gpadl() split")
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -570,10 +570,15 @@ void netvsc_device_remove(struct hv_devi
+        */
+       netdev_dbg(ndev, "net device safe to remove\n");
++      /* older versions require that buffer be revoked before close */
++      if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4)
++              netvsc_teardown_gpadl(device, net_device);
++
+       /* Now, we can close the channel safely */
+       vmbus_close(device->channel);
+-      netvsc_teardown_gpadl(device, net_device);
++      if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4)
++              netvsc_teardown_gpadl(device, net_device);
+       /* Release all resources */
+       free_netvsc_device_rcu(net_device);
diff --git a/queue-4.14/hv_netvsc-common-detach-logic.patch b/queue-4.14/hv_netvsc-common-detach-logic.patch
new file mode 100644 (file)
index 0000000..7fb7208
--- /dev/null
@@ -0,0 +1,559 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:18 -0700
+Subject: hv_netvsc: common detach logic
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit 7b2ee50c0cd513a176a26a71f2989facdd75bfea upstream. ]
+
+Make common function for detaching internals of device
+during changes to MTU and RSS. Make sure no more packets
+are transmitted and all packets have been received before
+doing device teardown.
+
+Change the wait logic to be common and use usleep_range().
+
+Changes transmit enabling logic so that transmit queues are disabled
+during the period when lower device is being changed. And enabled
+only after sub channels are setup. This avoids issue where it could
+be that a packet was being sent while subchannel was not initialized.
+
+Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug")
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/hyperv_net.h   |    1 
+ drivers/net/hyperv/netvsc.c       |   21 +-
+ drivers/net/hyperv/netvsc_drv.c   |  280 +++++++++++++++++++++-----------------
+ drivers/net/hyperv/rndis_filter.c |   15 --
+ 4 files changed, 175 insertions(+), 142 deletions(-)
+
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -208,7 +208,6 @@ void netvsc_channel_cb(void *context);
+ int netvsc_poll(struct napi_struct *napi, int budget);
+ void rndis_set_subchannel(struct work_struct *w);
+-bool rndis_filter_opened(const struct netvsc_device *nvdev);
+ int rndis_filter_open(struct netvsc_device *nvdev);
+ int rndis_filter_close(struct netvsc_device *nvdev);
+ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -554,8 +554,6 @@ void netvsc_device_remove(struct hv_devi
+               = rtnl_dereference(net_device_ctx->nvdev);
+       int i;
+-      cancel_work_sync(&net_device->subchan_work);
+-
+       netvsc_revoke_buf(device, net_device);
+       RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
+@@ -644,13 +642,18 @@ static void netvsc_send_tx_complete(stru
+       queue_sends =
+               atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);
+-      if (net_device->destroy && queue_sends == 0)
+-              wake_up(&net_device->wait_drain);
+-
+-      if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
+-          (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
+-           queue_sends < 1))
+-              netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
++      if (unlikely(net_device->destroy)) {
++              if (queue_sends == 0)
++                      wake_up(&net_device->wait_drain);
++      } else {
++              struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);
++
++              if (netif_tx_queue_stopped(txq) &&
++                  (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
++                   queue_sends < 1)) {
++                      netif_tx_wake_queue(txq);
++              }
++      }
+ }
+ static void netvsc_send_completion(struct netvsc_device *net_device,
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -45,7 +45,10 @@
+ #include "hyperv_net.h"
+-#define RING_SIZE_MIN         64
++#define RING_SIZE_MIN 64
++#define RETRY_US_LO   5000
++#define RETRY_US_HI   10000
++#define RETRY_MAX     2000    /* >10 sec */
+ #define LINKCHANGE_INT (2 * HZ)
+ #define VF_TAKEOVER_INT (HZ / 10)
+@@ -89,10 +92,8 @@ static int netvsc_open(struct net_device
+       }
+       rdev = nvdev->extension;
+-      if (!rdev->link_state) {
++      if (!rdev->link_state)
+               netif_carrier_on(net);
+-              netif_tx_wake_all_queues(net);
+-      }
+       if (vf_netdev) {
+               /* Setting synthetic device up transparently sets
+@@ -108,36 +109,25 @@ static int netvsc_open(struct net_device
+       return 0;
+ }
+-static int netvsc_close(struct net_device *net)
++static int netvsc_wait_until_empty(struct netvsc_device *nvdev)
+ {
+-      struct net_device_context *net_device_ctx = netdev_priv(net);
+-      struct net_device *vf_netdev
+-              = rtnl_dereference(net_device_ctx->vf_netdev);
+-      struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
+-      int ret = 0;
+-      u32 aread, i, msec = 10, retry = 0, retry_max = 20;
+-      struct vmbus_channel *chn;
+-
+-      netif_tx_disable(net);
+-
+-      /* No need to close rndis filter if it is removed already */
+-      if (!nvdev)
+-              goto out;
+-
+-      ret = rndis_filter_close(nvdev);
+-      if (ret != 0) {
+-              netdev_err(net, "unable to close device (ret %d).\n", ret);
+-              return ret;
+-      }
++      unsigned int retry = 0;
++      int i;
+       /* Ensure pending bytes in ring are read */
+-      while (true) {
+-              aread = 0;
++      for (;;) {
++              u32 aread = 0;
++
+               for (i = 0; i < nvdev->num_chn; i++) {
+-                      chn = nvdev->chan_table[i].channel;
++                      struct vmbus_channel *chn
++                              = nvdev->chan_table[i].channel;
++
+                       if (!chn)
+                               continue;
++                      /* make sure receive not running now */
++                      napi_synchronize(&nvdev->chan_table[i].napi);
++
+                       aread = hv_get_bytes_to_read(&chn->inbound);
+                       if (aread)
+                               break;
+@@ -147,22 +137,40 @@ static int netvsc_close(struct net_devic
+                               break;
+               }
+-              retry++;
+-              if (retry > retry_max || aread == 0)
+-                      break;
++              if (aread == 0)
++                      return 0;
+-              msleep(msec);
++              if (++retry > RETRY_MAX)
++                      return -ETIMEDOUT;
+-              if (msec < 1000)
+-                      msec *= 2;
++              usleep_range(RETRY_US_LO, RETRY_US_HI);
+       }
++}
+-      if (aread) {
+-              netdev_err(net, "Ring buffer not empty after closing rndis\n");
+-              ret = -ETIMEDOUT;
++static int netvsc_close(struct net_device *net)
++{
++      struct net_device_context *net_device_ctx = netdev_priv(net);
++      struct net_device *vf_netdev
++              = rtnl_dereference(net_device_ctx->vf_netdev);
++      struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
++      int ret;
++
++      netif_tx_disable(net);
++
++      /* No need to close rndis filter if it is removed already */
++      if (!nvdev)
++              return 0;
++
++      ret = rndis_filter_close(nvdev);
++      if (ret != 0) {
++              netdev_err(net, "unable to close device (ret %d).\n", ret);
++              return ret;
+       }
+-out:
++      ret = netvsc_wait_until_empty(nvdev);
++      if (ret)
++              netdev_err(net, "Ring buffer not empty after closing rndis\n");
++
+       if (vf_netdev)
+               dev_close(vf_netdev);
+@@ -820,16 +828,81 @@ static void netvsc_get_channels(struct n
+       }
+ }
++static int netvsc_detach(struct net_device *ndev,
++                       struct netvsc_device *nvdev)
++{
++      struct net_device_context *ndev_ctx = netdev_priv(ndev);
++      struct hv_device *hdev = ndev_ctx->device_ctx;
++      int ret;
++
++      /* Don't try continuing to try and setup sub channels */
++      if (cancel_work_sync(&nvdev->subchan_work))
++              nvdev->num_chn = 1;
++
++      /* If device was up (receiving) then shutdown */
++      if (netif_running(ndev)) {
++              netif_tx_disable(ndev);
++
++              ret = rndis_filter_close(nvdev);
++              if (ret) {
++                      netdev_err(ndev,
++                                 "unable to close device (ret %d).\n", ret);
++                      return ret;
++              }
++
++              ret = netvsc_wait_until_empty(nvdev);
++              if (ret) {
++                      netdev_err(ndev,
++                                 "Ring buffer not empty after closing rndis\n");
++                      return ret;
++              }
++      }
++
++      netif_device_detach(ndev);
++
++      rndis_filter_device_remove(hdev, nvdev);
++
++      return 0;
++}
++
++static int netvsc_attach(struct net_device *ndev,
++                       struct netvsc_device_info *dev_info)
++{
++      struct net_device_context *ndev_ctx = netdev_priv(ndev);
++      struct hv_device *hdev = ndev_ctx->device_ctx;
++      struct netvsc_device *nvdev;
++      struct rndis_device *rdev;
++      int ret;
++
++      nvdev = rndis_filter_device_add(hdev, dev_info);
++      if (IS_ERR(nvdev))
++              return PTR_ERR(nvdev);
++
++      /* Note: enable and attach happen when sub-channels setup */
++
++      netif_carrier_off(ndev);
++
++      if (netif_running(ndev)) {
++              ret = rndis_filter_open(nvdev);
++              if (ret)
++                      return ret;
++
++              rdev = nvdev->extension;
++              if (!rdev->link_state)
++                      netif_carrier_on(ndev);
++      }
++
++      return 0;
++}
++
+ static int netvsc_set_channels(struct net_device *net,
+                              struct ethtool_channels *channels)
+ {
+       struct net_device_context *net_device_ctx = netdev_priv(net);
+-      struct hv_device *dev = net_device_ctx->device_ctx;
+       struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
+       unsigned int orig, count = channels->combined_count;
+       struct netvsc_device_info device_info;
+-      bool was_opened;
+-      int ret = 0;
++      int ret;
+       /* We do not support separate count for rx, tx, or other */
+       if (count == 0 ||
+@@ -846,9 +919,6 @@ static int netvsc_set_channels(struct ne
+               return -EINVAL;
+       orig = nvdev->num_chn;
+-      was_opened = rndis_filter_opened(nvdev);
+-      if (was_opened)
+-              rndis_filter_close(nvdev);
+       memset(&device_info, 0, sizeof(device_info));
+       device_info.num_chn = count;
+@@ -858,28 +928,17 @@ static int netvsc_set_channels(struct ne
+       device_info.recv_sections = nvdev->recv_section_cnt;
+       device_info.recv_section_size = nvdev->recv_section_size;
+-      rndis_filter_device_remove(dev, nvdev);
++      ret = netvsc_detach(net, nvdev);
++      if (ret)
++              return ret;
+-      nvdev = rndis_filter_device_add(dev, &device_info);
+-      if (IS_ERR(nvdev)) {
+-              ret = PTR_ERR(nvdev);
++      ret = netvsc_attach(net, &device_info);
++      if (ret) {
+               device_info.num_chn = orig;
+-              nvdev = rndis_filter_device_add(dev, &device_info);
+-
+-              if (IS_ERR(nvdev)) {
+-                      netdev_err(net, "restoring channel setting failed: %ld\n",
+-                                 PTR_ERR(nvdev));
+-                      return ret;
+-              }
++              if (netvsc_attach(net, &device_info))
++                      netdev_err(net, "restoring channel setting failed\n");
+       }
+-      if (was_opened)
+-              rndis_filter_open(nvdev);
+-
+-      /* We may have missed link change notifications */
+-      net_device_ctx->last_reconfig = 0;
+-      schedule_delayed_work(&net_device_ctx->dwork, 0);
+-
+       return ret;
+ }
+@@ -946,10 +1005,8 @@ static int netvsc_change_mtu(struct net_
+       struct net_device_context *ndevctx = netdev_priv(ndev);
+       struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
+       struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+-      struct hv_device *hdev = ndevctx->device_ctx;
+       int orig_mtu = ndev->mtu;
+       struct netvsc_device_info device_info;
+-      bool was_opened;
+       int ret = 0;
+       if (!nvdev || nvdev->destroy)
+@@ -962,11 +1019,6 @@ static int netvsc_change_mtu(struct net_
+                       return ret;
+       }
+-      netif_device_detach(ndev);
+-      was_opened = rndis_filter_opened(nvdev);
+-      if (was_opened)
+-              rndis_filter_close(nvdev);
+-
+       memset(&device_info, 0, sizeof(device_info));
+       device_info.ring_size = ring_size;
+       device_info.num_chn = nvdev->num_chn;
+@@ -975,35 +1027,27 @@ static int netvsc_change_mtu(struct net_
+       device_info.recv_sections = nvdev->recv_section_cnt;
+       device_info.recv_section_size = nvdev->recv_section_size;
+-      rndis_filter_device_remove(hdev, nvdev);
++      ret = netvsc_detach(ndev, nvdev);
++      if (ret)
++              goto rollback_vf;
+       ndev->mtu = mtu;
+-      nvdev = rndis_filter_device_add(hdev, &device_info);
+-      if (IS_ERR(nvdev)) {
+-              ret = PTR_ERR(nvdev);
+-
+-              /* Attempt rollback to original MTU */
+-              ndev->mtu = orig_mtu;
+-              nvdev = rndis_filter_device_add(hdev, &device_info);
+-
+-              if (vf_netdev)
+-                      dev_set_mtu(vf_netdev, orig_mtu);
+-
+-              if (IS_ERR(nvdev)) {
+-                      netdev_err(ndev, "restoring mtu failed: %ld\n",
+-                                 PTR_ERR(nvdev));
+-                      return ret;
+-              }
+-      }
+-
+-      if (was_opened)
+-              rndis_filter_open(nvdev);
++      ret = netvsc_attach(ndev, &device_info);
++      if (ret)
++              goto rollback;
+-      netif_device_attach(ndev);
++      return 0;
+-      /* We may have missed link change notifications */
+-      schedule_delayed_work(&ndevctx->dwork, 0);
++rollback:
++      /* Attempt rollback to original MTU */
++      ndev->mtu = orig_mtu;
++
++      if (netvsc_attach(ndev, &device_info))
++              netdev_err(ndev, "restoring mtu failed\n");
++rollback_vf:
++      if (vf_netdev)
++              dev_set_mtu(vf_netdev, orig_mtu);
+       return ret;
+ }
+@@ -1469,11 +1513,9 @@ static int netvsc_set_ringparam(struct n
+ {
+       struct net_device_context *ndevctx = netdev_priv(ndev);
+       struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+-      struct hv_device *hdev = ndevctx->device_ctx;
+       struct netvsc_device_info device_info;
+       struct ethtool_ringparam orig;
+       u32 new_tx, new_rx;
+-      bool was_opened;
+       int ret = 0;
+       if (!nvdev || nvdev->destroy)
+@@ -1499,34 +1541,18 @@ static int netvsc_set_ringparam(struct n
+       device_info.recv_sections = new_rx;
+       device_info.recv_section_size = nvdev->recv_section_size;
+-      netif_device_detach(ndev);
+-      was_opened = rndis_filter_opened(nvdev);
+-      if (was_opened)
+-              rndis_filter_close(nvdev);
+-
+-      rndis_filter_device_remove(hdev, nvdev);
+-
+-      nvdev = rndis_filter_device_add(hdev, &device_info);
+-      if (IS_ERR(nvdev)) {
+-              ret = PTR_ERR(nvdev);
++      ret = netvsc_detach(ndev, nvdev);
++      if (ret)
++              return ret;
++      ret = netvsc_attach(ndev, &device_info);
++      if (ret) {
+               device_info.send_sections = orig.tx_pending;
+               device_info.recv_sections = orig.rx_pending;
+-              nvdev = rndis_filter_device_add(hdev, &device_info);
+-              if (IS_ERR(nvdev)) {
+-                      netdev_err(ndev, "restoring ringparam failed: %ld\n",
+-                                 PTR_ERR(nvdev));
+-                      return ret;
+-              }
+-      }
+-      if (was_opened)
+-              rndis_filter_open(nvdev);
+-      netif_device_attach(ndev);
+-
+-      /* We may have missed link change notifications */
+-      ndevctx->last_reconfig = 0;
+-      schedule_delayed_work(&ndevctx->dwork, 0);
++              if (netvsc_attach(ndev, &device_info))
++                      netdev_err(ndev, "restoring ringparam failed");
++      }
+       return ret;
+ }
+@@ -2003,8 +2029,8 @@ no_net:
+ static int netvsc_remove(struct hv_device *dev)
+ {
+       struct net_device_context *ndev_ctx;
+-      struct net_device *vf_netdev;
+-      struct net_device *net;
++      struct net_device *vf_netdev, *net;
++      struct netvsc_device *nvdev;
+       net = hv_get_drvdata(dev);
+       if (net == NULL) {
+@@ -2014,10 +2040,14 @@ static int netvsc_remove(struct hv_devic
+       ndev_ctx = netdev_priv(net);
+-      netif_device_detach(net);
+-
+       cancel_delayed_work_sync(&ndev_ctx->dwork);
++      rcu_read_lock();
++      nvdev = rcu_dereference(ndev_ctx->nvdev);
++
++      if  (nvdev)
++              cancel_work_sync(&nvdev->subchan_work);
++
+       /*
+        * Call to the vsc driver to let it know that the device is being
+        * removed. Also blocks mtu and channel changes.
+@@ -2027,11 +2057,13 @@ static int netvsc_remove(struct hv_devic
+       if (vf_netdev)
+               netvsc_unregister_vf(vf_netdev);
++      if (nvdev)
++              rndis_filter_device_remove(dev, nvdev);
++
+       unregister_netdevice(net);
+-      rndis_filter_device_remove(dev,
+-                                 rtnl_dereference(ndev_ctx->nvdev));
+       rtnl_unlock();
++      rcu_read_unlock();
+       hv_set_drvdata(dev, NULL);
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -1112,6 +1112,7 @@ void rndis_set_subchannel(struct work_st
+       for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
+               ndev_ctx->tx_table[i] = i % nvdev->num_chn;
++      netif_device_attach(ndev);
+       rtnl_unlock();
+       return;
+@@ -1122,6 +1123,8 @@ failed:
+       nvdev->max_chn = 1;
+       nvdev->num_chn = 1;
++
++      netif_device_attach(ndev);
+ unlock:
+       rtnl_unlock();
+ }
+@@ -1324,6 +1327,10 @@ out:
+               net_device->num_chn = 1;
+       }
++      /* No sub channels, device is ready */
++      if (net_device->num_chn == 1)
++              netif_device_attach(net);
++
+       return net_device;
+ err_dev_remv:
+@@ -1336,9 +1343,6 @@ void rndis_filter_device_remove(struct h
+ {
+       struct rndis_device *rndis_dev = net_dev->extension;
+-      /* Don't try and setup sub channels if about to halt */
+-      cancel_work_sync(&net_dev->subchan_work);
+-
+       /* Halt and release the rndis device */
+       rndis_filter_halt_device(rndis_dev);
+@@ -1368,8 +1372,3 @@ int rndis_filter_close(struct netvsc_dev
+       return rndis_filter_close_device(nvdev->extension);
+ }
+-
+-bool rndis_filter_opened(const struct netvsc_device *nvdev)
+-{
+-      return atomic_read(&nvdev->open_cnt) > 0;
+-}
diff --git a/queue-4.14/hv_netvsc-defer-queue-selection-to-vf.patch b/queue-4.14/hv_netvsc-defer-queue-selection-to-vf.patch
new file mode 100644 (file)
index 0000000..f46695e
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:14 -0700
+Subject: hv_netvsc: defer queue selection to VF
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit b3bf5666a51068ad5ddd89a76ed877101ef3bc16 upstream. ]
+
+When VF is used for accelerated networking it will likely have
+more queues (and different policy) than the synthetic NIC.
+This patch defers the queue policy to the VF so that all the
+queues can be used. This impacts workloads like locally generated UDP.
+
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc_drv.c |   15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -283,8 +283,19 @@ static u16 netvsc_select_queue(struct ne
+       rcu_read_lock();
+       vf_netdev = rcu_dereference(ndc->vf_netdev);
+       if (vf_netdev) {
+-              txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
+-              qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
++              const struct net_device_ops *vf_ops = vf_netdev->netdev_ops;
++
++              if (vf_ops->ndo_select_queue)
++                      txq = vf_ops->ndo_select_queue(vf_netdev, skb,
++                                                     accel_priv, fallback);
++              else
++                      txq = fallback(vf_netdev, skb);
++
++              /* Record the queue selected by VF so that it can be
++               * used for common case where VF has more queues than
++               * the synthetic device.
++               */
++              qdisc_skb_cb(skb)->slave_dev_queue_mapping = txq;
+       } else {
+               txq = netvsc_pick_tx(ndev, skb);
+       }
diff --git a/queue-4.14/hv_netvsc-disable-napi-before-channel-close.patch b/queue-4.14/hv_netvsc-disable-napi-before-channel-close.patch
new file mode 100644 (file)
index 0000000..621a044
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:15 -0700
+Subject: hv_netvsc: disable NAPI before channel close
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit 8348e0460ab1473f06c8b824699dd2eed3c1979d upstream. ]
+
+This makes sure that no CPU is still processing packets when
+the channel is closed.
+
+Fixes: 76bb5db5c749 ("netvsc: fix use after free on module removal")
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -567,6 +567,10 @@ void netvsc_device_remove(struct hv_devi
+       RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
++      /* And disassociate NAPI context from device */
++      for (i = 0; i < net_device->num_chn; i++)
++              netif_napi_del(&net_device->chan_table[i].napi);
++
+       /*
+        * At this point, no one should be accessing net_device
+        * except in here
+@@ -578,10 +582,6 @@ void netvsc_device_remove(struct hv_devi
+       netvsc_teardown_gpadl(device, net_device);
+-      /* And dissassociate NAPI context from device */
+-      for (i = 0; i < net_device->num_chn; i++)
+-              netif_napi_del(&net_device->chan_table[i].napi);
+-
+       /* Release all resources */
+       free_netvsc_device_rcu(net_device);
+ }
diff --git a/queue-4.14/hv_netvsc-empty-current-transmit-aggregation-if-flow-blocked.patch b/queue-4.14/hv_netvsc-empty-current-transmit-aggregation-if-flow-blocked.patch
new file mode 100644 (file)
index 0000000..6357ed1
--- /dev/null
@@ -0,0 +1,160 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:07 -0700
+Subject: hv_netvsc: empty current transmit aggregation if flow blocked
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit cfd8afd986cdb59ea9adac873c5082498a1eb7c0 upstream. ]
+
+If the transmit queue is known full, then don't keep aggregating
+data. And the cp_partial flag which indicates that the current
+aggregation buffer is full can be folded in to avoid more
+conditionals.
+
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/hyperv_net.h   |    2 +-
+ drivers/net/hyperv/netvsc.c       |   36 +++++++++++++++++++++---------------
+ drivers/net/hyperv/netvsc_drv.c   |    2 +-
+ drivers/net/hyperv/rndis_filter.c |    3 +--
+ 4 files changed, 24 insertions(+), 19 deletions(-)
+
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -192,7 +192,7 @@ struct netvsc_device *netvsc_device_add(
+                                       const struct netvsc_device_info *info);
+ int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx);
+ void netvsc_device_remove(struct hv_device *device);
+-int netvsc_send(struct net_device_context *ndc,
++int netvsc_send(struct net_device *net,
+               struct hv_netvsc_packet *packet,
+               struct rndis_message *rndis_msg,
+               struct hv_page_buffer *page_buffer,
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -700,13 +700,13 @@ static u32 netvsc_get_next_send_section(
+       return NETVSC_INVALID_INDEX;
+ }
+-static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
+-                                 unsigned int section_index,
+-                                 u32 pend_size,
+-                                 struct hv_netvsc_packet *packet,
+-                                 struct rndis_message *rndis_msg,
+-                                 struct hv_page_buffer *pb,
+-                                 struct sk_buff *skb)
++static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
++                                  unsigned int section_index,
++                                  u32 pend_size,
++                                  struct hv_netvsc_packet *packet,
++                                  struct rndis_message *rndis_msg,
++                                  struct hv_page_buffer *pb,
++                                  bool xmit_more)
+ {
+       char *start = net_device->send_buf;
+       char *dest = start + (section_index * net_device->send_section_size)
+@@ -719,7 +719,8 @@ static u32 netvsc_copy_to_send_buf(struc
+               packet->page_buf_cnt;
+       /* Add padding */
+-      if (skb->xmit_more && remain && !packet->cp_partial) {
++      remain = packet->total_data_buflen & (net_device->pkt_align - 1);
++      if (xmit_more && remain) {
+               padding = net_device->pkt_align - remain;
+               rndis_msg->msg_len += padding;
+               packet->total_data_buflen += padding;
+@@ -739,8 +740,6 @@ static u32 netvsc_copy_to_send_buf(struc
+               memset(dest, 0, padding);
+               msg_size += padding;
+       }
+-
+-      return msg_size;
+ }
+ static inline int netvsc_send_pkt(
+@@ -828,12 +827,13 @@ static inline void move_pkt_msd(struct h
+ }
+ /* RCU already held by caller */
+-int netvsc_send(struct net_device_context *ndev_ctx,
++int netvsc_send(struct net_device *ndev,
+               struct hv_netvsc_packet *packet,
+               struct rndis_message *rndis_msg,
+               struct hv_page_buffer *pb,
+               struct sk_buff *skb)
+ {
++      struct net_device_context *ndev_ctx = netdev_priv(ndev);
+       struct netvsc_device *net_device
+               = rcu_dereference_bh(ndev_ctx->nvdev);
+       struct hv_device *device = ndev_ctx->device_ctx;
+@@ -844,8 +844,7 @@ int netvsc_send(struct net_device_contex
+       struct multi_send_data *msdp;
+       struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
+       struct sk_buff *msd_skb = NULL;
+-      bool try_batch;
+-      bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
++      bool try_batch, xmit_more;
+       /* If device is rescinded, return error and packet will get dropped. */
+       if (unlikely(!net_device || net_device->destroy))
+@@ -896,10 +895,17 @@ int netvsc_send(struct net_device_contex
+               }
+       }
++      /* Keep aggregating only if stack says more data is coming
++       * and not doing mixed modes send and not flow blocked
++       */
++      xmit_more = skb->xmit_more &&
++              !packet->cp_partial &&
++              !netif_xmit_stopped(netdev_get_tx_queue(ndev, packet->q_idx));
++
+       if (section_index != NETVSC_INVALID_INDEX) {
+               netvsc_copy_to_send_buf(net_device,
+                                       section_index, msd_len,
+-                                      packet, rndis_msg, pb, skb);
++                                      packet, rndis_msg, pb, xmit_more);
+               packet->send_buf_index = section_index;
+@@ -919,7 +925,7 @@ int netvsc_send(struct net_device_contex
+               if (msdp->skb)
+                       dev_consume_skb_any(msdp->skb);
+-              if (xmit_more && !packet->cp_partial) {
++              if (xmit_more) {
+                       msdp->skb = skb;
+                       msdp->pkt = packet;
+                       msdp->count++;
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -614,7 +614,7 @@ static int netvsc_start_xmit(struct sk_b
+       /* timestamp packet in software */
+       skb_tx_timestamp(skb);
+-      ret = netvsc_send(net_device_ctx, packet, rndis_msg, pb, skb);
++      ret = netvsc_send(net, packet, rndis_msg, pb, skb);
+       if (likely(ret == 0))
+               return NETDEV_TX_OK;
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -217,7 +217,6 @@ static int rndis_filter_send_request(str
+       struct hv_netvsc_packet *packet;
+       struct hv_page_buffer page_buf[2];
+       struct hv_page_buffer *pb = page_buf;
+-      struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
+       int ret;
+       /* Setup the packet to send it */
+@@ -245,7 +244,7 @@ static int rndis_filter_send_request(str
+       }
+       rcu_read_lock_bh();
+-      ret = netvsc_send(net_device_ctx, packet, NULL, pb, NULL);
++      ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL);
+       rcu_read_unlock_bh();
+       return ret;
diff --git a/queue-4.14/hv_netvsc-ensure-correct-teardown-message-sequence-order.patch b/queue-4.14/hv_netvsc-ensure-correct-teardown-message-sequence-order.patch
new file mode 100644 (file)
index 0000000..180d7e8
--- /dev/null
@@ -0,0 +1,122 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Mohammed Gamal <mgamal@redhat.com>
+Date: Mon, 14 May 2018 15:32:21 -0700
+Subject: hv_netvsc: Ensure correct teardown message sequence order
+
+From: Mohammed Gamal <mgamal@redhat.com>
+
+[ Commit a56d99d714665591fed8527b90eef21530ea61e0 upstream. ]
+
+Prior to commit 0cf737808ae7 ("hv_netvsc: netvsc_teardown_gpadl() split")
+the call sequence in netvsc_device_remove() was as follows (as
+implemented in netvsc_destroy_buf()):
+1- Send NVSP_MSG1_TYPE_REVOKE_RECV_BUF message
+2- Teardown receive buffer GPADL
+3- Send NVSP_MSG1_TYPE_REVOKE_SEND_BUF message
+4- Teardown send buffer GPADL
+5- Close vmbus
+
+This didn't work for WS2016 hosts. Commit 0cf737808ae7
+("hv_netvsc: netvsc_teardown_gpadl() split") rearranged the
+teardown sequence as follows:
+1- Send NVSP_MSG1_TYPE_REVOKE_RECV_BUF message
+2- Send NVSP_MSG1_TYPE_REVOKE_SEND_BUF message
+3- Close vmbus
+4- Teardown receive buffer GPADL
+5- Teardown send buffer GPADL
+
+That worked well for WS2016 hosts, but it prevented guests on older hosts from
+shutting down after changing network settings. Commit 0ef58b0a05c1
+("hv_netvsc: change GPAD teardown order on older versions") ensured the
+following message sequence for older hosts
+1- Send NVSP_MSG1_TYPE_REVOKE_RECV_BUF message
+2- Send NVSP_MSG1_TYPE_REVOKE_SEND_BUF message
+3- Teardown receive buffer GPADL
+4- Teardown send buffer GPADL
+5- Close vmbus
+
+However, with this sequence calling `ip link set eth0 mtu 1000` hangs and the
+process becomes uninterruptible. On further analysis it turns out that on tearing
+down the receive buffer GPADL the kernel is waiting indefinitely
+in vmbus_teardown_gpadl() for a completion to be signaled.
+
+Here is a snippet of where this occurs:
+int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
+{
+        struct vmbus_channel_gpadl_teardown *msg;
+        struct vmbus_channel_msginfo *info;
+        unsigned long flags;
+        int ret;
+
+        info = kmalloc(sizeof(*info) +
+                       sizeof(struct vmbus_channel_gpadl_teardown), GFP_KERNEL);
+        if (!info)
+                return -ENOMEM;
+
+        init_completion(&info->waitevent);
+        info->waiting_channel = channel;
+[....]
+        ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_gpadl_teardown),
+                             true);
+
+        if (ret)
+                goto post_msg_err;
+
+        wait_for_completion(&info->waitevent);
+[....]
+}
+
+The completion is signaled from vmbus_ongpadl_torndown(), which gets called when
+the corresponding message is received from the host, which apparently never happens
+in that case.
+This patch works around the issue by restoring the first mentioned message sequence
+for older hosts
+
+Fixes: 0ef58b0a05c1 ("hv_netvsc: change GPAD teardown order on older versions")
+Signed-off-by: Mohammed Gamal <mgamal@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c |   19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -571,8 +571,17 @@ void netvsc_device_remove(struct hv_devi
+               = rtnl_dereference(net_device_ctx->nvdev);
+       int i;
++      /*
++       * Revoke receive buffer. If host is pre-Win2016 then tear down
++       * receive buffer GPADL. Do the same for send buffer.
++       */
+       netvsc_revoke_recv_buf(device, net_device);
++      if (vmbus_proto_version < VERSION_WIN10)
++              netvsc_teardown_recv_gpadl(device, net_device);
++
+       netvsc_revoke_send_buf(device, net_device);
++      if (vmbus_proto_version < VERSION_WIN10)
++              netvsc_teardown_send_gpadl(device, net_device);
+       RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
+@@ -586,15 +595,13 @@ void netvsc_device_remove(struct hv_devi
+        */
+       netdev_dbg(ndev, "net device safe to remove\n");
+-      /* older versions require that buffer be revoked before close */
+-      if (vmbus_proto_version < VERSION_WIN10) {
+-              netvsc_teardown_recv_gpadl(device, net_device);
+-              netvsc_teardown_send_gpadl(device, net_device);
+-      }
+-
+       /* Now, we can close the channel safely */
+       vmbus_close(device->channel);
++      /*
++       * If host is Win2016 or higher then we do the GPADL tear down
++       * here after VMBus is closed.
++      */
+       if (vmbus_proto_version >= VERSION_WIN10) {
+               netvsc_teardown_recv_gpadl(device, net_device);
+               netvsc_teardown_send_gpadl(device, net_device);
diff --git a/queue-4.14/hv_netvsc-fix-error-unwind-handling-if-vmbus_open-fails.patch b/queue-4.14/hv_netvsc-fix-error-unwind-handling-if-vmbus_open-fails.patch
new file mode 100644 (file)
index 0000000..a06b54c
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:11 -0700
+Subject: hv_netvsc: fix error unwind handling if vmbus_open fails
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit fcfb4a00d1e514e8313277a01ef919de1113025b upstream. ]
+
+Need to delete NAPI association if vmbus_open fails.
+
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -1288,7 +1288,6 @@ struct netvsc_device *netvsc_device_add(
+                        net_device->chan_table);
+       if (ret != 0) {
+-              netif_napi_del(&net_device->chan_table[0].napi);
+               netdev_err(ndev, "unable to open channel: %d\n", ret);
+               goto cleanup;
+       }
+@@ -1321,6 +1320,7 @@ close:
+       vmbus_close(device->channel);
+ cleanup:
++      netif_napi_del(&net_device->chan_table[0].napi);
+       free_netvsc_device(&net_device->rcu);
+       return ERR_PTR(ret);
diff --git a/queue-4.14/hv_netvsc-fix-net-device-attach-on-older-windows-hosts.patch b/queue-4.14/hv_netvsc-fix-net-device-attach-on-older-windows-hosts.patch
new file mode 100644 (file)
index 0000000..6082ffb
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Mohammed Gamal <mgamal@redhat.com>
+Date: Mon, 14 May 2018 15:32:22 -0700
+Subject: hv_netvsc: Fix net device attach on older Windows hosts
+
+From: Mohammed Gamal <mgamal@redhat.com>
+
+[ Commit 55be9f25be1ca5bda75c39808fc77e42691bc07f upstream. ]
+
+On older windows hosts the net_device instance is returned to
+the caller of rndis_filter_device_add() without having the presence
+bit set first. This would cause any subsequent calls to network device
+operations (e.g. MTU change, channel change) to fail after the device
+is detached once, returning -ENODEV.
+
+Instead of returning the device instance, we take the exit path where
+we call netif_device_attach()
+
+Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic")
+Signed-off-by: Mohammed Gamal <mgamal@redhat.com>
+Reviewed-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/rndis_filter.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -1276,7 +1276,7 @@ struct netvsc_device *rndis_filter_devic
+                  rndis_device->link_state ? "down" : "up");
+       if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
+-              return net_device;
++              goto out;
+       rndis_filter_query_link_speed(rndis_device, net_device);
diff --git a/queue-4.14/hv_netvsc-fix-race-in-napi-poll-when-rescheduling.patch b/queue-4.14/hv_netvsc-fix-race-in-napi-poll-when-rescheduling.patch
new file mode 100644 (file)
index 0000000..2874743
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:13 -0700
+Subject: hv_netvsc: fix race in napi poll when rescheduling
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit d64e38ae690e3337db0d38d9b149a193a1646c4b upstream. ]
+
+There is a race between napi_reschedule and re-enabling interrupts
+which could lead to missed host interrupts.  This occurs when
+interrupts are re-enabled (hv_end_read) and vmbus irq callback
+(netvsc_channel_cb) has already scheduled NAPI.
+
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -1205,9 +1205,10 @@ int netvsc_poll(struct napi_struct *napi
+       if (send_recv_completions(ndev, net_device, nvchan) == 0 &&
+           work_done < budget &&
+           napi_complete_done(napi, work_done) &&
+-          hv_end_read(&channel->inbound)) {
++          hv_end_read(&channel->inbound) &&
++          napi_schedule_prep(napi)) {
+               hv_begin_read(&channel->inbound);
+-              napi_reschedule(napi);
++              __napi_schedule(napi);
+       }
+       /* Driver may overshoot since multiple packets per descriptor */
diff --git a/queue-4.14/hv_netvsc-fix-the-real-number-of-queues-of-non-vrss-cases.patch b/queue-4.14/hv_netvsc-fix-the-real-number-of-queues-of-non-vrss-cases.patch
new file mode 100644 (file)
index 0000000..2fefa93
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 14 May 2018 15:32:00 -0700
+Subject: hv_netvsc: Fix the real number of queues of non-vRSS cases
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+[ Commit 6450f8f269a9271985e4a8c13920b7e4cf21c0f3 upstream. ]
+
+For older hosts without multi-channel (vRSS) support, and some error
+cases, we still need to set the real number of queues to one.
+This patch adds this missing setting.
+
+Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug")
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc_drv.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -1932,6 +1932,12 @@ static int netvsc_probe(struct hv_device
+       /* We always need headroom for rndis header */
+       net->needed_headroom = RNDIS_AND_PPI_SIZE;
++      /* Initialize the number of queues to be 1, we may change it if more
++       * channels are offered later.
++       */
++      netif_set_real_num_tx_queues(net, 1);
++      netif_set_real_num_rx_queues(net, 1);
++
+       /* Notify the netvsc driver of the new device */
+       memset(&device_info, 0, sizeof(device_info));
+       device_info.ring_size = ring_size;
diff --git a/queue-4.14/hv_netvsc-netvsc_teardown_gpadl-split.patch b/queue-4.14/hv_netvsc-netvsc_teardown_gpadl-split.patch
new file mode 100644 (file)
index 0000000..1c90a78
--- /dev/null
@@ -0,0 +1,147 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Mon, 14 May 2018 15:32:05 -0700
+Subject: hv_netvsc: netvsc_teardown_gpadl() split
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+[ Commit 0cf737808ae7cb25e952be619db46b9147a92f46 upstream. ]
+
+It was found that in some cases host refuses to teardown GPADL for send/
+receive buffers (probably when some work with these buffers is scheduled or
+ongoing). Change the teardown logic to be:
+1) Send NVSP_MSG1_TYPE_REVOKE_* messages
+2) Close the channel
+3) Teardown GPADLs.
+This seems to work reliably.
+
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c |   69 ++++++++++++++++++++++----------------------
+ 1 file changed, 36 insertions(+), 33 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -100,12 +100,11 @@ static void free_netvsc_device_rcu(struc
+       call_rcu(&nvdev->rcu, free_netvsc_device);
+ }
+-static void netvsc_destroy_buf(struct hv_device *device)
++static void netvsc_revoke_buf(struct hv_device *device,
++                            struct netvsc_device *net_device)
+ {
+       struct nvsp_message *revoke_packet;
+       struct net_device *ndev = hv_get_drvdata(device);
+-      struct net_device_context *ndc = netdev_priv(ndev);
+-      struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev);
+       int ret;
+       /*
+@@ -148,28 +147,6 @@ static void netvsc_destroy_buf(struct hv
+               net_device->recv_section_cnt = 0;
+       }
+-      /* Teardown the gpadl on the vsp end */
+-      if (net_device->recv_buf_gpadl_handle) {
+-              ret = vmbus_teardown_gpadl(device->channel,
+-                                         net_device->recv_buf_gpadl_handle);
+-
+-              /* If we failed here, we might as well return and have a leak
+-               * rather than continue and a bugchk
+-               */
+-              if (ret != 0) {
+-                      netdev_err(ndev,
+-                                 "unable to teardown receive buffer's gpadl\n");
+-                      return;
+-              }
+-              net_device->recv_buf_gpadl_handle = 0;
+-      }
+-
+-      if (net_device->recv_buf) {
+-              /* Free up the receive buffer */
+-              vfree(net_device->recv_buf);
+-              net_device->recv_buf = NULL;
+-      }
+-
+       /* Deal with the send buffer we may have setup.
+        * If we got a  send section size, it means we received a
+        * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
+@@ -210,7 +187,35 @@ static void netvsc_destroy_buf(struct hv
+               }
+               net_device->send_section_cnt = 0;
+       }
+-      /* Teardown the gpadl on the vsp end */
++}
++
++static void netvsc_teardown_gpadl(struct hv_device *device,
++                                struct netvsc_device *net_device)
++{
++      struct net_device *ndev = hv_get_drvdata(device);
++      int ret;
++
++      if (net_device->recv_buf_gpadl_handle) {
++              ret = vmbus_teardown_gpadl(device->channel,
++                                         net_device->recv_buf_gpadl_handle);
++
++              /* If we failed here, we might as well return and have a leak
++               * rather than continue and a bugchk
++               */
++              if (ret != 0) {
++                      netdev_err(ndev,
++                                 "unable to teardown receive buffer's gpadl\n");
++                      return;
++              }
++              net_device->recv_buf_gpadl_handle = 0;
++      }
++
++      if (net_device->recv_buf) {
++              /* Free up the receive buffer */
++              vfree(net_device->recv_buf);
++              net_device->recv_buf = NULL;
++      }
++
+       if (net_device->send_buf_gpadl_handle) {
+               ret = vmbus_teardown_gpadl(device->channel,
+                                          net_device->send_buf_gpadl_handle);
+@@ -425,7 +430,8 @@ static int netvsc_init_buf(struct hv_dev
+       goto exit;
+ cleanup:
+-      netvsc_destroy_buf(device);
++      netvsc_revoke_buf(device, net_device);
++      netvsc_teardown_gpadl(device, net_device);
+ exit:
+       return ret;
+@@ -544,11 +550,6 @@ cleanup:
+       return ret;
+ }
+-static void netvsc_disconnect_vsp(struct hv_device *device)
+-{
+-      netvsc_destroy_buf(device);
+-}
+-
+ /*
+  * netvsc_device_remove - Callback when the root bus device is removed
+  */
+@@ -562,7 +563,7 @@ void netvsc_device_remove(struct hv_devi
+       cancel_work_sync(&net_device->subchan_work);
+-      netvsc_disconnect_vsp(device);
++      netvsc_revoke_buf(device, net_device);
+       RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
+@@ -575,6 +576,8 @@ void netvsc_device_remove(struct hv_devi
+       /* Now, we can close the channel safely */
+       vmbus_close(device->channel);
++      netvsc_teardown_gpadl(device, net_device);
++
+       /* And dissassociate NAPI context from device */
+       for (i = 0; i < net_device->num_chn; i++)
+               netif_napi_del(&net_device->chan_table[i].napi);
diff --git a/queue-4.14/hv_netvsc-only-wake-transmit-queue-if-link-is-up.patch b/queue-4.14/hv_netvsc-only-wake-transmit-queue-if-link-is-up.patch
new file mode 100644 (file)
index 0000000..9277574
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:10 -0700
+Subject: hv_netvsc: only wake transmit queue if link is up
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit f4950e4586dfc957e0a28226eeb992ddc049b5a2 upstream. ]
+
+Don't wake transmit queues if link is not up yet.
+
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc_drv.c |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -88,12 +88,11 @@ static int netvsc_open(struct net_device
+               return ret;
+       }
+-      netif_tx_wake_all_queues(net);
+-
+       rdev = nvdev->extension;
+-
+-      if (!rdev->link_state)
++      if (!rdev->link_state) {
+               netif_carrier_on(net);
++              netif_tx_wake_all_queues(net);
++      }
+       if (vf_netdev) {
+               /* Setting synthetic device up transparently sets
diff --git a/queue-4.14/hv_netvsc-preserve-hw_features-on-mtu-channels-ringparam-changes.patch b/queue-4.14/hv_netvsc-preserve-hw_features-on-mtu-channels-ringparam-changes.patch
new file mode 100644 (file)
index 0000000..88a885c
--- /dev/null
@@ -0,0 +1,232 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Mon, 14 May 2018 15:32:06 -0700
+Subject: hv_netvsc: preserve hw_features on mtu/channels/ringparam changes
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+[ Commit aefd80e874e98a864915df5b7d90824a4340b450 upstream. ]
+
+rndis_filter_device_add() is called both from netvsc_probe() when we
+initially create the device and from set channels/mtu/ringparam
+routines where we basically remove the device and add it back.
+
+hw_features is reset in rndis_filter_device_add() and filled with
+host data. However, we lose all additional flags which are set outside
+of the driver, e.g. register_netdevice() adds NETIF_F_SOFT_FEATURES and
+many others.
+
+Unfortunately, calls to rndis_{query_hwcaps(), _set_offload_params()}
+cannot be avoided on every RNDIS reset: host expects us to set
+required features explicitly. Moreover, in theory hardware capabilities
+can change and we need to reflect the change in hw_features.
+
+Reset net->hw_features bits according to host data in
+rndis_netdev_set_hwcaps(), clear corresponding feature bits
+from net->features in case some features went missing (will never happen
+in real life I guess but let's be consistent).
+
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/hyperv_net.h   |    4 +
+ drivers/net/hyperv/netvsc_drv.c   |    2 
+ drivers/net/hyperv/rndis_filter.c |  136 +++++++++++++++++++++-----------------
+ 3 files changed, 83 insertions(+), 59 deletions(-)
+
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -659,6 +659,10 @@ struct nvsp_message {
+ #define NETVSC_RECEIVE_BUFFER_ID              0xcafe
+ #define NETVSC_SEND_BUFFER_ID                 0
++#define NETVSC_SUPPORTED_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | \
++                                    NETIF_F_TSO | NETIF_F_IPV6_CSUM | \
++                                    NETIF_F_TSO6)
++
+ #define VRSS_SEND_TAB_SIZE 16  /* must be power of 2 */
+ #define VRSS_CHANNEL_MAX 64
+ #define VRSS_CHANNEL_DEFAULT 8
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -1956,7 +1956,7 @@ static int netvsc_probe(struct hv_device
+       memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
+-      /* hw_features computed in rndis_filter_device_add */
++      /* hw_features computed in rndis_netdev_set_hwcaps() */
+       net->features = net->hw_features |
+               NETIF_F_HIGHDMA | NETIF_F_SG |
+               NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -1131,69 +1131,20 @@ unlock:
+       rtnl_unlock();
+ }
+-struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
+-                                    struct netvsc_device_info *device_info)
++static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
++                                 struct netvsc_device *nvdev)
+ {
+-      struct net_device *net = hv_get_drvdata(dev);
++      struct net_device *net = rndis_device->ndev;
+       struct net_device_context *net_device_ctx = netdev_priv(net);
+-      struct netvsc_device *net_device;
+-      struct rndis_device *rndis_device;
+       struct ndis_offload hwcaps;
+       struct ndis_offload_params offloads;
+-      struct ndis_recv_scale_cap rsscap;
+-      u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
+       unsigned int gso_max_size = GSO_MAX_SIZE;
+-      u32 mtu, size;
+-      const struct cpumask *node_cpu_mask;
+-      u32 num_possible_rss_qs;
+-      int i, ret;
+-
+-      rndis_device = get_rndis_device();
+-      if (!rndis_device)
+-              return ERR_PTR(-ENODEV);
+-
+-      /*
+-       * Let the inner driver handle this first to create the netvsc channel
+-       * NOTE! Once the channel is created, we may get a receive callback
+-       * (RndisFilterOnReceive()) before this call is completed
+-       */
+-      net_device = netvsc_device_add(dev, device_info);
+-      if (IS_ERR(net_device)) {
+-              kfree(rndis_device);
+-              return net_device;
+-      }
+-
+-      /* Initialize the rndis device */
+-      net_device->max_chn = 1;
+-      net_device->num_chn = 1;
+-
+-      net_device->extension = rndis_device;
+-      rndis_device->ndev = net;
+-
+-      /* Send the rndis initialization message */
+-      ret = rndis_filter_init_device(rndis_device, net_device);
+-      if (ret != 0)
+-              goto err_dev_remv;
+-
+-      /* Get the MTU from the host */
+-      size = sizeof(u32);
+-      ret = rndis_filter_query_device(rndis_device, net_device,
+-                                      RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE,
+-                                      &mtu, &size);
+-      if (ret == 0 && size == sizeof(u32) && mtu < net->mtu)
+-              net->mtu = mtu;
+-
+-      /* Get the mac address */
+-      ret = rndis_filter_query_device_mac(rndis_device, net_device);
+-      if (ret != 0)
+-              goto err_dev_remv;
+-
+-      memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
++      int ret;
+       /* Find HW offload capabilities */
+-      ret = rndis_query_hwcaps(rndis_device, net_device, &hwcaps);
++      ret = rndis_query_hwcaps(rndis_device, nvdev, &hwcaps);
+       if (ret != 0)
+-              goto err_dev_remv;
++              return ret;
+       /* A value of zero means "no change"; now turn on what we want. */
+       memset(&offloads, 0, sizeof(struct ndis_offload_params));
+@@ -1201,8 +1152,12 @@ struct netvsc_device *rndis_filter_devic
+       /* Linux does not care about IP checksum, always does in kernel */
+       offloads.ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED;
++      /* Reset previously set hw_features flags */
++      net->hw_features &= ~NETVSC_SUPPORTED_HW_FEATURES;
++      net_device_ctx->tx_checksum_mask = 0;
++
+       /* Compute tx offload settings based on hw capabilities */
+-      net->hw_features = NETIF_F_RXCSUM;
++      net->hw_features |= NETIF_F_RXCSUM;
+       if ((hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_ALL_TCP4) == NDIS_TXCSUM_ALL_TCP4) {
+               /* Can checksum TCP */
+@@ -1246,10 +1201,75 @@ struct netvsc_device *rndis_filter_devic
+               }
+       }
++      /* In case some hw_features disappeared we need to remove them from
++       * net->features list as they're no longer supported.
++       */
++      net->features &= ~NETVSC_SUPPORTED_HW_FEATURES | net->hw_features;
++
+       netif_set_gso_max_size(net, gso_max_size);
+-      ret = rndis_filter_set_offload_params(net, net_device, &offloads);
+-      if (ret)
++      ret = rndis_filter_set_offload_params(net, nvdev, &offloads);
++
++      return ret;
++}
++
++struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
++                                    struct netvsc_device_info *device_info)
++{
++      struct net_device *net = hv_get_drvdata(dev);
++      struct netvsc_device *net_device;
++      struct rndis_device *rndis_device;
++      struct ndis_recv_scale_cap rsscap;
++      u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
++      u32 mtu, size;
++      const struct cpumask *node_cpu_mask;
++      u32 num_possible_rss_qs;
++      int i, ret;
++
++      rndis_device = get_rndis_device();
++      if (!rndis_device)
++              return ERR_PTR(-ENODEV);
++
++      /* Let the inner driver handle this first to create the netvsc channel
++       * NOTE! Once the channel is created, we may get a receive callback
++       * (RndisFilterOnReceive()) before this call is completed
++       */
++      net_device = netvsc_device_add(dev, device_info);
++      if (IS_ERR(net_device)) {
++              kfree(rndis_device);
++              return net_device;
++      }
++
++      /* Initialize the rndis device */
++      net_device->max_chn = 1;
++      net_device->num_chn = 1;
++
++      net_device->extension = rndis_device;
++      rndis_device->ndev = net;
++
++      /* Send the rndis initialization message */
++      ret = rndis_filter_init_device(rndis_device, net_device);
++      if (ret != 0)
++              goto err_dev_remv;
++
++      /* Get the MTU from the host */
++      size = sizeof(u32);
++      ret = rndis_filter_query_device(rndis_device, net_device,
++                                      RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE,
++                                      &mtu, &size);
++      if (ret == 0 && size == sizeof(u32) && mtu < net->mtu)
++              net->mtu = mtu;
++
++      /* Get the mac address */
++      ret = rndis_filter_query_device_mac(rndis_device, net_device);
++      if (ret != 0)
++              goto err_dev_remv;
++
++      memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
++
++      /* Query and set hardware capabilities */
++      ret = rndis_netdev_set_hwcaps(rndis_device, net_device);
++      if (ret != 0)
+               goto err_dev_remv;
+       rndis_filter_query_device_link_status(rndis_device, net_device);
diff --git a/queue-4.14/hv_netvsc-rename-ind_table-to-rx_table.patch b/queue-4.14/hv_netvsc-rename-ind_table-to-rx_table.patch
new file mode 100644 (file)
index 0000000..8f54a63
--- /dev/null
@@ -0,0 +1,74 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 14 May 2018 15:32:01 -0700
+Subject: hv_netvsc: Rename ind_table to rx_table
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+[ Commit 47371300dfc269dd8d150e5b872bdbbda98ba809 upstream. ]
+
+Rename this variable because it is the Receive indirection
+table.
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/hyperv_net.h   |    2 +-
+ drivers/net/hyperv/netvsc_drv.c   |    4 ++--
+ drivers/net/hyperv/rndis_filter.c |    6 +++---
+ 3 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -179,7 +179,7 @@ struct rndis_device {
+       u8 hw_mac_adr[ETH_ALEN];
+       u8 rss_key[NETVSC_HASH_KEYLEN];
+-      u16 ind_table[ITAB_NUM];
++      u16 rx_table[ITAB_NUM];
+ };
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -1378,7 +1378,7 @@ static int netvsc_get_rxfh(struct net_de
+       rndis_dev = ndev->extension;
+       if (indir) {
+               for (i = 0; i < ITAB_NUM; i++)
+-                      indir[i] = rndis_dev->ind_table[i];
++                      indir[i] = rndis_dev->rx_table[i];
+       }
+       if (key)
+@@ -1408,7 +1408,7 @@ static int netvsc_set_rxfh(struct net_de
+                               return -EINVAL;
+               for (i = 0; i < ITAB_NUM; i++)
+-                      rndis_dev->ind_table[i] = indir[i];
++                      rndis_dev->rx_table[i] = indir[i];
+       }
+       if (!key) {
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -759,7 +759,7 @@ int rndis_filter_set_rss_param(struct rn
+       /* Set indirection table entries */
+       itab = (u32 *)(rssp + 1);
+       for (i = 0; i < ITAB_NUM; i++)
+-              itab[i] = rdev->ind_table[i];
++              itab[i] = rdev->rx_table[i];
+       /* Set hask key values */
+       keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset);
+@@ -1284,8 +1284,8 @@ struct netvsc_device *rndis_filter_devic
+       net_device->num_chn = min(net_device->max_chn, device_info->num_chn);
+       for (i = 0; i < ITAB_NUM; i++)
+-              rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i,
+-                                                      net_device->num_chn);
++              rndis_device->rx_table[i] = ethtool_rxfh_indir_default(
++                                              i, net_device->num_chn);
+       atomic_set(&net_device->open_chn, 1);
+       vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
diff --git a/queue-4.14/hv_netvsc-rename-tx_send_table-to-tx_table.patch b/queue-4.14/hv_netvsc-rename-tx_send_table-to-tx_table.patch
new file mode 100644 (file)
index 0000000..4adc099
--- /dev/null
@@ -0,0 +1,55 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 14 May 2018 15:32:02 -0700
+Subject: hv_netvsc: Rename tx_send_table to tx_table
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+[ Commit 39e91cfbf6f5fb26ba64cc2e8874372baf1671e7 upstream. ]
+
+Simplify the variable name: tx_send_table
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/hyperv_net.h |    2 +-
+ drivers/net/hyperv/netvsc.c     |    2 +-
+ drivers/net/hyperv/netvsc_drv.c |    4 ++--
+ 3 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -734,7 +734,7 @@ struct net_device_context {
+       u32 tx_checksum_mask;
+-      u32 tx_send_table[VRSS_SEND_TAB_SIZE];
++      u32 tx_table[VRSS_SEND_TAB_SIZE];
+       /* Ethtool settings */
+       bool udp4_l4_hash;
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -1107,7 +1107,7 @@ static void netvsc_send_table(struct hv_
+                     nvmsg->msg.v5_msg.send_table.offset);
+       for (i = 0; i < count; i++)
+-              net_device_ctx->tx_send_table[i] = tab[i];
++              net_device_ctx->tx_table[i] = tab[i];
+ }
+ static void netvsc_send_vf(struct net_device_context *net_device_ctx,
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -234,8 +234,8 @@ static inline int netvsc_get_tx_queue(st
+       struct sock *sk = skb->sk;
+       int q_idx;
+-      q_idx = ndc->tx_send_table[netvsc_get_hash(skb, ndc) &
+-                                 (VRSS_SEND_TAB_SIZE - 1)];
++      q_idx = ndc->tx_table[netvsc_get_hash(skb, ndc) &
++                            (VRSS_SEND_TAB_SIZE - 1)];
+       /* If queue index changed record the new value */
+       if (q_idx != old_idx &&
diff --git a/queue-4.14/hv_netvsc-set-tx_table-to-equal-weight-after-subchannels-open.patch b/queue-4.14/hv_netvsc-set-tx_table-to-equal-weight-after-subchannels-open.patch
new file mode 100644 (file)
index 0000000..aad569b
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 14 May 2018 15:32:04 -0700
+Subject: hv_netvsc: Set tx_table to equal weight after subchannels open
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+[ Commit a6fb6aa3cfa9047b62653dbcfc9bcde6e2272b41 upstream. ]
+
+In some cases, like internal vSwitch, the host doesn't provide
+send indirection table updates. This patch sets the table to be
+equal weight after subchannels are all open. Otherwise, all workload
+will be on one TX channel.
+
+As tested, this patch has largely increased the throughput over
+internal vSwitch.
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/rndis_filter.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -1114,6 +1114,9 @@ void rndis_set_subchannel(struct work_st
+       netif_set_real_num_tx_queues(ndev, nvdev->num_chn);
+       netif_set_real_num_rx_queues(ndev, nvdev->num_chn);
++      for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
++              ndev_ctx->tx_table[i] = i % nvdev->num_chn;
++
+       rtnl_unlock();
+       return;
diff --git a/queue-4.14/hv_netvsc-split-netvsc_revoke_buf-and-netvsc_teardown_gpadl.patch b/queue-4.14/hv_netvsc-split-netvsc_revoke_buf-and-netvsc_teardown_gpadl.patch
new file mode 100644 (file)
index 0000000..6d15acb
--- /dev/null
@@ -0,0 +1,125 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Mohammed Gamal <mgamal@redhat.com>
+Date: Mon, 14 May 2018 15:32:20 -0700
+Subject: hv_netvsc: Split netvsc_revoke_buf() and netvsc_teardown_gpadl()
+
+From: Mohammed Gamal <mgamal@redhat.com>
+
+[ Commit 7992894c305eaf504d005529637ff8283d0a849d upstream. ]
+
+Split each of the functions into two for each of send/recv buffers.
+This will be needed in order to implement a fine-grained messaging
+sequence to the host so that we accommodate the requirements of
+different Windows versions
+
+Fixes: 0ef58b0a05c12 ("hv_netvsc: change GPAD teardown order on older versions")
+Signed-off-by: Mohammed Gamal <mgamal@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c |   46 ++++++++++++++++++++++++++++++++------------
+ 1 file changed, 34 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -105,11 +105,11 @@ static void free_netvsc_device_rcu(struc
+       call_rcu(&nvdev->rcu, free_netvsc_device);
+ }
+-static void netvsc_revoke_buf(struct hv_device *device,
+-                            struct netvsc_device *net_device)
++static void netvsc_revoke_recv_buf(struct hv_device *device,
++                                 struct netvsc_device *net_device)
+ {
+-      struct nvsp_message *revoke_packet;
+       struct net_device *ndev = hv_get_drvdata(device);
++      struct nvsp_message *revoke_packet;
+       int ret;
+       /*
+@@ -151,6 +151,14 @@ static void netvsc_revoke_buf(struct hv_
+               }
+               net_device->recv_section_cnt = 0;
+       }
++}
++
++static void netvsc_revoke_send_buf(struct hv_device *device,
++                                 struct netvsc_device *net_device)
++{
++      struct net_device *ndev = hv_get_drvdata(device);
++      struct nvsp_message *revoke_packet;
++      int ret;
+       /* Deal with the send buffer we may have setup.
+        * If we got a  send section size, it means we received a
+@@ -194,8 +202,8 @@ static void netvsc_revoke_buf(struct hv_
+       }
+ }
+-static void netvsc_teardown_gpadl(struct hv_device *device,
+-                                struct netvsc_device *net_device)
++static void netvsc_teardown_recv_gpadl(struct hv_device *device,
++                                     struct netvsc_device *net_device)
+ {
+       struct net_device *ndev = hv_get_drvdata(device);
+       int ret;
+@@ -214,6 +222,13 @@ static void netvsc_teardown_gpadl(struct
+               }
+               net_device->recv_buf_gpadl_handle = 0;
+       }
++}
++
++static void netvsc_teardown_send_gpadl(struct hv_device *device,
++                                     struct netvsc_device *net_device)
++{
++      struct net_device *ndev = hv_get_drvdata(device);
++      int ret;
+       if (net_device->send_buf_gpadl_handle) {
+               ret = vmbus_teardown_gpadl(device->channel,
+@@ -423,8 +438,10 @@ static int netvsc_init_buf(struct hv_dev
+       goto exit;
+ cleanup:
+-      netvsc_revoke_buf(device, net_device);
+-      netvsc_teardown_gpadl(device, net_device);
++      netvsc_revoke_recv_buf(device, net_device);
++      netvsc_revoke_send_buf(device, net_device);
++      netvsc_teardown_recv_gpadl(device, net_device);
++      netvsc_teardown_send_gpadl(device, net_device);
+ exit:
+       return ret;
+@@ -554,7 +571,8 @@ void netvsc_device_remove(struct hv_devi
+               = rtnl_dereference(net_device_ctx->nvdev);
+       int i;
+-      netvsc_revoke_buf(device, net_device);
++      netvsc_revoke_recv_buf(device, net_device);
++      netvsc_revoke_send_buf(device, net_device);
+       RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
+@@ -569,14 +587,18 @@ void netvsc_device_remove(struct hv_devi
+       netdev_dbg(ndev, "net device safe to remove\n");
+       /* older versions require that buffer be revoked before close */
+-      if (vmbus_proto_version < VERSION_WIN10)
+-              netvsc_teardown_gpadl(device, net_device);
++      if (vmbus_proto_version < VERSION_WIN10) {
++              netvsc_teardown_recv_gpadl(device, net_device);
++              netvsc_teardown_send_gpadl(device, net_device);
++      }
+       /* Now, we can close the channel safely */
+       vmbus_close(device->channel);
+-      if (vmbus_proto_version >= VERSION_WIN10)
+-              netvsc_teardown_gpadl(device, net_device);
++      if (vmbus_proto_version >= VERSION_WIN10) {
++              netvsc_teardown_recv_gpadl(device, net_device);
++              netvsc_teardown_send_gpadl(device, net_device);
++      }
+       /* Release all resources */
+       free_netvsc_device_rcu(net_device);
diff --git a/queue-4.14/hv_netvsc-use-rcu-to-fix-concurrent-rx-and-queue-changes.patch b/queue-4.14/hv_netvsc-use-rcu-to-fix-concurrent-rx-and-queue-changes.patch
new file mode 100644 (file)
index 0000000..bcbb737
--- /dev/null
@@ -0,0 +1,159 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Mon, 14 May 2018 15:32:16 -0700
+Subject: hv_netvsc: use RCU to fix concurrent rx and queue changes
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Commit 02400fcee2542ee334a2394e0d9f6efd969fe782 upstream. ]
+
+The receive processing may continue to happen while the
+internal network device state is in RCU grace period.
+The internal RNDIS structure is associated with the
+internal netvsc_device structure; both have the same
+RCU lifetime.
+
+Defer freeing all associated parts until after grace
+period.
+
+Fixes: 0cf737808ae7 ("hv_netvsc: netvsc_teardown_gpadl() split")
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c       |   17 ++++------------
+ drivers/net/hyperv/rndis_filter.c |   39 ++++++++++++++++----------------------
+ 2 files changed, 22 insertions(+), 34 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -89,6 +89,11 @@ static void free_netvsc_device(struct rc
+               = container_of(head, struct netvsc_device, rcu);
+       int i;
++      kfree(nvdev->extension);
++      vfree(nvdev->recv_buf);
++      vfree(nvdev->send_buf);
++      kfree(nvdev->send_section_map);
++
+       for (i = 0; i < VRSS_CHANNEL_MAX; i++)
+               vfree(nvdev->chan_table[i].mrc.slots);
+@@ -210,12 +215,6 @@ static void netvsc_teardown_gpadl(struct
+               net_device->recv_buf_gpadl_handle = 0;
+       }
+-      if (net_device->recv_buf) {
+-              /* Free up the receive buffer */
+-              vfree(net_device->recv_buf);
+-              net_device->recv_buf = NULL;
+-      }
+-
+       if (net_device->send_buf_gpadl_handle) {
+               ret = vmbus_teardown_gpadl(device->channel,
+                                          net_device->send_buf_gpadl_handle);
+@@ -230,12 +229,6 @@ static void netvsc_teardown_gpadl(struct
+               }
+               net_device->send_buf_gpadl_handle = 0;
+       }
+-      if (net_device->send_buf) {
+-              /* Free up the send buffer */
+-              vfree(net_device->send_buf);
+-              net_device->send_buf = NULL;
+-      }
+-      kfree(net_device->send_section_map);
+ }
+ int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -266,13 +266,23 @@ static void rndis_set_link_state(struct
+       }
+ }
+-static void rndis_filter_receive_response(struct rndis_device *dev,
+-                                     struct rndis_message *resp)
++static void rndis_filter_receive_response(struct net_device *ndev,
++                                        struct netvsc_device *nvdev,
++                                        const struct rndis_message *resp)
+ {
++      struct rndis_device *dev = nvdev->extension;
+       struct rndis_request *request = NULL;
+       bool found = false;
+       unsigned long flags;
+-      struct net_device *ndev = dev->ndev;
++
++      /* This should never happen, it means control message
++       * response received after device removed.
++       */
++      if (dev->state == RNDIS_DEV_UNINITIALIZED) {
++              netdev_err(ndev,
++                         "got rndis message uninitialized\n");
++              return;
++      }
+       spin_lock_irqsave(&dev->request_lock, flags);
+       list_for_each_entry(request, &dev->req_list, list_ent) {
+@@ -353,7 +363,7 @@ static inline void *rndis_get_ppi(struct
+ }
+ static int rndis_filter_receive_data(struct net_device *ndev,
+-                                   struct rndis_device *dev,
++                                   struct netvsc_device *nvdev,
+                                    struct rndis_message *msg,
+                                    struct vmbus_channel *channel,
+                                    void *data, u32 data_buflen)
+@@ -373,7 +383,7 @@ static int rndis_filter_receive_data(str
+        * should be the data packet size plus the trailer padding size
+        */
+       if (unlikely(data_buflen < rndis_pkt->data_len)) {
+-              netdev_err(dev->ndev, "rndis message buffer "
++              netdev_err(ndev, "rndis message buffer "
+                          "overflow detected (got %u, min %u)"
+                          "...dropping this message!\n",
+                          data_buflen, rndis_pkt->data_len);
+@@ -401,34 +411,20 @@ int rndis_filter_receive(struct net_devi
+                        void *data, u32 buflen)
+ {
+       struct net_device_context *net_device_ctx = netdev_priv(ndev);
+-      struct rndis_device *rndis_dev = net_dev->extension;
+       struct rndis_message *rndis_msg = data;
+-      /* Make sure the rndis device state is initialized */
+-      if (unlikely(!rndis_dev)) {
+-              netif_err(net_device_ctx, rx_err, ndev,
+-                        "got rndis message but no rndis device!\n");
+-              return NVSP_STAT_FAIL;
+-      }
+-
+-      if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) {
+-              netif_err(net_device_ctx, rx_err, ndev,
+-                        "got rndis message uninitialized\n");
+-              return NVSP_STAT_FAIL;
+-      }
+-
+       if (netif_msg_rx_status(net_device_ctx))
+               dump_rndis_message(dev, rndis_msg);
+       switch (rndis_msg->ndis_msg_type) {
+       case RNDIS_MSG_PACKET:
+-              return rndis_filter_receive_data(ndev, rndis_dev, rndis_msg,
++              return rndis_filter_receive_data(ndev, net_dev, rndis_msg,
+                                                channel, data, buflen);
+       case RNDIS_MSG_INIT_C:
+       case RNDIS_MSG_QUERY_C:
+       case RNDIS_MSG_SET_C:
+               /* completion msgs */
+-              rndis_filter_receive_response(rndis_dev, rndis_msg);
++              rndis_filter_receive_response(ndev, net_dev, rndis_msg);
+               break;
+       case RNDIS_MSG_INDICATE:
+@@ -1349,7 +1345,6 @@ void rndis_filter_device_remove(struct h
+       net_dev->extension = NULL;
+       netvsc_device_remove(dev);
+-      kfree(rndis_dev);
+ }
+ int rndis_filter_open(struct netvsc_device *nvdev)
diff --git a/queue-4.14/hv_netvsc-use-the-num_online_cpus-for-channel-limit.patch b/queue-4.14/hv_netvsc-use-the-num_online_cpus-for-channel-limit.patch
new file mode 100644 (file)
index 0000000..3a53991
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 14 May 2018 15:32:08 -0700
+Subject: hv_netvsc: Use the num_online_cpus() for channel limit
+
+From: Haiyang Zhang <haiyangz@microsoft.com>
+
+[ Commit 25a39f7f975c3c26a0052fbf9b59201c06744332 upstream. ]
+
+Since we no longer localize channel/CPU affiliation within one NUMA
+node, num_online_cpus() is used as the cap on the number of channels,
+instead of the number of processors in a NUMA node.
+
+This patch allows a bigger range for tuning the number of channels.
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/rndis_filter.c |   11 ++---------
+ 1 file changed, 2 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -1221,7 +1221,6 @@ struct netvsc_device *rndis_filter_devic
+       struct ndis_recv_scale_cap rsscap;
+       u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
+       u32 mtu, size;
+-      const struct cpumask *node_cpu_mask;
+       u32 num_possible_rss_qs;
+       int i, ret;
+@@ -1290,14 +1289,8 @@ struct netvsc_device *rndis_filter_devic
+       if (ret || rsscap.num_recv_que < 2)
+               goto out;
+-      /*
+-       * We will limit the VRSS channels to the number CPUs in the NUMA node
+-       * the primary channel is currently bound to.
+-       *
+-       * This also guarantees that num_possible_rss_qs <= num_online_cpus
+-       */
+-      node_cpu_mask = cpumask_of_node(cpu_to_node(dev->channel->target_cpu));
+-      num_possible_rss_qs = min_t(u32, cpumask_weight(node_cpu_mask),
++      /* This guarantees that num_possible_rss_qs <= num_online_cpus */
++      num_possible_rss_qs = min_t(u32, num_online_cpus(),
+                                   rsscap.num_recv_que);
+       net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, num_possible_rss_qs);
diff --git a/queue-4.14/hv_netvsc-use-windows-version-instead-of-nvsp-version-on-gpad-teardown.patch b/queue-4.14/hv_netvsc-use-windows-version-instead-of-nvsp-version-on-gpad-teardown.patch
new file mode 100644 (file)
index 0000000..3f5e38c
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Mohammed Gamal <mgamal@redhat.com>
+Date: Mon, 14 May 2018 15:32:19 -0700
+Subject: hv_netvsc: Use Windows version instead of NVSP version on GPAD teardown
+
+From: Mohammed Gamal <mgamal@redhat.com>
+
+[ Commit 2afc5d61a7197de25a61f54ea4ecfb4cb62b1d42 upstream. ]
+
+When changing network interface settings, Windows guests
+older than WS2016 can no longer shut down. This was addressed
+by commit 0ef58b0a05c12 ("hv_netvsc: change GPAD teardown order
+on older versions"), however the issue also occurs on WS2012
+guests that share NVSP protocol versions with WS2016 guests.
+Hence we use Windows version directly to differentiate them.
+
+Fixes: 0ef58b0a05c12 ("hv_netvsc: change GPAD teardown order on older versions")
+Signed-off-by: Mohammed Gamal <mgamal@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -569,13 +569,13 @@ void netvsc_device_remove(struct hv_devi
+       netdev_dbg(ndev, "net device safe to remove\n");
+       /* older versions require that buffer be revoked before close */
+-      if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4)
++      if (vmbus_proto_version < VERSION_WIN10)
+               netvsc_teardown_gpadl(device, net_device);
+       /* Now, we can close the channel safely */
+       vmbus_close(device->channel);
+-      if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4)
++      if (vmbus_proto_version >= VERSION_WIN10)
+               netvsc_teardown_gpadl(device, net_device);
+       /* Release all resources */
diff --git a/queue-4.14/net-fix-a-bug-in-removing-queues-from-xps-map.patch b/queue-4.14/net-fix-a-bug-in-removing-queues-from-xps-map.patch
new file mode 100644 (file)
index 0000000..301d67f
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Amritha Nambiar <amritha.nambiar@intel.com>
+Date: Thu, 17 May 2018 14:50:44 -0700
+Subject: net: Fix a bug in removing queues from XPS map
+
+From: Amritha Nambiar <amritha.nambiar@intel.com>
+
+[ Upstream commit 6358d49ac23995fdfe157cc8747ab0f274d3954b ]
+
+While removing queues from the XPS map, the individual CPU ID
+alone was used to index the CPUs map, this should be changed to also
+factor in the traffic class mapping for the CPU-to-queue lookup.
+
+Fixes: 184c449f91fe ("net: Add support for XPS with QoS via traffic classes")
+Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
+Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2058,7 +2058,7 @@ static bool remove_xps_queue_cpu(struct
+               int i, j;
+               for (i = count, j = offset; i--; j++) {
+-                      if (!remove_xps_queue(dev_maps, cpu, j))
++                      if (!remove_xps_queue(dev_maps, tci, j))
+                               break;
+               }
diff --git a/queue-4.14/net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch b/queue-4.14/net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch
new file mode 100644 (file)
index 0000000..e9238c7
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Tarick Bedeir <tarick@google.com>
+Date: Sun, 13 May 2018 16:38:45 -0700
+Subject: net/mlx4_core: Fix error handling in mlx4_init_port_info.
+
+From: Tarick Bedeir <tarick@google.com>
+
+[ Upstream commit 57f6f99fdad9984801cde05c1db68fe39b474a10 ]
+
+Avoid exiting the function with a lingering sysfs file (if the first
+call to device_create_file() fails while the second succeeds), and avoid
+calling devlink_port_unregister() twice.
+
+In other words, either mlx4_init_port_info() succeeds and returns zero, or
+it fails, returns non-zero, and requires no cleanup.
+
+Fixes: 096335b3f983 ("mlx4_core: Allow dynamic MTU configuration for IB ports")
+Signed-off-by: Tarick Bedeir <tarick@google.com>
+Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
+Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/main.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/main.c
++++ b/drivers/net/ethernet/mellanox/mlx4/main.c
+@@ -3007,6 +3007,7 @@ static int mlx4_init_port_info(struct ml
+               mlx4_err(dev, "Failed to create file for port %d\n", port);
+               devlink_port_unregister(&info->devlink_port);
+               info->port = -1;
++              return err;
+       }
+       sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
+@@ -3028,9 +3029,10 @@ static int mlx4_init_port_info(struct ml
+                                  &info->port_attr);
+               devlink_port_unregister(&info->devlink_port);
+               info->port = -1;
++              return err;
+       }
+-      return err;
++      return 0;
+ }
+ static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
diff --git a/queue-4.14/net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch b/queue-4.14/net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch
new file mode 100644 (file)
index 0000000..63d3586
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Davide Caratti <dcaratti@redhat.com>
+Date: Wed, 16 May 2018 12:54:29 +0200
+Subject: net/sched: fix refcnt leak in the error path of tcf_vlan_init()
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+[ Upstream commit 5a4931ae0193f8a4a97e8260fd0df1d705d83299 ]
+
+Similarly to what was done with commit a52956dfc503 ("net sched actions:
+fix refcnt leak in skbmod"), fix the error path of tcf_vlan_init() to avoid
+refcnt leaks when wrong value of TCA_VLAN_PUSH_VLAN_PROTOCOL is given.
+
+Fixes: 5026c9b1bafc ("net sched: vlan action fix late binding")
+CC: Roman Mashak <mrv@mojatatu.com>
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_vlan.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/sched/act_vlan.c
++++ b/net/sched/act_vlan.c
+@@ -154,6 +154,8 @@ static int tcf_vlan_init(struct net *net
+                       case htons(ETH_P_8021AD):
+                               break;
+                       default:
++                              if (exists)
++                                      tcf_idr_release(*a, bind);
+                               return -EPROTONOSUPPORT;
+                       }
+               } else {
diff --git a/queue-4.14/net-sched-red-avoid-hashing-null-child.patch b/queue-4.14/net-sched-red-avoid-hashing-null-child.patch
new file mode 100644 (file)
index 0000000..4b8cc6a
--- /dev/null
@@ -0,0 +1,108 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 18 May 2018 14:51:44 +0200
+Subject: net: sched: red: avoid hashing NULL child
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 44a63b137f7b6e4c7bd6c9cc21615941cb36509d ]
+
+Hangbin reported an Oops triggered by the syzkaller qdisc rules:
+
+ kasan: GPF could be caused by NULL-ptr deref or user memory access
+ general protection fault: 0000 [#1] SMP KASAN PTI
+ Modules linked in: sch_red
+ CPU: 0 PID: 28699 Comm: syz-executor5 Not tainted 4.17.0-rc4.kcov #1
+ Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
+ RIP: 0010:qdisc_hash_add+0x26/0xa0
+ RSP: 0018:ffff8800589cf470 EFLAGS: 00010203
+ RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff824ad971
+ RDX: 0000000000000007 RSI: ffffc9000ce9f000 RDI: 000000000000003c
+ RBP: 0000000000000001 R08: ffffed000b139ea2 R09: ffff8800589cf4f0
+ R10: ffff8800589cf50f R11: ffffed000b139ea2 R12: ffff880054019fc0
+ R13: ffff880054019fb4 R14: ffff88005c0af600 R15: ffff880054019fb0
+ FS:  00007fa6edcb1700(0000) GS:ffff88005ce00000(0000) knlGS:0000000000000000
+ CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000020000740 CR3: 000000000fc16000 CR4: 00000000000006f0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+  red_change+0x2d2/0xed0 [sch_red]
+  qdisc_create+0x57e/0xef0
+  tc_modify_qdisc+0x47f/0x14e0
+  rtnetlink_rcv_msg+0x6a8/0x920
+  netlink_rcv_skb+0x2a2/0x3c0
+  netlink_unicast+0x511/0x740
+  netlink_sendmsg+0x825/0xc30
+  sock_sendmsg+0xc5/0x100
+  ___sys_sendmsg+0x778/0x8e0
+  __sys_sendmsg+0xf5/0x1b0
+  do_syscall_64+0xbd/0x3b0
+  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+ RIP: 0033:0x450869
+ RSP: 002b:00007fa6edcb0c48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+ RAX: ffffffffffffffda RBX: 00007fa6edcb16b4 RCX: 0000000000450869
+ RDX: 0000000000000000 RSI: 00000000200000c0 RDI: 0000000000000013
+ RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000
+ R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
+ R13: 0000000000008778 R14: 0000000000702838 R15: 00007fa6edcb1700
+ Code: e9 0b fe ff ff 0f 1f 44 00 00 55 53 48 89 fb 89 f5 e8 3f 07 f3 fe 48 8d 7b 3c 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 04 84 d2 75 51
+ RIP: qdisc_hash_add+0x26/0xa0 RSP: ffff8800589cf470
+
+When a red qdisc is updated with a 0 limit, the child qdisc is left
+unmodified, no additional scheduler is created in red_change(),
+the 'child' local variable is rightfully NULL and must not be added
+to the hash table.
+
+This change addresses the above issue by moving qdisc_hash_add() right
+after the child qdisc creation. It additionally removes unneeded checks
+for noop_qdisc.
+
+Reported-by: Hangbin Liu <liuhangbin@gmail.com>
+Fixes: 49b499718fa1 ("net: sched: make default fifo qdiscs appear in the dump")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_red.c |    5 +++--
+ net/sched/sch_tbf.c |    5 +++--
+ 2 files changed, 6 insertions(+), 4 deletions(-)
+
+--- a/net/sched/sch_red.c
++++ b/net/sched/sch_red.c
+@@ -191,10 +191,11 @@ static int red_change(struct Qdisc *sch,
+               child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
+               if (IS_ERR(child))
+                       return PTR_ERR(child);
+-      }
+-      if (child != &noop_qdisc)
++              /* child is fifo, no need to check for noop_qdisc */
+               qdisc_hash_add(child, true);
++      }
++
+       sch_tree_lock(sch);
+       q->flags = ctl->flags;
+       q->limit = ctl->limit;
+--- a/net/sched/sch_tbf.c
++++ b/net/sched/sch_tbf.c
+@@ -388,6 +388,9 @@ static int tbf_change(struct Qdisc *sch,
+                       err = PTR_ERR(child);
+                       goto done;
+               }
++
++              /* child is fifo, no need to check for noop_qdisc */
++              qdisc_hash_add(child, true);
+       }
+       sch_tree_lock(sch);
+@@ -396,8 +399,6 @@ static int tbf_change(struct Qdisc *sch,
+                                         q->qdisc->qstats.backlog);
+               qdisc_destroy(q->qdisc);
+               q->qdisc = child;
+-              if (child != &noop_qdisc)
+-                      qdisc_hash_add(child, true);
+       }
+       q->limit = qopt->limit;
+       if (tb[TCA_TBF_PBURST])
diff --git a/queue-4.14/net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch b/queue-4.14/net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch
new file mode 100644 (file)
index 0000000..26a8a39
--- /dev/null
@@ -0,0 +1,132 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Eric Biggers <ebiggers@google.com>
+Date: Sun, 13 May 2018 17:01:30 -0700
+Subject: net/smc: check for missing nlattrs in SMC_PNETID messages
+
+From: Eric Biggers <ebiggers@google.com>
+
+[ Upstream commit d49baa7e12ee70c0a7b821d088a770c94c02e494 ]
+
+It's possible to crash the kernel in several different ways by sending
+messages to the SMC_PNETID generic netlink family that are missing the
+expected attributes:
+
+- Missing SMC_PNETID_NAME => null pointer dereference when comparing
+  names.
+- Missing SMC_PNETID_ETHNAME => null pointer dereference accessing
+  smc_pnetentry::ndev.
+- Missing SMC_PNETID_IBNAME => null pointer dereference accessing
+  smc_pnetentry::smcibdev.
+- Missing SMC_PNETID_IBPORT => out of bounds array access to
+  smc_ib_device::pattr[-1].
+
+Fix it by validating that all expected attributes are present and that
+SMC_PNETID_IBPORT is nonzero.
+
+Reported-by: syzbot+5cd61039dc9b8bfa6e47@syzkaller.appspotmail.com
+Fixes: 6812baabf24d ("smc: establish pnet table management")
+Cc: <stable@vger.kernel.org> # v4.11+
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/smc/smc_pnet.c |   71 +++++++++++++++++++++++++++++------------------------
+ 1 file changed, 40 insertions(+), 31 deletions(-)
+
+--- a/net/smc/smc_pnet.c
++++ b/net/smc/smc_pnet.c
+@@ -245,40 +245,45 @@ out:
+ static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem,
+                              struct nlattr *tb[])
+ {
+-      char *string, *ibname = NULL;
+-      int rc = 0;
++      char *string, *ibname;
++      int rc;
+       memset(pnetelem, 0, sizeof(*pnetelem));
+       INIT_LIST_HEAD(&pnetelem->list);
+-      if (tb[SMC_PNETID_NAME]) {
+-              string = (char *)nla_data(tb[SMC_PNETID_NAME]);
+-              if (!smc_pnetid_valid(string, pnetelem->pnet_name)) {
+-                      rc = -EINVAL;
+-                      goto error;
+-              }
+-      }
+-      if (tb[SMC_PNETID_ETHNAME]) {
+-              string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
+-              pnetelem->ndev = dev_get_by_name(net, string);
+-              if (!pnetelem->ndev)
+-                      return -ENOENT;
+-      }
+-      if (tb[SMC_PNETID_IBNAME]) {
+-              ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
+-              ibname = strim(ibname);
+-              pnetelem->smcibdev = smc_pnet_find_ib(ibname);
+-              if (!pnetelem->smcibdev) {
+-                      rc = -ENOENT;
+-                      goto error;
+-              }
+-      }
+-      if (tb[SMC_PNETID_IBPORT]) {
+-              pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
+-              if (pnetelem->ib_port > SMC_MAX_PORTS) {
+-                      rc = -EINVAL;
+-                      goto error;
+-              }
+-      }
++
++      rc = -EINVAL;
++      if (!tb[SMC_PNETID_NAME])
++              goto error;
++      string = (char *)nla_data(tb[SMC_PNETID_NAME]);
++      if (!smc_pnetid_valid(string, pnetelem->pnet_name))
++              goto error;
++
++      rc = -EINVAL;
++      if (!tb[SMC_PNETID_ETHNAME])
++              goto error;
++      rc = -ENOENT;
++      string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
++      pnetelem->ndev = dev_get_by_name(net, string);
++      if (!pnetelem->ndev)
++              goto error;
++
++      rc = -EINVAL;
++      if (!tb[SMC_PNETID_IBNAME])
++              goto error;
++      rc = -ENOENT;
++      ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
++      ibname = strim(ibname);
++      pnetelem->smcibdev = smc_pnet_find_ib(ibname);
++      if (!pnetelem->smcibdev)
++              goto error;
++
++      rc = -EINVAL;
++      if (!tb[SMC_PNETID_IBPORT])
++              goto error;
++      pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
++      if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS)
++              goto error;
++
+       return 0;
+ error:
+@@ -307,6 +312,8 @@ static int smc_pnet_get(struct sk_buff *
+       void *hdr;
+       int rc;
++      if (!info->attrs[SMC_PNETID_NAME])
++              return -EINVAL;
+       pnetelem = smc_pnet_find_pnetid(
+                               (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
+       if (!pnetelem)
+@@ -359,6 +366,8 @@ static int smc_pnet_add(struct sk_buff *
+ static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
+ {
++      if (!info->attrs[SMC_PNETID_NAME])
++              return -EINVAL;
+       return smc_pnet_remove_by_pnetid(
+                               (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
+ }
diff --git a/queue-4.14/net-test-tailroom-before-appending-to-linear-skb.patch b/queue-4.14/net-test-tailroom-before-appending-to-linear-skb.patch
new file mode 100644 (file)
index 0000000..ca4f5ce
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Willem de Bruijn <willemb@google.com>
+Date: Thu, 17 May 2018 13:13:29 -0400
+Subject: net: test tailroom before appending to linear skb
+
+From: Willem de Bruijn <willemb@google.com>
+
+[ Upstream commit 113f99c3358564a0647d444c2ae34e8b1abfd5b9 ]
+
+Device features may change during transmission. In particular with
+corking, a device may toggle scatter-gather in between allocating
+and writing to an skb.
+
+Do not unconditionally assume that !NETIF_F_SG at write time implies
+that the same held at alloc time and thus the skb has sufficient
+tailroom.
+
+This issue predates git history.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_output.c  |    3 ++-
+ net/ipv6/ip6_output.c |    3 ++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -1040,7 +1040,8 @@ alloc_new_skb:
+               if (copy > length)
+                       copy = length;
+-              if (!(rt->dst.dev->features&NETIF_F_SG)) {
++              if (!(rt->dst.dev->features&NETIF_F_SG) &&
++                  skb_tailroom(skb) >= copy) {
+                       unsigned int off;
+                       off = skb->len;
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1488,7 +1488,8 @@ alloc_new_skb:
+               if (copy > length)
+                       copy = length;
+-              if (!(rt->dst.dev->features&NETIF_F_SG)) {
++              if (!(rt->dst.dev->features&NETIF_F_SG) &&
++                  skb_tailroom(skb) >= copy) {
+                       unsigned int off;
+                       off = skb->len;
diff --git a/queue-4.14/packet-in-packet_snd-start-writing-at-link-layer-allocation.patch b/queue-4.14/packet-in-packet_snd-start-writing-at-link-layer-allocation.patch
new file mode 100644 (file)
index 0000000..897760b
--- /dev/null
@@ -0,0 +1,56 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Willem de Bruijn <willemb@google.com>
+Date: Fri, 11 May 2018 13:24:25 -0400
+Subject: packet: in packet_snd start writing at link layer allocation
+
+From: Willem de Bruijn <willemb@google.com>
+
+[ Upstream commit b84bbaf7a6c8cca24f8acf25a2c8e46913a947ba ]
+
+Packet sockets allow construction of packets shorter than
+dev->hard_header_len to accommodate protocols with variable length
+link layer headers. These packets are padded to dev->hard_header_len,
+because some device drivers interpret that as a minimum packet size.
+
+packet_snd reserves dev->hard_header_len bytes on allocation.
+SOCK_DGRAM sockets call skb_push in dev_hard_header() to ensure that
+link layer headers are stored in the reserved range. SOCK_RAW sockets
+do the same in tpacket_snd, but not in packet_snd.
+
+Syzbot was able to send a zero byte packet to a device with massive
+116B link layer header, causing padding to cross over into skb_shinfo.
+Fix this by writing from the start of the llheader reserved range also
+in the case of packet_snd/SOCK_RAW.
+
+Update skb_set_network_header to the new offset. This also corrects
+it for SOCK_DGRAM, where it incorrectly double counted reserve due to
+the skb_push in dev_hard_header.
+
+Fixes: 9ed988cd5915 ("packet: validate variable length ll headers")
+Reported-by: syzbot+71d74a5406d02057d559@syzkaller.appspotmail.com
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -2912,13 +2912,15 @@ static int packet_snd(struct socket *soc
+       if (skb == NULL)
+               goto out_unlock;
+-      skb_set_network_header(skb, reserve);
++      skb_reset_network_header(skb);
+       err = -EINVAL;
+       if (sock->type == SOCK_DGRAM) {
+               offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
+               if (unlikely(offset < 0))
+                       goto out_free;
++      } else if (reserve) {
++              skb_push(skb, reserve);
+       }
+       /* Returns -EFAULT on error */
diff --git a/queue-4.14/series b/queue-4.14/series
new file mode 100644 (file)
index 0000000..f502f40
--- /dev/null
@@ -0,0 +1,35 @@
+net-fix-a-bug-in-removing-queues-from-xps-map.patch
+net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch
+net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch
+net-sched-red-avoid-hashing-null-child.patch
+net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch
+net-test-tailroom-before-appending-to-linear-skb.patch
+packet-in-packet_snd-start-writing-at-link-layer-allocation.patch
+sock_diag-fix-use-after-free-read-in-__sk_free.patch
+tcp-purge-write-queue-in-tcp_connect_init.patch
+vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch
+vmxnet3-use-dma-memory-barriers-where-required.patch
+hv_netvsc-fix-the-real-number-of-queues-of-non-vrss-cases.patch
+hv_netvsc-rename-ind_table-to-rx_table.patch
+hv_netvsc-rename-tx_send_table-to-tx_table.patch
+hv_netvsc-add-initialization-of-tx_table-in-netvsc_device_add.patch
+hv_netvsc-set-tx_table-to-equal-weight-after-subchannels-open.patch
+hv_netvsc-netvsc_teardown_gpadl-split.patch
+hv_netvsc-preserve-hw_features-on-mtu-channels-ringparam-changes.patch
+hv_netvsc-empty-current-transmit-aggregation-if-flow-blocked.patch
+hv_netvsc-use-the-num_online_cpus-for-channel-limit.patch
+hv_netvsc-avoid-retry-on-send-during-shutdown.patch
+hv_netvsc-only-wake-transmit-queue-if-link-is-up.patch
+hv_netvsc-fix-error-unwind-handling-if-vmbus_open-fails.patch
+hv_netvsc-cancel-subchannel-setup-before-halting-device.patch
+hv_netvsc-fix-race-in-napi-poll-when-rescheduling.patch
+hv_netvsc-defer-queue-selection-to-vf.patch
+hv_netvsc-disable-napi-before-channel-close.patch
+hv_netvsc-use-rcu-to-fix-concurrent-rx-and-queue-changes.patch
+hv_netvsc-change-gpad-teardown-order-on-older-versions.patch
+hv_netvsc-common-detach-logic.patch
+hv_netvsc-use-windows-version-instead-of-nvsp-version-on-gpad-teardown.patch
+hv_netvsc-split-netvsc_revoke_buf-and-netvsc_teardown_gpadl.patch
+hv_netvsc-ensure-correct-teardown-message-sequence-order.patch
+hv_netvsc-fix-net-device-attach-on-older-windows-hosts.patch
+sparc-vio-use-put_device-instead-of-kfree.patch
diff --git a/queue-4.14/sock_diag-fix-use-after-free-read-in-__sk_free.patch b/queue-4.14/sock_diag-fix-use-after-free-read-in-__sk_free.patch
new file mode 100644 (file)
index 0000000..377846c
--- /dev/null
@@ -0,0 +1,128 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 18 May 2018 04:47:55 -0700
+Subject: sock_diag: fix use-after-free read in __sk_free
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9709020c86f6bf8439ca3effc58cfca49a5de192 ]
+
+We must not call sock_diag_has_destroy_listeners(sk) on a socket
+that has no reference on net structure.
+
+BUG: KASAN: use-after-free in sock_diag_has_destroy_listeners include/linux/sock_diag.h:75 [inline]
+BUG: KASAN: use-after-free in __sk_free+0x329/0x340 net/core/sock.c:1609
+Read of size 8 at addr ffff88018a02e3a0 by task swapper/1/0
+
+CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.17.0-rc5+ #54
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x1b9/0x294 lib/dump_stack.c:113
+ print_address_description+0x6c/0x20b mm/kasan/report.c:256
+ kasan_report_error mm/kasan/report.c:354 [inline]
+ kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412
+ __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
+ sock_diag_has_destroy_listeners include/linux/sock_diag.h:75 [inline]
+ __sk_free+0x329/0x340 net/core/sock.c:1609
+ sk_free+0x42/0x50 net/core/sock.c:1623
+ sock_put include/net/sock.h:1664 [inline]
+ reqsk_free include/net/request_sock.h:116 [inline]
+ reqsk_put include/net/request_sock.h:124 [inline]
+ inet_csk_reqsk_queue_drop_and_put net/ipv4/inet_connection_sock.c:672 [inline]
+ reqsk_timer_handler+0xe27/0x10e0 net/ipv4/inet_connection_sock.c:739
+ call_timer_fn+0x230/0x940 kernel/time/timer.c:1326
+ expire_timers kernel/time/timer.c:1363 [inline]
+ __run_timers+0x79e/0xc50 kernel/time/timer.c:1666
+ run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692
+ __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285
+ invoke_softirq kernel/softirq.c:365 [inline]
+ irq_exit+0x1d1/0x200 kernel/softirq.c:405
+ exiting_irq arch/x86/include/asm/apic.h:525 [inline]
+ smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052
+ apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863
+ </IRQ>
+RIP: 0010:native_safe_halt+0x6/0x10 arch/x86/include/asm/irqflags.h:54
+RSP: 0018:ffff8801d9ae7c38 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff13
+RAX: dffffc0000000000 RBX: 1ffff1003b35cf8a RCX: 0000000000000000
+RDX: 1ffffffff11a30d0 RSI: 0000000000000001 RDI: ffffffff88d18680
+RBP: ffff8801d9ae7c38 R08: ffffed003b5e46c3 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001
+R13: ffff8801d9ae7cf0 R14: ffffffff897bef20 R15: 0000000000000000
+ arch_safe_halt arch/x86/include/asm/paravirt.h:94 [inline]
+ default_idle+0xc2/0x440 arch/x86/kernel/process.c:354
+ arch_cpu_idle+0x10/0x20 arch/x86/kernel/process.c:345
+ default_idle_call+0x6d/0x90 kernel/sched/idle.c:93
+ cpuidle_idle_call kernel/sched/idle.c:153 [inline]
+ do_idle+0x395/0x560 kernel/sched/idle.c:262
+ cpu_startup_entry+0x104/0x120 kernel/sched/idle.c:368
+ start_secondary+0x426/0x5b0 arch/x86/kernel/smpboot.c:269
+ secondary_startup_64+0xa5/0xb0 arch/x86/kernel/head_64.S:242
+
+Allocated by task 4557:
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:448
+ set_track mm/kasan/kasan.c:460 [inline]
+ kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553
+ kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490
+ kmem_cache_alloc+0x12e/0x760 mm/slab.c:3554
+ kmem_cache_zalloc include/linux/slab.h:691 [inline]
+ net_alloc net/core/net_namespace.c:383 [inline]
+ copy_net_ns+0x159/0x4c0 net/core/net_namespace.c:423
+ create_new_namespaces+0x69d/0x8f0 kernel/nsproxy.c:107
+ unshare_nsproxy_namespaces+0xc3/0x1f0 kernel/nsproxy.c:206
+ ksys_unshare+0x708/0xf90 kernel/fork.c:2408
+ __do_sys_unshare kernel/fork.c:2476 [inline]
+ __se_sys_unshare kernel/fork.c:2474 [inline]
+ __x64_sys_unshare+0x31/0x40 kernel/fork.c:2474
+ do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+Freed by task 69:
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:448
+ set_track mm/kasan/kasan.c:460 [inline]
+ __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521
+ kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
+ __cache_free mm/slab.c:3498 [inline]
+ kmem_cache_free+0x86/0x2d0 mm/slab.c:3756
+ net_free net/core/net_namespace.c:399 [inline]
+ net_drop_ns.part.14+0x11a/0x130 net/core/net_namespace.c:406
+ net_drop_ns net/core/net_namespace.c:405 [inline]
+ cleanup_net+0x6a1/0xb20 net/core/net_namespace.c:541
+ process_one_work+0xc1e/0x1b50 kernel/workqueue.c:2145
+ worker_thread+0x1cc/0x1440 kernel/workqueue.c:2279
+ kthread+0x345/0x410 kernel/kthread.c:240
+ ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412
+
+The buggy address belongs to the object at ffff88018a02c140
+ which belongs to the cache net_namespace of size 8832
+The buggy address is located 8800 bytes inside of
+ 8832-byte region [ffff88018a02c140, ffff88018a02e3c0)
+The buggy address belongs to the page:
+page:ffffea0006280b00 count:1 mapcount:0 mapping:ffff88018a02c140 index:0x0 compound_mapcount: 0
+flags: 0x2fffc0000008100(slab|head)
+raw: 02fffc0000008100 ffff88018a02c140 0000000000000000 0000000100000001
+raw: ffffea00062a1320 ffffea0006268020 ffff8801d9bdde40 0000000000000000
+page dumped because: kasan: bad access detected
+
+Fixes: b922622ec6ef ("sock_diag: don't broadcast kernel sockets")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Craig Gallek <kraig@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1595,7 +1595,7 @@ void sk_destruct(struct sock *sk)
+ static void __sk_free(struct sock *sk)
+ {
+-      if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
++      if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
+               sock_diag_broadcast_destroy(sk);
+       else
+               sk_destruct(sk);
diff --git a/queue-4.14/sparc-vio-use-put_device-instead-of-kfree.patch b/queue-4.14/sparc-vio-use-put_device-instead-of-kfree.patch
new file mode 100644 (file)
index 0000000..d8831d1
--- /dev/null
@@ -0,0 +1,31 @@
+From 00ad691ab140b54ab9f5de5e74cb994f552e8124 Mon Sep 17 00:00:00 2001
+From: Arvind Yadav <arvind.yadav.cs@gmail.com>
+Date: Wed, 25 Apr 2018 20:26:14 +0530
+Subject: sparc: vio: use put_device() instead of kfree()
+
+From: Arvind Yadav <arvind.yadav.cs@gmail.com>
+
+[ Upstream commit 00ad691ab140b54ab9f5de5e74cb994f552e8124 ]
+
+Never directly free @dev after calling device_register(), even
+if it returned an error. Always use put_device() to give up the
+reference initialized.
+
+Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/kernel/vio.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/sparc/kernel/vio.c
++++ b/arch/sparc/kernel/vio.c
+@@ -403,7 +403,7 @@ static struct vio_dev *vio_create_one(st
+       if (err) {
+               printk(KERN_ERR "VIO: Could not register device %s, err=%d\n",
+                      dev_name(&vdev->dev), err);
+-              kfree(vdev);
++              put_device(&vdev->dev);
+               return NULL;
+       }
+       if (vdev->dp)
diff --git a/queue-4.14/tcp-purge-write-queue-in-tcp_connect_init.patch b/queue-4.14/tcp-purge-write-queue-in-tcp_connect_init.patch
new file mode 100644 (file)
index 0000000..ddae667
--- /dev/null
@@ -0,0 +1,88 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 14 May 2018 21:14:26 -0700
+Subject: tcp: purge write queue in tcp_connect_init()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7f582b248d0a86bae5788c548d7bb5bca6f7691a ]
+
+syzkaller found a reliable way to crash the host, hitting a BUG()
+in __tcp_retransmit_skb()
+
+Malicious MSG_FASTOPEN is the root cause. We need to purge the write queue
+in tcp_connect_init() at the point we init snd_una/write_seq.
+
+This patch also replaces the BUG() by a less intrusive WARN_ON_ONCE()
+
+kernel BUG at net/ipv4/tcp_output.c:2837!
+invalid opcode: 0000 [#1] SMP KASAN
+Dumping ftrace buffer:
+   (ftrace buffer empty)
+Modules linked in:
+CPU: 0 PID: 5276 Comm: syz-executor0 Not tainted 4.17.0-rc3+ #51
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+RIP: 0010:__tcp_retransmit_skb+0x2992/0x2eb0 net/ipv4/tcp_output.c:2837
+RSP: 0000:ffff8801dae06ff8 EFLAGS: 00010206
+RAX: ffff8801b9fe61c0 RBX: 00000000ffc18a16 RCX: ffffffff864e1a49
+RDX: 0000000000000100 RSI: ffffffff864e2e12 RDI: 0000000000000005
+RBP: ffff8801dae073a0 R08: ffff8801b9fe61c0 R09: ffffed0039c40dd2
+R10: ffffed0039c40dd2 R11: ffff8801ce206e93 R12: 00000000421eeaad
+R13: ffff8801ce206d4e R14: ffff8801ce206cc0 R15: ffff8801cd4f4a80
+FS:  0000000000000000(0000) GS:ffff8801dae00000(0063) knlGS:00000000096bc900
+CS:  0010 DS: 002b ES: 002b CR0: 0000000080050033
+CR2: 0000000020000000 CR3: 00000001c47b6000 CR4: 00000000001406f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <IRQ>
+ tcp_retransmit_skb+0x2e/0x250 net/ipv4/tcp_output.c:2923
+ tcp_retransmit_timer+0xc50/0x3060 net/ipv4/tcp_timer.c:488
+ tcp_write_timer_handler+0x339/0x960 net/ipv4/tcp_timer.c:573
+ tcp_write_timer+0x111/0x1d0 net/ipv4/tcp_timer.c:593
+ call_timer_fn+0x230/0x940 kernel/time/timer.c:1326
+ expire_timers kernel/time/timer.c:1363 [inline]
+ __run_timers+0x79e/0xc50 kernel/time/timer.c:1666
+ run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692
+ __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285
+ invoke_softirq kernel/softirq.c:365 [inline]
+ irq_exit+0x1d1/0x200 kernel/softirq.c:405
+ exiting_irq arch/x86/include/asm/apic.h:525 [inline]
+ smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052
+ apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863
+
+Fixes: cf60af03ca4e ("net-tcp: Fast Open client - sendmsg(MSG_FASTOPEN)")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2814,8 +2814,10 @@ int __tcp_retransmit_skb(struct sock *sk
+               return -EBUSY;
+       if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
+-              if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+-                      BUG();
++              if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
++                      WARN_ON_ONCE(1);
++                      return -EINVAL;
++              }
+               if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
+                       return -ENOMEM;
+       }
+@@ -3312,6 +3314,7 @@ static void tcp_connect_init(struct sock
+       sock_reset_flag(sk, SOCK_DONE);
+       tp->snd_wnd = 0;
+       tcp_init_wl(tp, 0);
++      tcp_write_queue_purge(sk);
+       tp->snd_una = tp->write_seq;
+       tp->snd_sml = tp->write_seq;
+       tp->snd_up = tp->write_seq;
diff --git a/queue-4.14/vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch b/queue-4.14/vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch
new file mode 100644 (file)
index 0000000..e741b8e
--- /dev/null
@@ -0,0 +1,134 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: "hpreg@vmware.com" <hpreg@vmware.com>
+Date: Mon, 14 May 2018 08:14:34 -0400
+Subject: vmxnet3: set the DMA mask before the first DMA map operation
+
+From: "hpreg@vmware.com" <hpreg@vmware.com>
+
+[ Upstream commit 61aeecea40afb2b89933e27cd4adb10fc2e75cfd ]
+
+The DMA mask must be set before, not after, the first DMA map operation, or
+the first DMA map operation could in theory fail on some systems.
+
+Fixes: b0eb57cb97e78 ("VMXNET3: Add support for virtual IOMMU")
+Signed-off-by: Regis Duchesne <hpreg@vmware.com>
+Acked-by: Ronak Doshi <doshir@vmware.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vmxnet3/vmxnet3_drv.c |   50 +++++++++++++++++++-------------------
+ 1 file changed, 25 insertions(+), 25 deletions(-)
+
+--- a/drivers/net/vmxnet3/vmxnet3_drv.c
++++ b/drivers/net/vmxnet3/vmxnet3_drv.c
+@@ -2675,7 +2675,7 @@ vmxnet3_set_mac_addr(struct net_device *
+ /* ==================== initialization and cleanup routines ============ */
+ static int
+-vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
++vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter)
+ {
+       int err;
+       unsigned long mmio_start, mmio_len;
+@@ -2687,30 +2687,12 @@ vmxnet3_alloc_pci_resources(struct vmxne
+               return err;
+       }
+-      if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
+-              if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
+-                      dev_err(&pdev->dev,
+-                              "pci_set_consistent_dma_mask failed\n");
+-                      err = -EIO;
+-                      goto err_set_mask;
+-              }
+-              *dma64 = true;
+-      } else {
+-              if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
+-                      dev_err(&pdev->dev,
+-                              "pci_set_dma_mask failed\n");
+-                      err = -EIO;
+-                      goto err_set_mask;
+-              }
+-              *dma64 = false;
+-      }
+-
+       err = pci_request_selected_regions(pdev, (1 << 2) - 1,
+                                          vmxnet3_driver_name);
+       if (err) {
+               dev_err(&pdev->dev,
+                       "Failed to request region for adapter: error %d\n", err);
+-              goto err_set_mask;
++              goto err_enable_device;
+       }
+       pci_set_master(pdev);
+@@ -2738,7 +2720,7 @@ err_bar1:
+       iounmap(adapter->hw_addr0);
+ err_ioremap:
+       pci_release_selected_regions(pdev, (1 << 2) - 1);
+-err_set_mask:
++err_enable_device:
+       pci_disable_device(pdev);
+       return err;
+ }
+@@ -3243,7 +3225,7 @@ vmxnet3_probe_device(struct pci_dev *pde
+ #endif
+       };
+       int err;
+-      bool dma64 = false; /* stupid gcc */
++      bool dma64;
+       u32 ver;
+       struct net_device *netdev;
+       struct vmxnet3_adapter *adapter;
+@@ -3289,6 +3271,24 @@ vmxnet3_probe_device(struct pci_dev *pde
+       adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE;
+       adapter->rx_ring2_size = VMXNET3_DEF_RX_RING2_SIZE;
++      if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
++              if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
++                      dev_err(&pdev->dev,
++                              "pci_set_consistent_dma_mask failed\n");
++                      err = -EIO;
++                      goto err_set_mask;
++              }
++              dma64 = true;
++      } else {
++              if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
++                      dev_err(&pdev->dev,
++                              "pci_set_dma_mask failed\n");
++                      err = -EIO;
++                      goto err_set_mask;
++              }
++              dma64 = false;
++      }
++
+       spin_lock_init(&adapter->cmd_lock);
+       adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
+                                            sizeof(struct vmxnet3_adapter),
+@@ -3296,7 +3296,7 @@ vmxnet3_probe_device(struct pci_dev *pde
+       if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) {
+               dev_err(&pdev->dev, "Failed to map dma\n");
+               err = -EFAULT;
+-              goto err_dma_map;
++              goto err_set_mask;
+       }
+       adapter->shared = dma_alloc_coherent(
+                               &adapter->pdev->dev,
+@@ -3347,7 +3347,7 @@ vmxnet3_probe_device(struct pci_dev *pde
+       }
+ #endif /* VMXNET3_RSS */
+-      err = vmxnet3_alloc_pci_resources(adapter, &dma64);
++      err = vmxnet3_alloc_pci_resources(adapter);
+       if (err < 0)
+               goto err_alloc_pci;
+@@ -3493,7 +3493,7 @@ err_alloc_queue_desc:
+ err_alloc_shared:
+       dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
+                        sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
+-err_dma_map:
++err_set_mask:
+       free_netdev(netdev);
+       return err;
+ }
diff --git a/queue-4.14/vmxnet3-use-dma-memory-barriers-where-required.patch b/queue-4.14/vmxnet3-use-dma-memory-barriers-where-required.patch
new file mode 100644 (file)
index 0000000..d6418f9
--- /dev/null
@@ -0,0 +1,73 @@
+From foo@baz Tue May 22 20:10:42 CEST 2018
+From: "hpreg@vmware.com" <hpreg@vmware.com>
+Date: Mon, 14 May 2018 08:14:49 -0400
+Subject: vmxnet3: use DMA memory barriers where required
+
+From: "hpreg@vmware.com" <hpreg@vmware.com>
+
+[ Upstream commit f3002c1374fb2367c9d8dbb28852791ef90d2bac ]
+
+The gen bits must be read first from (resp. written last to) DMA memory.
+The proper way to enforce this on Linux is to call dma_rmb() (resp.
+dma_wmb()).
+
+Signed-off-by: Regis Duchesne <hpreg@vmware.com>
+Acked-by: Ronak Doshi <doshir@vmware.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vmxnet3/vmxnet3_drv.c |   22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+--- a/drivers/net/vmxnet3/vmxnet3_drv.c
++++ b/drivers/net/vmxnet3/vmxnet3_drv.c
+@@ -369,6 +369,11 @@ vmxnet3_tq_tx_complete(struct vmxnet3_tx
+       gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
+       while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
++              /* Prevent any &gdesc->tcd field from being (speculatively)
++               * read before (&gdesc->tcd)->gen is read.
++               */
++              dma_rmb();
++
+               completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
+                                              &gdesc->tcd), tq, adapter->pdev,
+                                              adapter);
+@@ -1099,6 +1104,11 @@ vmxnet3_tq_xmit(struct sk_buff *skb, str
+               gdesc->txd.tci = skb_vlan_tag_get(skb);
+       }
++      /* Ensure that the write to (&gdesc->txd)->gen will be observed after
++       * all other writes to &gdesc->txd.
++       */
++      dma_wmb();
++
+       /* finally flips the GEN bit of the SOP desc. */
+       gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
+                                                 VMXNET3_TXD_GEN);
+@@ -1286,6 +1296,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx
+                        */
+                       break;
+               }
++
++              /* Prevent any rcd field from being (speculatively) read before
++               * rcd->gen is read.
++               */
++              dma_rmb();
++
+               BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2 &&
+                      rcd->rqID != rq->dataRingQid);
+               idx = rcd->rxdIdx;
+@@ -1515,6 +1531,12 @@ rcd_done:
+               ring->next2comp = idx;
+               num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
+               ring = rq->rx_ring + ring_idx;
++
++              /* Ensure that the writes to rxd->gen bits will be observed
++               * after all other writes to rxd objects.
++               */
++              dma_wmb();
++
+               while (num_to_alloc) {
+                       vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
+                                         &rxCmdDesc);