From: Greg Kroah-Hartman Date: Tue, 22 May 2018 18:12:36 +0000 (+0200) Subject: 4.14-stable patches X-Git-Tag: v3.18.110~30 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=684f36d1927b5c79cb4a285aef67e5ca2823374d;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: hv_netvsc-add-initialization-of-tx_table-in-netvsc_device_add.patch hv_netvsc-avoid-retry-on-send-during-shutdown.patch hv_netvsc-cancel-subchannel-setup-before-halting-device.patch hv_netvsc-change-gpad-teardown-order-on-older-versions.patch hv_netvsc-common-detach-logic.patch hv_netvsc-defer-queue-selection-to-vf.patch hv_netvsc-disable-napi-before-channel-close.patch hv_netvsc-empty-current-transmit-aggregation-if-flow-blocked.patch hv_netvsc-ensure-correct-teardown-message-sequence-order.patch hv_netvsc-fix-error-unwind-handling-if-vmbus_open-fails.patch hv_netvsc-fix-net-device-attach-on-older-windows-hosts.patch hv_netvsc-fix-race-in-napi-poll-when-rescheduling.patch hv_netvsc-fix-the-real-number-of-queues-of-non-vrss-cases.patch hv_netvsc-netvsc_teardown_gpadl-split.patch hv_netvsc-only-wake-transmit-queue-if-link-is-up.patch hv_netvsc-preserve-hw_features-on-mtu-channels-ringparam-changes.patch hv_netvsc-rename-ind_table-to-rx_table.patch hv_netvsc-rename-tx_send_table-to-tx_table.patch hv_netvsc-set-tx_table-to-equal-weight-after-subchannels-open.patch hv_netvsc-split-netvsc_revoke_buf-and-netvsc_teardown_gpadl.patch hv_netvsc-use-rcu-to-fix-concurrent-rx-and-queue-changes.patch hv_netvsc-use-the-num_online_cpus-for-channel-limit.patch hv_netvsc-use-windows-version-instead-of-nvsp-version-on-gpad-teardown.patch net-fix-a-bug-in-removing-queues-from-xps-map.patch net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch net-sched-red-avoid-hashing-null-child.patch net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch net-test-tailroom-before-appending-to-linear-skb.patch 
packet-in-packet_snd-start-writing-at-link-layer-allocation.patch sock_diag-fix-use-after-free-read-in-__sk_free.patch sparc-vio-use-put_device-instead-of-kfree.patch tcp-purge-write-queue-in-tcp_connect_init.patch vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch vmxnet3-use-dma-memory-barriers-where-required.patch --- diff --git a/queue-4.14/hv_netvsc-add-initialization-of-tx_table-in-netvsc_device_add.patch b/queue-4.14/hv_netvsc-add-initialization-of-tx_table-in-netvsc_device_add.patch new file mode 100644 index 00000000000..4f4c8327465 --- /dev/null +++ b/queue-4.14/hv_netvsc-add-initialization-of-tx_table-in-netvsc_device_add.patch @@ -0,0 +1,38 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Haiyang Zhang +Date: Mon, 14 May 2018 15:32:03 -0700 +Subject: hv_netvsc: Add initialization of tx_table in netvsc_device_add() + +From: Haiyang Zhang + +[ Commit 6b0cbe315868d613123cf387052ccda5f09d49ea upstream. ] + +tx_table is part of the private data of kernel net_device. It is only +zero-ed out when allocating net_device. + +We may recreate netvsc_device w/o recreating net_device, so the private +netdev data, including tx_table, are not zeroed. It may contain channel +numbers for the older netvsc_device. + +This patch adds initialization of tx_table each time we recreate +netvsc_device. + +Signed-off-by: Haiyang Zhang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -1252,6 +1252,9 @@ struct netvsc_device *netvsc_device_add( + if (!net_device) + return ERR_PTR(-ENOMEM); + ++ for (i = 0; i < VRSS_SEND_TAB_SIZE; i++) ++ net_device_ctx->tx_table[i] = 0; ++ + net_device->ring_size = ring_size; + + /* Because the device uses NAPI, all the interrupt batching and diff --git a/queue-4.14/hv_netvsc-avoid-retry-on-send-during-shutdown.patch b/queue-4.14/hv_netvsc-avoid-retry-on-send-during-shutdown.patch new file mode 100644 index 00000000000..87359d79a25 --- /dev/null +++ b/queue-4.14/hv_netvsc-avoid-retry-on-send-during-shutdown.patch @@ -0,0 +1,84 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Stephen Hemminger +Date: Mon, 14 May 2018 15:32:09 -0700 +Subject: hv_netvsc: avoid retry on send during shutdown + +From: Stephen Hemminger + +[ Commit 12f69661a49446840d742d8feb593ace022d9f66 upstream. ] + +Change the initialization order so that the device is ready to transmit +(ie connect vsp is completed) before setting the internal reference +to the device with RCU. + +This avoids any races on initialization and prevents retry issues +on shutdown. + +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc.c | 24 +++++++----------------- + 1 file changed, 7 insertions(+), 17 deletions(-) + +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -850,13 +850,6 @@ int netvsc_send(struct net_device *ndev, + if (unlikely(!net_device || net_device->destroy)) + return -ENODEV; + +- /* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get +- * here before the negotiation with the host is finished and +- * send_section_map may not be allocated yet. 
+- */ +- if (unlikely(!net_device->send_section_map)) +- return -EAGAIN; +- + nvchan = &net_device->chan_table[packet->q_idx]; + packet->send_buf_index = NETVSC_INVALID_INDEX; + packet->cp_partial = false; +@@ -864,10 +857,8 @@ int netvsc_send(struct net_device *ndev, + /* Send control message directly without accessing msd (Multi-Send + * Data) field which may be changed during data packet processing. + */ +- if (!skb) { +- cur_send = packet; +- goto send_now; +- } ++ if (!skb) ++ return netvsc_send_pkt(device, packet, net_device, pb, skb); + + /* batch packets in send buffer if possible */ + msdp = &nvchan->msd; +@@ -951,7 +942,6 @@ int netvsc_send(struct net_device *ndev, + } + } + +-send_now: + if (cur_send) + ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb); + +@@ -1308,11 +1298,6 @@ struct netvsc_device *netvsc_device_add( + + napi_enable(&net_device->chan_table[0].napi); + +- /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is +- * populated. +- */ +- rcu_assign_pointer(net_device_ctx->nvdev, net_device); +- + /* Connect with the NetVsp */ + ret = netvsc_connect_vsp(device, net_device, device_info); + if (ret != 0) { +@@ -1321,6 +1306,11 @@ struct netvsc_device *netvsc_device_add( + goto close; + } + ++ /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is ++ * populated. 
++ */ ++ rcu_assign_pointer(net_device_ctx->nvdev, net_device); ++ + return net_device; + + close: diff --git a/queue-4.14/hv_netvsc-cancel-subchannel-setup-before-halting-device.patch b/queue-4.14/hv_netvsc-cancel-subchannel-setup-before-halting-device.patch new file mode 100644 index 00000000000..b08e59acfb4 --- /dev/null +++ b/queue-4.14/hv_netvsc-cancel-subchannel-setup-before-halting-device.patch @@ -0,0 +1,33 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Stephen Hemminger +Date: Mon, 14 May 2018 15:32:12 -0700 +Subject: hv_netvsc: cancel subchannel setup before halting device + +From: Stephen Hemminger + +[ Commit a7483ec0267c69b34e818738da60b392623da94b upstream. ] + +Block setup of multiple channels earlier in the teardown +process. This avoids possible races between halt and subchannel +initialization. + +Suggested-by: Haiyang Zhang +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/rndis_filter.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -1340,6 +1340,9 @@ void rndis_filter_device_remove(struct h + { + struct rndis_device *rndis_dev = net_dev->extension; + ++ /* Don't try and setup sub channels if about to halt */ ++ cancel_work_sync(&net_dev->subchan_work); ++ + /* Halt and release the rndis device */ + rndis_filter_halt_device(rndis_dev); + diff --git a/queue-4.14/hv_netvsc-change-gpad-teardown-order-on-older-versions.patch b/queue-4.14/hv_netvsc-change-gpad-teardown-order-on-older-versions.patch new file mode 100644 index 00000000000..85950a8c928 --- /dev/null +++ b/queue-4.14/hv_netvsc-change-gpad-teardown-order-on-older-versions.patch @@ -0,0 +1,43 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Stephen Hemminger +Date: Mon, 14 May 2018 15:32:17 -0700 +Subject: hv_netvsc: change GPAD teardown order on older versions + +From: Stephen Hemminger + +[ Commit 
0ef58b0a05c127762f975c3dfe8b922e4aa87a29 upstream. ] + +On older versions of Windows, the host ignores messages after +vmbus channel is closed. + +Workaround this by doing what Windows does and send the teardown +before close on older versions of NVSP protocol. + +Reported-by: Mohammed Gamal +Fixes: 0cf737808ae7 ("hv_netvsc: netvsc_teardown_gpadl() split") +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -570,10 +570,15 @@ void netvsc_device_remove(struct hv_devi + */ + netdev_dbg(ndev, "net device safe to remove\n"); + ++ /* older versions require that buffer be revoked before close */ ++ if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4) ++ netvsc_teardown_gpadl(device, net_device); ++ + /* Now, we can close the channel safely */ + vmbus_close(device->channel); + +- netvsc_teardown_gpadl(device, net_device); ++ if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4) ++ netvsc_teardown_gpadl(device, net_device); + + /* Release all resources */ + free_netvsc_device_rcu(net_device); diff --git a/queue-4.14/hv_netvsc-common-detach-logic.patch b/queue-4.14/hv_netvsc-common-detach-logic.patch new file mode 100644 index 00000000000..7fb72088b6f --- /dev/null +++ b/queue-4.14/hv_netvsc-common-detach-logic.patch @@ -0,0 +1,559 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Stephen Hemminger +Date: Mon, 14 May 2018 15:32:18 -0700 +Subject: hv_netvsc: common detach logic + +From: Stephen Hemminger + +[ Commit 7b2ee50c0cd513a176a26a71f2989facdd75bfea upstream. ] + +Make common function for detaching internals of device +during changes to MTU and RSS. Make sure no more packets +are transmitted and all packets have been received before +doing device teardown. + +Change the wait logic to be common and use usleep_range(). 
+ +Changes transmit enabling logic so that transmit queues are disabled +during the period when lower device is being changed. And enabled +only after sub channels are setup. This avoids issue where it could +be that a packet was being sent while subchannel was not initialized. + +Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug") +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/hyperv_net.h | 1 + drivers/net/hyperv/netvsc.c | 21 +- + drivers/net/hyperv/netvsc_drv.c | 280 +++++++++++++++++++++----------------- + drivers/net/hyperv/rndis_filter.c | 15 -- + 4 files changed, 175 insertions(+), 142 deletions(-) + +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -208,7 +208,6 @@ void netvsc_channel_cb(void *context); + int netvsc_poll(struct napi_struct *napi, int budget); + + void rndis_set_subchannel(struct work_struct *w); +-bool rndis_filter_opened(const struct netvsc_device *nvdev); + int rndis_filter_open(struct netvsc_device *nvdev); + int rndis_filter_close(struct netvsc_device *nvdev); + struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -554,8 +554,6 @@ void netvsc_device_remove(struct hv_devi + = rtnl_dereference(net_device_ctx->nvdev); + int i; + +- cancel_work_sync(&net_device->subchan_work); +- + netvsc_revoke_buf(device, net_device); + + RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); +@@ -644,13 +642,18 @@ static void netvsc_send_tx_complete(stru + queue_sends = + atomic_dec_return(&net_device->chan_table[q_idx].queue_sends); + +- if (net_device->destroy && queue_sends == 0) +- wake_up(&net_device->wait_drain); +- +- if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && +- (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || +- queue_sends < 1)) +- netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx)); ++ if 
(unlikely(net_device->destroy)) { ++ if (queue_sends == 0) ++ wake_up(&net_device->wait_drain); ++ } else { ++ struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx); ++ ++ if (netif_tx_queue_stopped(txq) && ++ (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || ++ queue_sends < 1)) { ++ netif_tx_wake_queue(txq); ++ } ++ } + } + + static void netvsc_send_completion(struct netvsc_device *net_device, +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -45,7 +45,10 @@ + + #include "hyperv_net.h" + +-#define RING_SIZE_MIN 64 ++#define RING_SIZE_MIN 64 ++#define RETRY_US_LO 5000 ++#define RETRY_US_HI 10000 ++#define RETRY_MAX 2000 /* >10 sec */ + + #define LINKCHANGE_INT (2 * HZ) + #define VF_TAKEOVER_INT (HZ / 10) +@@ -89,10 +92,8 @@ static int netvsc_open(struct net_device + } + + rdev = nvdev->extension; +- if (!rdev->link_state) { ++ if (!rdev->link_state) + netif_carrier_on(net); +- netif_tx_wake_all_queues(net); +- } + + if (vf_netdev) { + /* Setting synthetic device up transparently sets +@@ -108,36 +109,25 @@ static int netvsc_open(struct net_device + return 0; + } + +-static int netvsc_close(struct net_device *net) ++static int netvsc_wait_until_empty(struct netvsc_device *nvdev) + { +- struct net_device_context *net_device_ctx = netdev_priv(net); +- struct net_device *vf_netdev +- = rtnl_dereference(net_device_ctx->vf_netdev); +- struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); +- int ret = 0; +- u32 aread, i, msec = 10, retry = 0, retry_max = 20; +- struct vmbus_channel *chn; +- +- netif_tx_disable(net); +- +- /* No need to close rndis filter if it is removed already */ +- if (!nvdev) +- goto out; +- +- ret = rndis_filter_close(nvdev); +- if (ret != 0) { +- netdev_err(net, "unable to close device (ret %d).\n", ret); +- return ret; +- } ++ unsigned int retry = 0; ++ int i; + + /* Ensure pending bytes in ring are read */ +- while (true) { +- aread = 0; ++ for (;;) { ++ u32 aread 
= 0; ++ + for (i = 0; i < nvdev->num_chn; i++) { +- chn = nvdev->chan_table[i].channel; ++ struct vmbus_channel *chn ++ = nvdev->chan_table[i].channel; ++ + if (!chn) + continue; + ++ /* make sure receive not running now */ ++ napi_synchronize(&nvdev->chan_table[i].napi); ++ + aread = hv_get_bytes_to_read(&chn->inbound); + if (aread) + break; +@@ -147,22 +137,40 @@ static int netvsc_close(struct net_devic + break; + } + +- retry++; +- if (retry > retry_max || aread == 0) +- break; ++ if (aread == 0) ++ return 0; + +- msleep(msec); ++ if (++retry > RETRY_MAX) ++ return -ETIMEDOUT; + +- if (msec < 1000) +- msec *= 2; ++ usleep_range(RETRY_US_LO, RETRY_US_HI); + } ++} + +- if (aread) { +- netdev_err(net, "Ring buffer not empty after closing rndis\n"); +- ret = -ETIMEDOUT; ++static int netvsc_close(struct net_device *net) ++{ ++ struct net_device_context *net_device_ctx = netdev_priv(net); ++ struct net_device *vf_netdev ++ = rtnl_dereference(net_device_ctx->vf_netdev); ++ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); ++ int ret; ++ ++ netif_tx_disable(net); ++ ++ /* No need to close rndis filter if it is removed already */ ++ if (!nvdev) ++ return 0; ++ ++ ret = rndis_filter_close(nvdev); ++ if (ret != 0) { ++ netdev_err(net, "unable to close device (ret %d).\n", ret); ++ return ret; + } + +-out: ++ ret = netvsc_wait_until_empty(nvdev); ++ if (ret) ++ netdev_err(net, "Ring buffer not empty after closing rndis\n"); ++ + if (vf_netdev) + dev_close(vf_netdev); + +@@ -820,16 +828,81 @@ static void netvsc_get_channels(struct n + } + } + ++static int netvsc_detach(struct net_device *ndev, ++ struct netvsc_device *nvdev) ++{ ++ struct net_device_context *ndev_ctx = netdev_priv(ndev); ++ struct hv_device *hdev = ndev_ctx->device_ctx; ++ int ret; ++ ++ /* Don't try continuing to try and setup sub channels */ ++ if (cancel_work_sync(&nvdev->subchan_work)) ++ nvdev->num_chn = 1; ++ ++ /* If device was up (receiving) then shutdown */ ++ if 
(netif_running(ndev)) { ++ netif_tx_disable(ndev); ++ ++ ret = rndis_filter_close(nvdev); ++ if (ret) { ++ netdev_err(ndev, ++ "unable to close device (ret %d).\n", ret); ++ return ret; ++ } ++ ++ ret = netvsc_wait_until_empty(nvdev); ++ if (ret) { ++ netdev_err(ndev, ++ "Ring buffer not empty after closing rndis\n"); ++ return ret; ++ } ++ } ++ ++ netif_device_detach(ndev); ++ ++ rndis_filter_device_remove(hdev, nvdev); ++ ++ return 0; ++} ++ ++static int netvsc_attach(struct net_device *ndev, ++ struct netvsc_device_info *dev_info) ++{ ++ struct net_device_context *ndev_ctx = netdev_priv(ndev); ++ struct hv_device *hdev = ndev_ctx->device_ctx; ++ struct netvsc_device *nvdev; ++ struct rndis_device *rdev; ++ int ret; ++ ++ nvdev = rndis_filter_device_add(hdev, dev_info); ++ if (IS_ERR(nvdev)) ++ return PTR_ERR(nvdev); ++ ++ /* Note: enable and attach happen when sub-channels setup */ ++ ++ netif_carrier_off(ndev); ++ ++ if (netif_running(ndev)) { ++ ret = rndis_filter_open(nvdev); ++ if (ret) ++ return ret; ++ ++ rdev = nvdev->extension; ++ if (!rdev->link_state) ++ netif_carrier_on(ndev); ++ } ++ ++ return 0; ++} ++ + static int netvsc_set_channels(struct net_device *net, + struct ethtool_channels *channels) + { + struct net_device_context *net_device_ctx = netdev_priv(net); +- struct hv_device *dev = net_device_ctx->device_ctx; + struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); + unsigned int orig, count = channels->combined_count; + struct netvsc_device_info device_info; +- bool was_opened; +- int ret = 0; ++ int ret; + + /* We do not support separate count for rx, tx, or other */ + if (count == 0 || +@@ -846,9 +919,6 @@ static int netvsc_set_channels(struct ne + return -EINVAL; + + orig = nvdev->num_chn; +- was_opened = rndis_filter_opened(nvdev); +- if (was_opened) +- rndis_filter_close(nvdev); + + memset(&device_info, 0, sizeof(device_info)); + device_info.num_chn = count; +@@ -858,28 +928,17 @@ static int netvsc_set_channels(struct ne 
+ device_info.recv_sections = nvdev->recv_section_cnt; + device_info.recv_section_size = nvdev->recv_section_size; + +- rndis_filter_device_remove(dev, nvdev); ++ ret = netvsc_detach(net, nvdev); ++ if (ret) ++ return ret; + +- nvdev = rndis_filter_device_add(dev, &device_info); +- if (IS_ERR(nvdev)) { +- ret = PTR_ERR(nvdev); ++ ret = netvsc_attach(net, &device_info); ++ if (ret) { + device_info.num_chn = orig; +- nvdev = rndis_filter_device_add(dev, &device_info); +- +- if (IS_ERR(nvdev)) { +- netdev_err(net, "restoring channel setting failed: %ld\n", +- PTR_ERR(nvdev)); +- return ret; +- } ++ if (netvsc_attach(net, &device_info)) ++ netdev_err(net, "restoring channel setting failed\n"); + } + +- if (was_opened) +- rndis_filter_open(nvdev); +- +- /* We may have missed link change notifications */ +- net_device_ctx->last_reconfig = 0; +- schedule_delayed_work(&net_device_ctx->dwork, 0); +- + return ret; + } + +@@ -946,10 +1005,8 @@ static int netvsc_change_mtu(struct net_ + struct net_device_context *ndevctx = netdev_priv(ndev); + struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev); + struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); +- struct hv_device *hdev = ndevctx->device_ctx; + int orig_mtu = ndev->mtu; + struct netvsc_device_info device_info; +- bool was_opened; + int ret = 0; + + if (!nvdev || nvdev->destroy) +@@ -962,11 +1019,6 @@ static int netvsc_change_mtu(struct net_ + return ret; + } + +- netif_device_detach(ndev); +- was_opened = rndis_filter_opened(nvdev); +- if (was_opened) +- rndis_filter_close(nvdev); +- + memset(&device_info, 0, sizeof(device_info)); + device_info.ring_size = ring_size; + device_info.num_chn = nvdev->num_chn; +@@ -975,35 +1027,27 @@ static int netvsc_change_mtu(struct net_ + device_info.recv_sections = nvdev->recv_section_cnt; + device_info.recv_section_size = nvdev->recv_section_size; + +- rndis_filter_device_remove(hdev, nvdev); ++ ret = netvsc_detach(ndev, nvdev); ++ if (ret) ++ goto 
rollback_vf; + + ndev->mtu = mtu; + +- nvdev = rndis_filter_device_add(hdev, &device_info); +- if (IS_ERR(nvdev)) { +- ret = PTR_ERR(nvdev); +- +- /* Attempt rollback to original MTU */ +- ndev->mtu = orig_mtu; +- nvdev = rndis_filter_device_add(hdev, &device_info); +- +- if (vf_netdev) +- dev_set_mtu(vf_netdev, orig_mtu); +- +- if (IS_ERR(nvdev)) { +- netdev_err(ndev, "restoring mtu failed: %ld\n", +- PTR_ERR(nvdev)); +- return ret; +- } +- } +- +- if (was_opened) +- rndis_filter_open(nvdev); ++ ret = netvsc_attach(ndev, &device_info); ++ if (ret) ++ goto rollback; + +- netif_device_attach(ndev); ++ return 0; + +- /* We may have missed link change notifications */ +- schedule_delayed_work(&ndevctx->dwork, 0); ++rollback: ++ /* Attempt rollback to original MTU */ ++ ndev->mtu = orig_mtu; ++ ++ if (netvsc_attach(ndev, &device_info)) ++ netdev_err(ndev, "restoring mtu failed\n"); ++rollback_vf: ++ if (vf_netdev) ++ dev_set_mtu(vf_netdev, orig_mtu); + + return ret; + } +@@ -1469,11 +1513,9 @@ static int netvsc_set_ringparam(struct n + { + struct net_device_context *ndevctx = netdev_priv(ndev); + struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); +- struct hv_device *hdev = ndevctx->device_ctx; + struct netvsc_device_info device_info; + struct ethtool_ringparam orig; + u32 new_tx, new_rx; +- bool was_opened; + int ret = 0; + + if (!nvdev || nvdev->destroy) +@@ -1499,34 +1541,18 @@ static int netvsc_set_ringparam(struct n + device_info.recv_sections = new_rx; + device_info.recv_section_size = nvdev->recv_section_size; + +- netif_device_detach(ndev); +- was_opened = rndis_filter_opened(nvdev); +- if (was_opened) +- rndis_filter_close(nvdev); +- +- rndis_filter_device_remove(hdev, nvdev); +- +- nvdev = rndis_filter_device_add(hdev, &device_info); +- if (IS_ERR(nvdev)) { +- ret = PTR_ERR(nvdev); ++ ret = netvsc_detach(ndev, nvdev); ++ if (ret) ++ return ret; + ++ ret = netvsc_attach(ndev, &device_info); ++ if (ret) { + device_info.send_sections = 
orig.tx_pending; + device_info.recv_sections = orig.rx_pending; +- nvdev = rndis_filter_device_add(hdev, &device_info); +- if (IS_ERR(nvdev)) { +- netdev_err(ndev, "restoring ringparam failed: %ld\n", +- PTR_ERR(nvdev)); +- return ret; +- } +- } + +- if (was_opened) +- rndis_filter_open(nvdev); +- netif_device_attach(ndev); +- +- /* We may have missed link change notifications */ +- ndevctx->last_reconfig = 0; +- schedule_delayed_work(&ndevctx->dwork, 0); ++ if (netvsc_attach(ndev, &device_info)) ++ netdev_err(ndev, "restoring ringparam failed"); ++ } + + return ret; + } +@@ -2003,8 +2029,8 @@ no_net: + static int netvsc_remove(struct hv_device *dev) + { + struct net_device_context *ndev_ctx; +- struct net_device *vf_netdev; +- struct net_device *net; ++ struct net_device *vf_netdev, *net; ++ struct netvsc_device *nvdev; + + net = hv_get_drvdata(dev); + if (net == NULL) { +@@ -2014,10 +2040,14 @@ static int netvsc_remove(struct hv_devic + + ndev_ctx = netdev_priv(net); + +- netif_device_detach(net); +- + cancel_delayed_work_sync(&ndev_ctx->dwork); + ++ rcu_read_lock(); ++ nvdev = rcu_dereference(ndev_ctx->nvdev); ++ ++ if (nvdev) ++ cancel_work_sync(&nvdev->subchan_work); ++ + /* + * Call to the vsc driver to let it know that the device is being + * removed. Also blocks mtu and channel changes. 
+@@ -2027,11 +2057,13 @@ static int netvsc_remove(struct hv_devic + if (vf_netdev) + netvsc_unregister_vf(vf_netdev); + ++ if (nvdev) ++ rndis_filter_device_remove(dev, nvdev); ++ + unregister_netdevice(net); + +- rndis_filter_device_remove(dev, +- rtnl_dereference(ndev_ctx->nvdev)); + rtnl_unlock(); ++ rcu_read_unlock(); + + hv_set_drvdata(dev, NULL); + +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -1112,6 +1112,7 @@ void rndis_set_subchannel(struct work_st + for (i = 0; i < VRSS_SEND_TAB_SIZE; i++) + ndev_ctx->tx_table[i] = i % nvdev->num_chn; + ++ netif_device_attach(ndev); + rtnl_unlock(); + return; + +@@ -1122,6 +1123,8 @@ failed: + + nvdev->max_chn = 1; + nvdev->num_chn = 1; ++ ++ netif_device_attach(ndev); + unlock: + rtnl_unlock(); + } +@@ -1324,6 +1327,10 @@ out: + net_device->num_chn = 1; + } + ++ /* No sub channels, device is ready */ ++ if (net_device->num_chn == 1) ++ netif_device_attach(net); ++ + return net_device; + + err_dev_remv: +@@ -1336,9 +1343,6 @@ void rndis_filter_device_remove(struct h + { + struct rndis_device *rndis_dev = net_dev->extension; + +- /* Don't try and setup sub channels if about to halt */ +- cancel_work_sync(&net_dev->subchan_work); +- + /* Halt and release the rndis device */ + rndis_filter_halt_device(rndis_dev); + +@@ -1368,8 +1372,3 @@ int rndis_filter_close(struct netvsc_dev + + return rndis_filter_close_device(nvdev->extension); + } +- +-bool rndis_filter_opened(const struct netvsc_device *nvdev) +-{ +- return atomic_read(&nvdev->open_cnt) > 0; +-} diff --git a/queue-4.14/hv_netvsc-defer-queue-selection-to-vf.patch b/queue-4.14/hv_netvsc-defer-queue-selection-to-vf.patch new file mode 100644 index 00000000000..f46695e0fdb --- /dev/null +++ b/queue-4.14/hv_netvsc-defer-queue-selection-to-vf.patch @@ -0,0 +1,45 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Stephen Hemminger +Date: Mon, 14 May 2018 15:32:14 -0700 +Subject: hv_netvsc: defer queue selection to VF + +From: 
Stephen Hemminger + +[ Commit b3bf5666a51068ad5ddd89a76ed877101ef3bc16 upstream. ] + +When VF is used for accelerated networking it will likely have +more queues (and different policy) than the synthetic NIC. +This patch defers the queue policy to the VF so that all the +queues can be used. This impacts workloads like local generate UDP. + +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc_drv.c | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -283,8 +283,19 @@ static u16 netvsc_select_queue(struct ne + rcu_read_lock(); + vf_netdev = rcu_dereference(ndc->vf_netdev); + if (vf_netdev) { +- txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0; +- qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; ++ const struct net_device_ops *vf_ops = vf_netdev->netdev_ops; ++ ++ if (vf_ops->ndo_select_queue) ++ txq = vf_ops->ndo_select_queue(vf_netdev, skb, ++ accel_priv, fallback); ++ else ++ txq = fallback(vf_netdev, skb); ++ ++ /* Record the queue selected by VF so that it can be ++ * used for common case where VF has more queues than ++ * the synthetic device. ++ */ ++ qdisc_skb_cb(skb)->slave_dev_queue_mapping = txq; + } else { + txq = netvsc_pick_tx(ndev, skb); + } diff --git a/queue-4.14/hv_netvsc-disable-napi-before-channel-close.patch b/queue-4.14/hv_netvsc-disable-napi-before-channel-close.patch new file mode 100644 index 00000000000..621a044ad3c --- /dev/null +++ b/queue-4.14/hv_netvsc-disable-napi-before-channel-close.patch @@ -0,0 +1,44 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Stephen Hemminger +Date: Mon, 14 May 2018 15:32:15 -0700 +Subject: hv_netvsc: disable NAPI before channel close + +From: Stephen Hemminger + +[ Commit 8348e0460ab1473f06c8b824699dd2eed3c1979d upstream. 
] + +This makes sure that no CPU is still process packets when +the channel is closed. + +Fixes: 76bb5db5c749 ("netvsc: fix use after free on module removal") +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -567,6 +567,10 @@ void netvsc_device_remove(struct hv_devi + + RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); + ++ /* And disassociate NAPI context from device */ ++ for (i = 0; i < net_device->num_chn; i++) ++ netif_napi_del(&net_device->chan_table[i].napi); ++ + /* + * At this point, no one should be accessing net_device + * except in here +@@ -578,10 +582,6 @@ void netvsc_device_remove(struct hv_devi + + netvsc_teardown_gpadl(device, net_device); + +- /* And dissassociate NAPI context from device */ +- for (i = 0; i < net_device->num_chn; i++) +- netif_napi_del(&net_device->chan_table[i].napi); +- + /* Release all resources */ + free_netvsc_device_rcu(net_device); + } diff --git a/queue-4.14/hv_netvsc-empty-current-transmit-aggregation-if-flow-blocked.patch b/queue-4.14/hv_netvsc-empty-current-transmit-aggregation-if-flow-blocked.patch new file mode 100644 index 00000000000..6357ed176d6 --- /dev/null +++ b/queue-4.14/hv_netvsc-empty-current-transmit-aggregation-if-flow-blocked.patch @@ -0,0 +1,160 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Stephen Hemminger +Date: Mon, 14 May 2018 15:32:07 -0700 +Subject: hv_netvsc: empty current transmit aggregation if flow blocked + +From: Stephen Hemminger + +[ Commit cfd8afd986cdb59ea9adac873c5082498a1eb7c0 upstream. ] + +If the transmit queue is known full, then don't keep aggregating +data. And the cp_partial flag which indicates that the current +aggregation buffer is full can be folded in to avoid more +conditionals. + +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/hyperv_net.h | 2 +- + drivers/net/hyperv/netvsc.c | 36 +++++++++++++++++++++--------------- + drivers/net/hyperv/netvsc_drv.c | 2 +- + drivers/net/hyperv/rndis_filter.c | 3 +-- + 4 files changed, 24 insertions(+), 19 deletions(-) + +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -192,7 +192,7 @@ struct netvsc_device *netvsc_device_add( + const struct netvsc_device_info *info); + int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx); + void netvsc_device_remove(struct hv_device *device); +-int netvsc_send(struct net_device_context *ndc, ++int netvsc_send(struct net_device *net, + struct hv_netvsc_packet *packet, + struct rndis_message *rndis_msg, + struct hv_page_buffer *page_buffer, +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -700,13 +700,13 @@ static u32 netvsc_get_next_send_section( + return NETVSC_INVALID_INDEX; + } + +-static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, +- unsigned int section_index, +- u32 pend_size, +- struct hv_netvsc_packet *packet, +- struct rndis_message *rndis_msg, +- struct hv_page_buffer *pb, +- struct sk_buff *skb) ++static void netvsc_copy_to_send_buf(struct netvsc_device *net_device, ++ unsigned int section_index, ++ u32 pend_size, ++ struct hv_netvsc_packet *packet, ++ struct rndis_message *rndis_msg, ++ struct hv_page_buffer *pb, ++ bool xmit_more) + { + char *start = net_device->send_buf; + char *dest = start + (section_index * net_device->send_section_size) +@@ -719,7 +719,8 @@ static u32 netvsc_copy_to_send_buf(struc + packet->page_buf_cnt; + + /* Add padding */ +- if (skb->xmit_more && remain && !packet->cp_partial) { ++ remain = packet->total_data_buflen & (net_device->pkt_align - 1); ++ if (xmit_more && remain) { + padding = net_device->pkt_align - remain; + rndis_msg->msg_len += padding; + packet->total_data_buflen += padding; +@@ -739,8 +740,6 @@ static 
u32 netvsc_copy_to_send_buf(struc + memset(dest, 0, padding); + msg_size += padding; + } +- +- return msg_size; + } + + static inline int netvsc_send_pkt( +@@ -828,12 +827,13 @@ static inline void move_pkt_msd(struct h + } + + /* RCU already held by caller */ +-int netvsc_send(struct net_device_context *ndev_ctx, ++int netvsc_send(struct net_device *ndev, + struct hv_netvsc_packet *packet, + struct rndis_message *rndis_msg, + struct hv_page_buffer *pb, + struct sk_buff *skb) + { ++ struct net_device_context *ndev_ctx = netdev_priv(ndev); + struct netvsc_device *net_device + = rcu_dereference_bh(ndev_ctx->nvdev); + struct hv_device *device = ndev_ctx->device_ctx; +@@ -844,8 +844,7 @@ int netvsc_send(struct net_device_contex + struct multi_send_data *msdp; + struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL; + struct sk_buff *msd_skb = NULL; +- bool try_batch; +- bool xmit_more = (skb != NULL) ? skb->xmit_more : false; ++ bool try_batch, xmit_more; + + /* If device is rescinded, return error and packet will get dropped. 
*/ + if (unlikely(!net_device || net_device->destroy)) +@@ -896,10 +895,17 @@ int netvsc_send(struct net_device_contex + } + } + ++ /* Keep aggregating only if stack says more data is coming ++ * and not doing mixed modes send and not flow blocked ++ */ ++ xmit_more = skb->xmit_more && ++ !packet->cp_partial && ++ !netif_xmit_stopped(netdev_get_tx_queue(ndev, packet->q_idx)); ++ + if (section_index != NETVSC_INVALID_INDEX) { + netvsc_copy_to_send_buf(net_device, + section_index, msd_len, +- packet, rndis_msg, pb, skb); ++ packet, rndis_msg, pb, xmit_more); + + packet->send_buf_index = section_index; + +@@ -919,7 +925,7 @@ int netvsc_send(struct net_device_contex + if (msdp->skb) + dev_consume_skb_any(msdp->skb); + +- if (xmit_more && !packet->cp_partial) { ++ if (xmit_more) { + msdp->skb = skb; + msdp->pkt = packet; + msdp->count++; +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -614,7 +614,7 @@ static int netvsc_start_xmit(struct sk_b + /* timestamp packet in software */ + skb_tx_timestamp(skb); + +- ret = netvsc_send(net_device_ctx, packet, rndis_msg, pb, skb); ++ ret = netvsc_send(net, packet, rndis_msg, pb, skb); + if (likely(ret == 0)) + return NETDEV_TX_OK; + +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -217,7 +217,6 @@ static int rndis_filter_send_request(str + struct hv_netvsc_packet *packet; + struct hv_page_buffer page_buf[2]; + struct hv_page_buffer *pb = page_buf; +- struct net_device_context *net_device_ctx = netdev_priv(dev->ndev); + int ret; + + /* Setup the packet to send it */ +@@ -245,7 +244,7 @@ static int rndis_filter_send_request(str + } + + rcu_read_lock_bh(); +- ret = netvsc_send(net_device_ctx, packet, NULL, pb, NULL); ++ ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL); + rcu_read_unlock_bh(); + + return ret; diff --git a/queue-4.14/hv_netvsc-ensure-correct-teardown-message-sequence-order.patch 
b/queue-4.14/hv_netvsc-ensure-correct-teardown-message-sequence-order.patch new file mode 100644 index 00000000000..180d7e8db04 --- /dev/null +++ b/queue-4.14/hv_netvsc-ensure-correct-teardown-message-sequence-order.patch @@ -0,0 +1,122 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Mohammed Gamal +Date: Mon, 14 May 2018 15:32:21 -0700 +Subject: hv_netvsc: Ensure correct teardown message sequence order + +From: Mohammed Gamal + +[ Commit a56d99d714665591fed8527b90eef21530ea61e0 upstream. ] + +Prior to commit 0cf737808ae7 ("hv_netvsc: netvsc_teardown_gpadl() split") +the call sequence in netvsc_device_remove() was as follows (as +implemented in netvsc_destroy_buf()): +1- Send NVSP_MSG1_TYPE_REVOKE_RECV_BUF message +2- Teardown receive buffer GPADL +3- Send NVSP_MSG1_TYPE_REVOKE_SEND_BUF message +4- Teardown send buffer GPADL +5- Close vmbus + +This didn't work for WS2016 hosts. Commit 0cf737808ae7 +("hv_netvsc: netvsc_teardown_gpadl() split") rearranged the +teardown sequence as follows: +1- Send NVSP_MSG1_TYPE_REVOKE_RECV_BUF message +2- Send NVSP_MSG1_TYPE_REVOKE_SEND_BUF message +3- Close vmbus +4- Teardown receive buffer GPADL +5- Teardown send buffer GPADL + +That worked well for WS2016 hosts, but it prevented guests on older hosts from +shutting down after changing network settings. Commit 0ef58b0a05c1 +("hv_netvsc: change GPAD teardown order on older versions") ensured the +following message sequence for older hosts +1- Send NVSP_MSG1_TYPE_REVOKE_RECV_BUF message +2- Send NVSP_MSG1_TYPE_REVOKE_SEND_BUF message +3- Teardown receive buffer GPADL +4- Teardown send buffer GPADL +5- Close vmbus + +However, with this sequence calling `ip link set eth0 mtu 1000` hangs and the +process becomes uninterruptible. On further analysis it turns out that on tearing +down the receive buffer GPADL the kernel is waiting indefinitely +in vmbus_teardown_gpadl() for a completion to be signaled. 
+ +Here is a snippet of where this occurs: +int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle) +{ + struct vmbus_channel_gpadl_teardown *msg; + struct vmbus_channel_msginfo *info; + unsigned long flags; + int ret; + + info = kmalloc(sizeof(*info) + + sizeof(struct vmbus_channel_gpadl_teardown), GFP_KERNEL); + if (!info) + return -ENOMEM; + + init_completion(&info->waitevent); + info->waiting_channel = channel; +[....] + ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_gpadl_teardown), + true); + + if (ret) + goto post_msg_err; + + wait_for_completion(&info->waitevent); +[....] +} + +The completion is signaled from vmbus_ongpadl_torndown(), which gets called when +the corresponding message is received from the host, which apparently never happens +in that case. +This patch works around the issue by restoring the first mentioned message sequence +for older hosts + +Fixes: 0ef58b0a05c1 ("hv_netvsc: change GPAD teardown order on older versions") +Signed-off-by: Mohammed Gamal +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc.c | 19 +++++++++++++------ + 1 file changed, 13 insertions(+), 6 deletions(-) + +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -571,8 +571,17 @@ void netvsc_device_remove(struct hv_devi + = rtnl_dereference(net_device_ctx->nvdev); + int i; + ++ /* ++ * Revoke receive buffer. If host is pre-Win2016 then tear down ++ * receive buffer GPADL. Do the same for send buffer. 
++ */ + netvsc_revoke_recv_buf(device, net_device); ++ if (vmbus_proto_version < VERSION_WIN10) ++ netvsc_teardown_recv_gpadl(device, net_device); ++ + netvsc_revoke_send_buf(device, net_device); ++ if (vmbus_proto_version < VERSION_WIN10) ++ netvsc_teardown_send_gpadl(device, net_device); + + RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); + +@@ -586,15 +595,13 @@ void netvsc_device_remove(struct hv_devi + */ + netdev_dbg(ndev, "net device safe to remove\n"); + +- /* older versions require that buffer be revoked before close */ +- if (vmbus_proto_version < VERSION_WIN10) { +- netvsc_teardown_recv_gpadl(device, net_device); +- netvsc_teardown_send_gpadl(device, net_device); +- } +- + /* Now, we can close the channel safely */ + vmbus_close(device->channel); + ++ /* ++ * If host is Win2016 or higher then we do the GPADL tear down ++ * here after VMBus is closed. ++ */ + if (vmbus_proto_version >= VERSION_WIN10) { + netvsc_teardown_recv_gpadl(device, net_device); + netvsc_teardown_send_gpadl(device, net_device); diff --git a/queue-4.14/hv_netvsc-fix-error-unwind-handling-if-vmbus_open-fails.patch b/queue-4.14/hv_netvsc-fix-error-unwind-handling-if-vmbus_open-fails.patch new file mode 100644 index 00000000000..a06b54cd828 --- /dev/null +++ b/queue-4.14/hv_netvsc-fix-error-unwind-handling-if-vmbus_open-fails.patch @@ -0,0 +1,36 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Stephen Hemminger +Date: Mon, 14 May 2018 15:32:11 -0700 +Subject: hv_netvsc: fix error unwind handling if vmbus_open fails + +From: Stephen Hemminger + +[ Commit fcfb4a00d1e514e8313277a01ef919de1113025b upstream. ] + +Need to delete NAPI association if vmbus_open fails. + +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -1288,7 +1288,6 @@ struct netvsc_device *netvsc_device_add( + net_device->chan_table); + + if (ret != 0) { +- netif_napi_del(&net_device->chan_table[0].napi); + netdev_err(ndev, "unable to open channel: %d\n", ret); + goto cleanup; + } +@@ -1321,6 +1320,7 @@ close: + vmbus_close(device->channel); + + cleanup: ++ netif_napi_del(&net_device->chan_table[0].napi); + free_netvsc_device(&net_device->rcu); + + return ERR_PTR(ret); diff --git a/queue-4.14/hv_netvsc-fix-net-device-attach-on-older-windows-hosts.patch b/queue-4.14/hv_netvsc-fix-net-device-attach-on-older-windows-hosts.patch new file mode 100644 index 00000000000..6082ffbe263 --- /dev/null +++ b/queue-4.14/hv_netvsc-fix-net-device-attach-on-older-windows-hosts.patch @@ -0,0 +1,38 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Mohammed Gamal +Date: Mon, 14 May 2018 15:32:22 -0700 +Subject: hv_netvsc: Fix net device attach on older Windows hosts + +From: Mohammed Gamal + +[ Commit 55be9f25be1ca5bda75c39808fc77e42691bc07f upstream. ] + +On older windows hosts the net_device instance is returned to +the caller of rndis_filter_device_add() without having the presence +bit set first. This would cause any subsequent calls to network device +operations (e.g. MTU change, channel change) to fail after the device +is detached once, returning -ENODEV. + +Instead of returning the device instance, we take the exit path where +we call netif_device_attach() + +Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic") +Signed-off-by: Mohammed Gamal +Reviewed-by: Stephen Hemminger +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/rndis_filter.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -1276,7 +1276,7 @@ struct netvsc_device *rndis_filter_devic + rndis_device->link_state ? "down" : "up"); + + if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5) +- return net_device; ++ goto out; + + rndis_filter_query_link_speed(rndis_device, net_device); + diff --git a/queue-4.14/hv_netvsc-fix-race-in-napi-poll-when-rescheduling.patch b/queue-4.14/hv_netvsc-fix-race-in-napi-poll-when-rescheduling.patch new file mode 100644 index 00000000000..2874743c46d --- /dev/null +++ b/queue-4.14/hv_netvsc-fix-race-in-napi-poll-when-rescheduling.patch @@ -0,0 +1,36 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Stephen Hemminger +Date: Mon, 14 May 2018 15:32:13 -0700 +Subject: hv_netvsc: fix race in napi poll when rescheduling + +From: Stephen Hemminger + +[ Commit d64e38ae690e3337db0d38d9b149a193a1646c4b upstream. ] + +There is a race between napi_reschedule and re-enabling interrupts +which could lead to missed host interrupts. This occurs when +interrupts are re-enabled (hv_end_read) and vmbus irq callback +(netvsc_channel_cb) has already scheduled NAPI. + +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -1205,9 +1205,10 @@ int netvsc_poll(struct napi_struct *napi + if (send_recv_completions(ndev, net_device, nvchan) == 0 && + work_done < budget && + napi_complete_done(napi, work_done) && +- hv_end_read(&channel->inbound)) { ++ hv_end_read(&channel->inbound) && ++ napi_schedule_prep(napi)) { + hv_begin_read(&channel->inbound); +- napi_reschedule(napi); ++ __napi_schedule(napi); + } + + /* Driver may overshoot since multiple packets per descriptor */ diff --git a/queue-4.14/hv_netvsc-fix-the-real-number-of-queues-of-non-vrss-cases.patch b/queue-4.14/hv_netvsc-fix-the-real-number-of-queues-of-non-vrss-cases.patch new file mode 100644 index 00000000000..2fefa93dbd0 --- /dev/null +++ b/queue-4.14/hv_netvsc-fix-the-real-number-of-queues-of-non-vrss-cases.patch @@ -0,0 +1,37 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Haiyang Zhang +Date: Mon, 14 May 2018 15:32:00 -0700 +Subject: hv_netvsc: Fix the real number of queues of non-vRSS cases + +From: Haiyang Zhang + +[ Commit 6450f8f269a9271985e4a8c13920b7e4cf21c0f3 upstream. ] + +For older hosts without multi-channel (vRSS) support, and some error +cases, we still need to set the real number of queues to one. +This patch adds this missing setting. + +Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug") +Signed-off-by: Haiyang Zhang +Reviewed-by: Stephen Hemminger +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc_drv.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -1932,6 +1932,12 @@ static int netvsc_probe(struct hv_device + /* We always need headroom for rndis header */ + net->needed_headroom = RNDIS_AND_PPI_SIZE; + ++ /* Initialize the number of queues to be 1, we may change it if more ++ * channels are offered later. ++ */ ++ netif_set_real_num_tx_queues(net, 1); ++ netif_set_real_num_rx_queues(net, 1); ++ + /* Notify the netvsc driver of the new device */ + memset(&device_info, 0, sizeof(device_info)); + device_info.ring_size = ring_size; diff --git a/queue-4.14/hv_netvsc-netvsc_teardown_gpadl-split.patch b/queue-4.14/hv_netvsc-netvsc_teardown_gpadl-split.patch new file mode 100644 index 00000000000..1c90a789aae --- /dev/null +++ b/queue-4.14/hv_netvsc-netvsc_teardown_gpadl-split.patch @@ -0,0 +1,147 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Vitaly Kuznetsov +Date: Mon, 14 May 2018 15:32:05 -0700 +Subject: hv_netvsc: netvsc_teardown_gpadl() split + +From: Vitaly Kuznetsov + +[ Commit 0cf737808ae7cb25e952be619db46b9147a92f46 upstream. ] + +It was found that in some cases host refuses to teardown GPADL for send/ +receive buffers (probably when some work with these buffers is scheduled or +ongoing). Change the teardown logic to be: +1) Send NVSP_MSG1_TYPE_REVOKE_* messages +2) Close the channel +3) Teardown GPADLs. +This seems to work reliably. + +Signed-off-by: Vitaly Kuznetsov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc.c | 69 ++++++++++++++++++++++---------------------- + 1 file changed, 36 insertions(+), 33 deletions(-) + +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -100,12 +100,11 @@ static void free_netvsc_device_rcu(struc + call_rcu(&nvdev->rcu, free_netvsc_device); + } + +-static void netvsc_destroy_buf(struct hv_device *device) ++static void netvsc_revoke_buf(struct hv_device *device, ++ struct netvsc_device *net_device) + { + struct nvsp_message *revoke_packet; + struct net_device *ndev = hv_get_drvdata(device); +- struct net_device_context *ndc = netdev_priv(ndev); +- struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev); + int ret; + + /* +@@ -148,28 +147,6 @@ static void netvsc_destroy_buf(struct hv + net_device->recv_section_cnt = 0; + } + +- /* Teardown the gpadl on the vsp end */ +- if (net_device->recv_buf_gpadl_handle) { +- ret = vmbus_teardown_gpadl(device->channel, +- net_device->recv_buf_gpadl_handle); +- +- /* If we failed here, we might as well return and have a leak +- * rather than continue and a bugchk +- */ +- if (ret != 0) { +- netdev_err(ndev, +- "unable to teardown receive buffer's gpadl\n"); +- return; +- } +- net_device->recv_buf_gpadl_handle = 0; +- } +- +- if (net_device->recv_buf) { +- /* Free up the receive buffer */ +- vfree(net_device->recv_buf); +- net_device->recv_buf = NULL; +- } +- + /* Deal with the send buffer we may have setup. 
+ * If we got a send section size, it means we received a + * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent +@@ -210,7 +187,35 @@ static void netvsc_destroy_buf(struct hv + } + net_device->send_section_cnt = 0; + } +- /* Teardown the gpadl on the vsp end */ ++} ++ ++static void netvsc_teardown_gpadl(struct hv_device *device, ++ struct netvsc_device *net_device) ++{ ++ struct net_device *ndev = hv_get_drvdata(device); ++ int ret; ++ ++ if (net_device->recv_buf_gpadl_handle) { ++ ret = vmbus_teardown_gpadl(device->channel, ++ net_device->recv_buf_gpadl_handle); ++ ++ /* If we failed here, we might as well return and have a leak ++ * rather than continue and a bugchk ++ */ ++ if (ret != 0) { ++ netdev_err(ndev, ++ "unable to teardown receive buffer's gpadl\n"); ++ return; ++ } ++ net_device->recv_buf_gpadl_handle = 0; ++ } ++ ++ if (net_device->recv_buf) { ++ /* Free up the receive buffer */ ++ vfree(net_device->recv_buf); ++ net_device->recv_buf = NULL; ++ } ++ + if (net_device->send_buf_gpadl_handle) { + ret = vmbus_teardown_gpadl(device->channel, + net_device->send_buf_gpadl_handle); +@@ -425,7 +430,8 @@ static int netvsc_init_buf(struct hv_dev + goto exit; + + cleanup: +- netvsc_destroy_buf(device); ++ netvsc_revoke_buf(device, net_device); ++ netvsc_teardown_gpadl(device, net_device); + + exit: + return ret; +@@ -544,11 +550,6 @@ cleanup: + return ret; + } + +-static void netvsc_disconnect_vsp(struct hv_device *device) +-{ +- netvsc_destroy_buf(device); +-} +- + /* + * netvsc_device_remove - Callback when the root bus device is removed + */ +@@ -562,7 +563,7 @@ void netvsc_device_remove(struct hv_devi + + cancel_work_sync(&net_device->subchan_work); + +- netvsc_disconnect_vsp(device); ++ netvsc_revoke_buf(device, net_device); + + RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); + +@@ -575,6 +576,8 @@ void netvsc_device_remove(struct hv_devi + /* Now, we can close the channel safely */ + vmbus_close(device->channel); + ++ netvsc_teardown_gpadl(device, 
net_device); ++ + /* And dissassociate NAPI context from device */ + for (i = 0; i < net_device->num_chn; i++) + netif_napi_del(&net_device->chan_table[i].napi); diff --git a/queue-4.14/hv_netvsc-only-wake-transmit-queue-if-link-is-up.patch b/queue-4.14/hv_netvsc-only-wake-transmit-queue-if-link-is-up.patch new file mode 100644 index 00000000000..927757446ad --- /dev/null +++ b/queue-4.14/hv_netvsc-only-wake-transmit-queue-if-link-is-up.patch @@ -0,0 +1,36 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Stephen Hemminger +Date: Mon, 14 May 2018 15:32:10 -0700 +Subject: hv_netvsc: only wake transmit queue if link is up + +From: Stephen Hemminger + +[ Commit f4950e4586dfc957e0a28226eeb992ddc049b5a2 upstream. ] + +Don't wake transmit queues if link is not up yet. + +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc_drv.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -88,12 +88,11 @@ static int netvsc_open(struct net_device + return ret; + } + +- netif_tx_wake_all_queues(net); +- + rdev = nvdev->extension; +- +- if (!rdev->link_state) ++ if (!rdev->link_state) { + netif_carrier_on(net); ++ netif_tx_wake_all_queues(net); ++ } + + if (vf_netdev) { + /* Setting synthetic device up transparently sets diff --git a/queue-4.14/hv_netvsc-preserve-hw_features-on-mtu-channels-ringparam-changes.patch b/queue-4.14/hv_netvsc-preserve-hw_features-on-mtu-channels-ringparam-changes.patch new file mode 100644 index 00000000000..88a885c0b27 --- /dev/null +++ b/queue-4.14/hv_netvsc-preserve-hw_features-on-mtu-channels-ringparam-changes.patch @@ -0,0 +1,232 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Vitaly Kuznetsov +Date: Mon, 14 May 2018 15:32:06 -0700 +Subject: hv_netvsc: preserve hw_features on mtu/channels/ringparam changes + +From: Vitaly Kuznetsov + +[ Commit 
aefd80e874e98a864915df5b7d90824a4340b450 upstream. ] + +rndis_filter_device_add() is called both from netvsc_probe() when we +initially create the device and from set channels/mtu/ringparam +routines where we basically remove the device and add it back. + +hw_features is reset in rndis_filter_device_add() and filled with +host data. However, we lose all additional flags which are set outside +of the driver, e.g. register_netdevice() adds NETIF_F_SOFT_FEATURES and +many others. + +Unfortunately, calls to rndis_{query_hwcaps(), _set_offload_params()} +cannot be avoided on every RNDIS reset: host expects us to set +required features explicitly. Moreover, in theory hardware capabilities +can change and we need to reflect the change in hw_features. + +Reset net->hw_features bits according to host data in +rndis_netdev_set_hwcaps(), clear corresponding feature bits +from net->features in case some features went missing (will never happen +in real life I guess but let's be consistent). + +Signed-off-by: Vitaly Kuznetsov +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/hyperv_net.h | 4 + + drivers/net/hyperv/netvsc_drv.c | 2 + drivers/net/hyperv/rndis_filter.c | 136 +++++++++++++++++++++----------------- + 3 files changed, 83 insertions(+), 59 deletions(-) + +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -659,6 +659,10 @@ struct nvsp_message { + #define NETVSC_RECEIVE_BUFFER_ID 0xcafe + #define NETVSC_SEND_BUFFER_ID 0 + ++#define NETVSC_SUPPORTED_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | \ ++ NETIF_F_TSO | NETIF_F_IPV6_CSUM | \ ++ NETIF_F_TSO6) ++ + #define VRSS_SEND_TAB_SIZE 16 /* must be power of 2 */ + #define VRSS_CHANNEL_MAX 64 + #define VRSS_CHANNEL_DEFAULT 8 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -1956,7 +1956,7 @@ static int netvsc_probe(struct hv_device + + memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + +- /* hw_features computed in rndis_filter_device_add */ ++ /* hw_features computed in rndis_netdev_set_hwcaps() */ + net->features = net->hw_features | + NETIF_F_HIGHDMA | NETIF_F_SG | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -1131,69 +1131,20 @@ unlock: + rtnl_unlock(); + } + +-struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, +- struct netvsc_device_info *device_info) ++static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, ++ struct netvsc_device *nvdev) + { +- struct net_device *net = hv_get_drvdata(dev); ++ struct net_device *net = rndis_device->ndev; + struct net_device_context *net_device_ctx = netdev_priv(net); +- struct netvsc_device *net_device; +- struct rndis_device *rndis_device; + struct ndis_offload hwcaps; + struct ndis_offload_params offloads; +- struct ndis_recv_scale_cap rsscap; +- u32 rsscap_size = sizeof(struct ndis_recv_scale_cap); + unsigned int gso_max_size = GSO_MAX_SIZE; +- u32 mtu, size; +- const struct 
cpumask *node_cpu_mask; +- u32 num_possible_rss_qs; +- int i, ret; +- +- rndis_device = get_rndis_device(); +- if (!rndis_device) +- return ERR_PTR(-ENODEV); +- +- /* +- * Let the inner driver handle this first to create the netvsc channel +- * NOTE! Once the channel is created, we may get a receive callback +- * (RndisFilterOnReceive()) before this call is completed +- */ +- net_device = netvsc_device_add(dev, device_info); +- if (IS_ERR(net_device)) { +- kfree(rndis_device); +- return net_device; +- } +- +- /* Initialize the rndis device */ +- net_device->max_chn = 1; +- net_device->num_chn = 1; +- +- net_device->extension = rndis_device; +- rndis_device->ndev = net; +- +- /* Send the rndis initialization message */ +- ret = rndis_filter_init_device(rndis_device, net_device); +- if (ret != 0) +- goto err_dev_remv; +- +- /* Get the MTU from the host */ +- size = sizeof(u32); +- ret = rndis_filter_query_device(rndis_device, net_device, +- RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE, +- &mtu, &size); +- if (ret == 0 && size == sizeof(u32) && mtu < net->mtu) +- net->mtu = mtu; +- +- /* Get the mac address */ +- ret = rndis_filter_query_device_mac(rndis_device, net_device); +- if (ret != 0) +- goto err_dev_remv; +- +- memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN); ++ int ret; + + /* Find HW offload capabilities */ +- ret = rndis_query_hwcaps(rndis_device, net_device, &hwcaps); ++ ret = rndis_query_hwcaps(rndis_device, nvdev, &hwcaps); + if (ret != 0) +- goto err_dev_remv; ++ return ret; + + /* A value of zero means "no change"; now turn on what we want. 
*/ + memset(&offloads, 0, sizeof(struct ndis_offload_params)); +@@ -1201,8 +1152,12 @@ struct netvsc_device *rndis_filter_devic + /* Linux does not care about IP checksum, always does in kernel */ + offloads.ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED; + ++ /* Reset previously set hw_features flags */ ++ net->hw_features &= ~NETVSC_SUPPORTED_HW_FEATURES; ++ net_device_ctx->tx_checksum_mask = 0; ++ + /* Compute tx offload settings based on hw capabilities */ +- net->hw_features = NETIF_F_RXCSUM; ++ net->hw_features |= NETIF_F_RXCSUM; + + if ((hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_ALL_TCP4) == NDIS_TXCSUM_ALL_TCP4) { + /* Can checksum TCP */ +@@ -1246,10 +1201,75 @@ struct netvsc_device *rndis_filter_devic + } + } + ++ /* In case some hw_features disappeared we need to remove them from ++ * net->features list as they're no longer supported. ++ */ ++ net->features &= ~NETVSC_SUPPORTED_HW_FEATURES | net->hw_features; ++ + netif_set_gso_max_size(net, gso_max_size); + +- ret = rndis_filter_set_offload_params(net, net_device, &offloads); +- if (ret) ++ ret = rndis_filter_set_offload_params(net, nvdev, &offloads); ++ ++ return ret; ++} ++ ++struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, ++ struct netvsc_device_info *device_info) ++{ ++ struct net_device *net = hv_get_drvdata(dev); ++ struct netvsc_device *net_device; ++ struct rndis_device *rndis_device; ++ struct ndis_recv_scale_cap rsscap; ++ u32 rsscap_size = sizeof(struct ndis_recv_scale_cap); ++ u32 mtu, size; ++ const struct cpumask *node_cpu_mask; ++ u32 num_possible_rss_qs; ++ int i, ret; ++ ++ rndis_device = get_rndis_device(); ++ if (!rndis_device) ++ return ERR_PTR(-ENODEV); ++ ++ /* Let the inner driver handle this first to create the netvsc channel ++ * NOTE! 
Once the channel is created, we may get a receive callback ++ * (RndisFilterOnReceive()) before this call is completed ++ */ ++ net_device = netvsc_device_add(dev, device_info); ++ if (IS_ERR(net_device)) { ++ kfree(rndis_device); ++ return net_device; ++ } ++ ++ /* Initialize the rndis device */ ++ net_device->max_chn = 1; ++ net_device->num_chn = 1; ++ ++ net_device->extension = rndis_device; ++ rndis_device->ndev = net; ++ ++ /* Send the rndis initialization message */ ++ ret = rndis_filter_init_device(rndis_device, net_device); ++ if (ret != 0) ++ goto err_dev_remv; ++ ++ /* Get the MTU from the host */ ++ size = sizeof(u32); ++ ret = rndis_filter_query_device(rndis_device, net_device, ++ RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE, ++ &mtu, &size); ++ if (ret == 0 && size == sizeof(u32) && mtu < net->mtu) ++ net->mtu = mtu; ++ ++ /* Get the mac address */ ++ ret = rndis_filter_query_device_mac(rndis_device, net_device); ++ if (ret != 0) ++ goto err_dev_remv; ++ ++ memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN); ++ ++ /* Query and set hardware capabilities */ ++ ret = rndis_netdev_set_hwcaps(rndis_device, net_device); ++ if (ret != 0) + goto err_dev_remv; + + rndis_filter_query_device_link_status(rndis_device, net_device); diff --git a/queue-4.14/hv_netvsc-rename-ind_table-to-rx_table.patch b/queue-4.14/hv_netvsc-rename-ind_table-to-rx_table.patch new file mode 100644 index 00000000000..8f54a6368dd --- /dev/null +++ b/queue-4.14/hv_netvsc-rename-ind_table-to-rx_table.patch @@ -0,0 +1,74 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Haiyang Zhang +Date: Mon, 14 May 2018 15:32:01 -0700 +Subject: hv_netvsc: Rename ind_table to rx_table + +From: Haiyang Zhang + +[ Commit 47371300dfc269dd8d150e5b872bdbbda98ba809 upstream. ] + +Rename this variable because it is the Receive indirection +table. + +Signed-off-by: Haiyang Zhang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/hyperv_net.h | 2 +- + drivers/net/hyperv/netvsc_drv.c | 4 ++-- + drivers/net/hyperv/rndis_filter.c | 6 +++--- + 3 files changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -179,7 +179,7 @@ struct rndis_device { + + u8 hw_mac_adr[ETH_ALEN]; + u8 rss_key[NETVSC_HASH_KEYLEN]; +- u16 ind_table[ITAB_NUM]; ++ u16 rx_table[ITAB_NUM]; + }; + + +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -1378,7 +1378,7 @@ static int netvsc_get_rxfh(struct net_de + rndis_dev = ndev->extension; + if (indir) { + for (i = 0; i < ITAB_NUM; i++) +- indir[i] = rndis_dev->ind_table[i]; ++ indir[i] = rndis_dev->rx_table[i]; + } + + if (key) +@@ -1408,7 +1408,7 @@ static int netvsc_set_rxfh(struct net_de + return -EINVAL; + + for (i = 0; i < ITAB_NUM; i++) +- rndis_dev->ind_table[i] = indir[i]; ++ rndis_dev->rx_table[i] = indir[i]; + } + + if (!key) { +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -759,7 +759,7 @@ int rndis_filter_set_rss_param(struct rn + /* Set indirection table entries */ + itab = (u32 *)(rssp + 1); + for (i = 0; i < ITAB_NUM; i++) +- itab[i] = rdev->ind_table[i]; ++ itab[i] = rdev->rx_table[i]; + + /* Set hask key values */ + keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset); +@@ -1284,8 +1284,8 @@ struct netvsc_device *rndis_filter_devic + net_device->num_chn = min(net_device->max_chn, device_info->num_chn); + + for (i = 0; i < ITAB_NUM; i++) +- rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i, +- net_device->num_chn); ++ rndis_device->rx_table[i] = ethtool_rxfh_indir_default( ++ i, net_device->num_chn); + + atomic_set(&net_device->open_chn, 1); + vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); diff --git a/queue-4.14/hv_netvsc-rename-tx_send_table-to-tx_table.patch b/queue-4.14/hv_netvsc-rename-tx_send_table-to-tx_table.patch new file 
mode 100644 index 00000000000..4adc099de74 --- /dev/null +++ b/queue-4.14/hv_netvsc-rename-tx_send_table-to-tx_table.patch @@ -0,0 +1,55 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Haiyang Zhang +Date: Mon, 14 May 2018 15:32:02 -0700 +Subject: hv_netvsc: Rename tx_send_table to tx_table + +From: Haiyang Zhang + +[ Commit 39e91cfbf6f5fb26ba64cc2e8874372baf1671e7 upstream. ] + +Simplify the variable name: tx_send_table + +Signed-off-by: Haiyang Zhang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/hyperv_net.h | 2 +- + drivers/net/hyperv/netvsc.c | 2 +- + drivers/net/hyperv/netvsc_drv.c | 4 ++-- + 3 files changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -734,7 +734,7 @@ struct net_device_context { + + u32 tx_checksum_mask; + +- u32 tx_send_table[VRSS_SEND_TAB_SIZE]; ++ u32 tx_table[VRSS_SEND_TAB_SIZE]; + + /* Ethtool settings */ + bool udp4_l4_hash; +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -1107,7 +1107,7 @@ static void netvsc_send_table(struct hv_ + nvmsg->msg.v5_msg.send_table.offset); + + for (i = 0; i < count; i++) +- net_device_ctx->tx_send_table[i] = tab[i]; ++ net_device_ctx->tx_table[i] = tab[i]; + } + + static void netvsc_send_vf(struct net_device_context *net_device_ctx, +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -234,8 +234,8 @@ static inline int netvsc_get_tx_queue(st + struct sock *sk = skb->sk; + int q_idx; + +- q_idx = ndc->tx_send_table[netvsc_get_hash(skb, ndc) & +- (VRSS_SEND_TAB_SIZE - 1)]; ++ q_idx = ndc->tx_table[netvsc_get_hash(skb, ndc) & ++ (VRSS_SEND_TAB_SIZE - 1)]; + + /* If queue index changed record the new value */ + if (q_idx != old_idx && diff --git a/queue-4.14/hv_netvsc-set-tx_table-to-equal-weight-after-subchannels-open.patch b/queue-4.14/hv_netvsc-set-tx_table-to-equal-weight-after-subchannels-open.patch new file mode 100644 index 
00000000000..aad569bf0e7 --- /dev/null +++ b/queue-4.14/hv_netvsc-set-tx_table-to-equal-weight-after-subchannels-open.patch @@ -0,0 +1,36 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Haiyang Zhang +Date: Mon, 14 May 2018 15:32:04 -0700 +Subject: hv_netvsc: Set tx_table to equal weight after subchannels open + +From: Haiyang Zhang + +[ Commit a6fb6aa3cfa9047b62653dbcfc9bcde6e2272b41 upstream. ] + +In some cases, like internal vSwitch, the host doesn't provide +send indirection table updates. This patch sets the table to be +equal weight after subchannels are all open. Otherwise, all workload +will be on one TX channel. + +As tested, this patch has largely increased the throughput over +internal vSwitch. + +Signed-off-by: Haiyang Zhang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/rndis_filter.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -1114,6 +1114,9 @@ void rndis_set_subchannel(struct work_st + netif_set_real_num_tx_queues(ndev, nvdev->num_chn); + netif_set_real_num_rx_queues(ndev, nvdev->num_chn); + ++ for (i = 0; i < VRSS_SEND_TAB_SIZE; i++) ++ ndev_ctx->tx_table[i] = i % nvdev->num_chn; ++ + rtnl_unlock(); + return; + diff --git a/queue-4.14/hv_netvsc-split-netvsc_revoke_buf-and-netvsc_teardown_gpadl.patch b/queue-4.14/hv_netvsc-split-netvsc_revoke_buf-and-netvsc_teardown_gpadl.patch new file mode 100644 index 00000000000..6d15acb3fdd --- /dev/null +++ b/queue-4.14/hv_netvsc-split-netvsc_revoke_buf-and-netvsc_teardown_gpadl.patch @@ -0,0 +1,125 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Mohammed Gamal +Date: Mon, 14 May 2018 15:32:20 -0700 +Subject: hv_netvsc: Split netvsc_revoke_buf() and netvsc_teardown_gpadl() + +From: Mohammed Gamal + +[ Commit 7992894c305eaf504d005529637ff8283d0a849d upstream. ] + +Split each of the functions into two for each of send/recv buffers. 
+This will be needed in order to implement a fine-grained messaging +sequence to the host so that we accommodate the requirements of +different Windows versions + +Fixes: 0ef58b0a05c12 ("hv_netvsc: change GPAD teardown order on older versions") +Signed-off-by: Mohammed Gamal +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc.c | 46 ++++++++++++++++++++++++++++++++------------ + 1 file changed, 34 insertions(+), 12 deletions(-) + +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -105,11 +105,11 @@ static void free_netvsc_device_rcu(struc + call_rcu(&nvdev->rcu, free_netvsc_device); + } + +-static void netvsc_revoke_buf(struct hv_device *device, +- struct netvsc_device *net_device) ++static void netvsc_revoke_recv_buf(struct hv_device *device, ++ struct netvsc_device *net_device) + { +- struct nvsp_message *revoke_packet; + struct net_device *ndev = hv_get_drvdata(device); ++ struct nvsp_message *revoke_packet; + int ret; + + /* +@@ -151,6 +151,14 @@ static void netvsc_revoke_buf(struct hv_ + } + net_device->recv_section_cnt = 0; + } ++} ++ ++static void netvsc_revoke_send_buf(struct hv_device *device, ++ struct netvsc_device *net_device) ++{ ++ struct net_device *ndev = hv_get_drvdata(device); ++ struct nvsp_message *revoke_packet; ++ int ret; + + /* Deal with the send buffer we may have setup. 
+ * If we got a send section size, it means we received a +@@ -194,8 +202,8 @@ static void netvsc_revoke_buf(struct hv_ + } + } + +-static void netvsc_teardown_gpadl(struct hv_device *device, +- struct netvsc_device *net_device) ++static void netvsc_teardown_recv_gpadl(struct hv_device *device, ++ struct netvsc_device *net_device) + { + struct net_device *ndev = hv_get_drvdata(device); + int ret; +@@ -214,6 +222,13 @@ static void netvsc_teardown_gpadl(struct + } + net_device->recv_buf_gpadl_handle = 0; + } ++} ++ ++static void netvsc_teardown_send_gpadl(struct hv_device *device, ++ struct netvsc_device *net_device) ++{ ++ struct net_device *ndev = hv_get_drvdata(device); ++ int ret; + + if (net_device->send_buf_gpadl_handle) { + ret = vmbus_teardown_gpadl(device->channel, +@@ -423,8 +438,10 @@ static int netvsc_init_buf(struct hv_dev + goto exit; + + cleanup: +- netvsc_revoke_buf(device, net_device); +- netvsc_teardown_gpadl(device, net_device); ++ netvsc_revoke_recv_buf(device, net_device); ++ netvsc_revoke_send_buf(device, net_device); ++ netvsc_teardown_recv_gpadl(device, net_device); ++ netvsc_teardown_send_gpadl(device, net_device); + + exit: + return ret; +@@ -554,7 +571,8 @@ void netvsc_device_remove(struct hv_devi + = rtnl_dereference(net_device_ctx->nvdev); + int i; + +- netvsc_revoke_buf(device, net_device); ++ netvsc_revoke_recv_buf(device, net_device); ++ netvsc_revoke_send_buf(device, net_device); + + RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); + +@@ -569,14 +587,18 @@ void netvsc_device_remove(struct hv_devi + netdev_dbg(ndev, "net device safe to remove\n"); + + /* older versions require that buffer be revoked before close */ +- if (vmbus_proto_version < VERSION_WIN10) +- netvsc_teardown_gpadl(device, net_device); ++ if (vmbus_proto_version < VERSION_WIN10) { ++ netvsc_teardown_recv_gpadl(device, net_device); ++ netvsc_teardown_send_gpadl(device, net_device); ++ } + + /* Now, we can close the channel safely */ + vmbus_close(device->channel); + +- 
if (vmbus_proto_version >= VERSION_WIN10) +- netvsc_teardown_gpadl(device, net_device); ++ if (vmbus_proto_version >= VERSION_WIN10) { ++ netvsc_teardown_recv_gpadl(device, net_device); ++ netvsc_teardown_send_gpadl(device, net_device); ++ } + + /* Release all resources */ + free_netvsc_device_rcu(net_device); diff --git a/queue-4.14/hv_netvsc-use-rcu-to-fix-concurrent-rx-and-queue-changes.patch b/queue-4.14/hv_netvsc-use-rcu-to-fix-concurrent-rx-and-queue-changes.patch new file mode 100644 index 00000000000..bcbb737147f --- /dev/null +++ b/queue-4.14/hv_netvsc-use-rcu-to-fix-concurrent-rx-and-queue-changes.patch @@ -0,0 +1,159 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Stephen Hemminger +Date: Mon, 14 May 2018 15:32:16 -0700 +Subject: hv_netvsc: use RCU to fix concurrent rx and queue changes + +From: Stephen Hemminger + +[ Commit 02400fcee2542ee334a2394e0d9f6efd969fe782 upstream. ] + +The receive processing may continue to happen while the +internal network device state is in RCU grace period. +The internal RNDIS structure is associated with the +internal netvsc_device structure; both have the same +RCU lifetime. + +Defer freeing all associated parts until after grace +period. + +Fixes: 0cf737808ae7 ("hv_netvsc: netvsc_teardown_gpadl() split") +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc.c | 17 ++++------------ + drivers/net/hyperv/rndis_filter.c | 39 ++++++++++++++++---------------------- + 2 files changed, 22 insertions(+), 34 deletions(-) + +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -89,6 +89,11 @@ static void free_netvsc_device(struct rc + = container_of(head, struct netvsc_device, rcu); + int i; + ++ kfree(nvdev->extension); ++ vfree(nvdev->recv_buf); ++ vfree(nvdev->send_buf); ++ kfree(nvdev->send_section_map); ++ + for (i = 0; i < VRSS_CHANNEL_MAX; i++) + vfree(nvdev->chan_table[i].mrc.slots); + +@@ -210,12 +215,6 @@ static void netvsc_teardown_gpadl(struct + net_device->recv_buf_gpadl_handle = 0; + } + +- if (net_device->recv_buf) { +- /* Free up the receive buffer */ +- vfree(net_device->recv_buf); +- net_device->recv_buf = NULL; +- } +- + if (net_device->send_buf_gpadl_handle) { + ret = vmbus_teardown_gpadl(device->channel, + net_device->send_buf_gpadl_handle); +@@ -230,12 +229,6 @@ static void netvsc_teardown_gpadl(struct + } + net_device->send_buf_gpadl_handle = 0; + } +- if (net_device->send_buf) { +- /* Free up the send buffer */ +- vfree(net_device->send_buf); +- net_device->send_buf = NULL; +- } +- kfree(net_device->send_section_map); + } + + int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx) +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -266,13 +266,23 @@ static void rndis_set_link_state(struct + } + } + +-static void rndis_filter_receive_response(struct rndis_device *dev, +- struct rndis_message *resp) ++static void rndis_filter_receive_response(struct net_device *ndev, ++ struct netvsc_device *nvdev, ++ const struct rndis_message *resp) + { ++ struct rndis_device *dev = nvdev->extension; + struct rndis_request *request = NULL; + bool found = false; + unsigned long flags; +- struct net_device *ndev = dev->ndev; ++ ++ /* This should never happen, it means control 
message ++ * response received after device removed. ++ */ ++ if (dev->state == RNDIS_DEV_UNINITIALIZED) { ++ netdev_err(ndev, ++ "got rndis message uninitialized\n"); ++ return; ++ } + + spin_lock_irqsave(&dev->request_lock, flags); + list_for_each_entry(request, &dev->req_list, list_ent) { +@@ -353,7 +363,7 @@ static inline void *rndis_get_ppi(struct + } + + static int rndis_filter_receive_data(struct net_device *ndev, +- struct rndis_device *dev, ++ struct netvsc_device *nvdev, + struct rndis_message *msg, + struct vmbus_channel *channel, + void *data, u32 data_buflen) +@@ -373,7 +383,7 @@ static int rndis_filter_receive_data(str + * should be the data packet size plus the trailer padding size + */ + if (unlikely(data_buflen < rndis_pkt->data_len)) { +- netdev_err(dev->ndev, "rndis message buffer " ++ netdev_err(ndev, "rndis message buffer " + "overflow detected (got %u, min %u)" + "...dropping this message!\n", + data_buflen, rndis_pkt->data_len); +@@ -401,34 +411,20 @@ int rndis_filter_receive(struct net_devi + void *data, u32 buflen) + { + struct net_device_context *net_device_ctx = netdev_priv(ndev); +- struct rndis_device *rndis_dev = net_dev->extension; + struct rndis_message *rndis_msg = data; + +- /* Make sure the rndis device state is initialized */ +- if (unlikely(!rndis_dev)) { +- netif_err(net_device_ctx, rx_err, ndev, +- "got rndis message but no rndis device!\n"); +- return NVSP_STAT_FAIL; +- } +- +- if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) { +- netif_err(net_device_ctx, rx_err, ndev, +- "got rndis message uninitialized\n"); +- return NVSP_STAT_FAIL; +- } +- + if (netif_msg_rx_status(net_device_ctx)) + dump_rndis_message(dev, rndis_msg); + + switch (rndis_msg->ndis_msg_type) { + case RNDIS_MSG_PACKET: +- return rndis_filter_receive_data(ndev, rndis_dev, rndis_msg, ++ return rndis_filter_receive_data(ndev, net_dev, rndis_msg, + channel, data, buflen); + case RNDIS_MSG_INIT_C: + case RNDIS_MSG_QUERY_C: + case RNDIS_MSG_SET_C: + /* 
completion msgs */ +- rndis_filter_receive_response(rndis_dev, rndis_msg); ++ rndis_filter_receive_response(ndev, net_dev, rndis_msg); + break; + + case RNDIS_MSG_INDICATE: +@@ -1349,7 +1345,6 @@ void rndis_filter_device_remove(struct h + net_dev->extension = NULL; + + netvsc_device_remove(dev); +- kfree(rndis_dev); + } + + int rndis_filter_open(struct netvsc_device *nvdev) diff --git a/queue-4.14/hv_netvsc-use-the-num_online_cpus-for-channel-limit.patch b/queue-4.14/hv_netvsc-use-the-num_online_cpus-for-channel-limit.patch new file mode 100644 index 00000000000..3a539916549 --- /dev/null +++ b/queue-4.14/hv_netvsc-use-the-num_online_cpus-for-channel-limit.patch @@ -0,0 +1,49 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Haiyang Zhang +Date: Mon, 14 May 2018 15:32:08 -0700 +Subject: hv_netvsc: Use the num_online_cpus() for channel limit + +From: Haiyang Zhang + +[ Commit 25a39f7f975c3c26a0052fbf9b59201c06744332 upstream. ] + +Since we no longer localize channel/CPU affiliation within one NUMA +node, num_online_cpus() is used as the number of channel cap, instead of +the number of processors in a NUMA node. + +This patch allows a bigger range for tuning the number of channels. + +Signed-off-by: Haiyang Zhang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/rndis_filter.c | 11 ++--------- + 1 file changed, 2 insertions(+), 9 deletions(-) + +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -1221,7 +1221,6 @@ struct netvsc_device *rndis_filter_devic + struct ndis_recv_scale_cap rsscap; + u32 rsscap_size = sizeof(struct ndis_recv_scale_cap); + u32 mtu, size; +- const struct cpumask *node_cpu_mask; + u32 num_possible_rss_qs; + int i, ret; + +@@ -1290,14 +1289,8 @@ struct netvsc_device *rndis_filter_devic + if (ret || rsscap.num_recv_que < 2) + goto out; + +- /* +- * We will limit the VRSS channels to the number CPUs in the NUMA node +- * the primary channel is currently bound to. 
+- * +- * This also guarantees that num_possible_rss_qs <= num_online_cpus +- */ +- node_cpu_mask = cpumask_of_node(cpu_to_node(dev->channel->target_cpu)); +- num_possible_rss_qs = min_t(u32, cpumask_weight(node_cpu_mask), ++ /* This guarantees that num_possible_rss_qs <= num_online_cpus */ ++ num_possible_rss_qs = min_t(u32, num_online_cpus(), + rsscap.num_recv_que); + + net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, num_possible_rss_qs); diff --git a/queue-4.14/hv_netvsc-use-windows-version-instead-of-nvsp-version-on-gpad-teardown.patch b/queue-4.14/hv_netvsc-use-windows-version-instead-of-nvsp-version-on-gpad-teardown.patch new file mode 100644 index 00000000000..3f5e38cf02f --- /dev/null +++ b/queue-4.14/hv_netvsc-use-windows-version-instead-of-nvsp-version-on-gpad-teardown.patch @@ -0,0 +1,42 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Mohammed Gamal +Date: Mon, 14 May 2018 15:32:19 -0700 +Subject: hv_netvsc: Use Windows version instead of NVSP version on GPAD teardown + +From: Mohammed Gamal + +commit 2afc5d61a7197de25a61f54ea4ecfb4cb62b1d42 upstream. + +When changing network interface settings, Windows guests +older than WS2016 can no longer shutdown. This was addressed +by commit 0ef58b0a05c12 ("hv_netvsc: change GPAD teardown order +on older versions"), however the issue also occurs on WS2012 +guests that share NVSP protocol versions with WS2016 guests. +Hence we use Windows version directly to differentiate them. + +Fixes: 0ef58b0a05c12 ("hv_netvsc: change GPAD teardown order on older versions") +Signed-off-by: Mohammed Gamal +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -569,13 +569,13 @@ void netvsc_device_remove(struct hv_devi + netdev_dbg(ndev, "net device safe to remove\n"); + + /* older versions require that buffer be revoked before close */ +- if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4) ++ if (vmbus_proto_version < VERSION_WIN10) + netvsc_teardown_gpadl(device, net_device); + + /* Now, we can close the channel safely */ + vmbus_close(device->channel); + +- if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4) ++ if (vmbus_proto_version >= VERSION_WIN10) + netvsc_teardown_gpadl(device, net_device); + + /* Release all resources */ diff --git a/queue-4.14/net-fix-a-bug-in-removing-queues-from-xps-map.patch b/queue-4.14/net-fix-a-bug-in-removing-queues-from-xps-map.patch new file mode 100644 index 00000000000..301d67f6a06 --- /dev/null +++ b/queue-4.14/net-fix-a-bug-in-removing-queues-from-xps-map.patch @@ -0,0 +1,33 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Amritha Nambiar +Date: Thu, 17 May 2018 14:50:44 -0700 +Subject: net: Fix a bug in removing queues from XPS map + +From: Amritha Nambiar + +[ Upstream commit 6358d49ac23995fdfe157cc8747ab0f274d3954b ] + +While removing queues from the XPS map, the individual CPU ID +alone was used to index the CPUs map, this should be changed to also +factor in the traffic class mapping for the CPU-to-queue lookup. + +Fixes: 184c449f91fe ("net: Add support for XPS with QoS via traffic classes") +Signed-off-by: Amritha Nambiar +Acked-by: Alexander Duyck +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -2058,7 +2058,7 @@ static bool remove_xps_queue_cpu(struct + int i, j; + + for (i = count, j = offset; i--; j++) { +- if (!remove_xps_queue(dev_maps, cpu, j)) ++ if (!remove_xps_queue(dev_maps, tci, j)) + break; + } + diff --git a/queue-4.14/net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch b/queue-4.14/net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch new file mode 100644 index 00000000000..e9238c77fe6 --- /dev/null +++ b/queue-4.14/net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch @@ -0,0 +1,48 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Tarick Bedeir +Date: Sun, 13 May 2018 16:38:45 -0700 +Subject: net/mlx4_core: Fix error handling in mlx4_init_port_info. + +From: Tarick Bedeir + +[ Upstream commit 57f6f99fdad9984801cde05c1db68fe39b474a10 ] + +Avoid exiting the function with a lingering sysfs file (if the first +call to device_create_file() fails while the second succeeds), and avoid +calling devlink_port_unregister() twice. + +In other words, either mlx4_init_port_info() succeeds and returns zero, or +it fails, returns non-zero, and requires no cleanup. + +Fixes: 096335b3f983 ("mlx4_core: Allow dynamic MTU configuration for IB ports") +Signed-off-by: Tarick Bedeir +Reviewed-by: Leon Romanovsky +Reviewed-by: Tariq Toukan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/main.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/main.c ++++ b/drivers/net/ethernet/mellanox/mlx4/main.c +@@ -3007,6 +3007,7 @@ static int mlx4_init_port_info(struct ml + mlx4_err(dev, "Failed to create file for port %d\n", port); + devlink_port_unregister(&info->devlink_port); + info->port = -1; ++ return err; + } + + sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port); +@@ -3028,9 +3029,10 @@ static int mlx4_init_port_info(struct ml + &info->port_attr); + devlink_port_unregister(&info->devlink_port); + info->port = -1; ++ return err; + } + +- return err; ++ return 0; + } + + static void mlx4_cleanup_port_info(struct mlx4_port_info *info) diff --git a/queue-4.14/net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch b/queue-4.14/net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch new file mode 100644 index 00000000000..63d358612a8 --- /dev/null +++ b/queue-4.14/net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch @@ -0,0 +1,34 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Davide Caratti +Date: Wed, 16 May 2018 12:54:29 +0200 +Subject: net/sched: fix refcnt leak in the error path of tcf_vlan_init() + +From: Davide Caratti + +[ Upstream commit 5a4931ae0193f8a4a97e8260fd0df1d705d83299 ] + +Similarly to what was done with commit a52956dfc503 ("net sched actions: +fix refcnt leak in skbmod"), fix the error path of tcf_vlan_init() to avoid +refcnt leaks when wrong value of TCA_VLAN_PUSH_VLAN_PROTOCOL is given. + +Fixes: 5026c9b1bafc ("net sched: vlan action fix late binding") +CC: Roman Mashak +Signed-off-by: Davide Caratti +Acked-by: Jamal Hadi Salim +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_vlan.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/sched/act_vlan.c ++++ b/net/sched/act_vlan.c +@@ -154,6 +154,8 @@ static int tcf_vlan_init(struct net *net + case htons(ETH_P_8021AD): + break; + default: ++ if (exists) ++ tcf_idr_release(*a, bind); + return -EPROTONOSUPPORT; + } + } else { diff --git a/queue-4.14/net-sched-red-avoid-hashing-null-child.patch b/queue-4.14/net-sched-red-avoid-hashing-null-child.patch new file mode 100644 index 00000000000..4b8cc6ac10e --- /dev/null +++ b/queue-4.14/net-sched-red-avoid-hashing-null-child.patch @@ -0,0 +1,108 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Paolo Abeni +Date: Fri, 18 May 2018 14:51:44 +0200 +Subject: net: sched: red: avoid hashing NULL child + +From: Paolo Abeni + +[ Upstream commit 44a63b137f7b6e4c7bd6c9cc21615941cb36509d ] + +Hangbin reported an Oops triggered by the syzkaller qdisc rules: + + kasan: GPF could be caused by NULL-ptr deref or user memory access + general protection fault: 0000 [#1] SMP KASAN PTI + Modules linked in: sch_red + CPU: 0 PID: 28699 Comm: syz-executor5 Not tainted 4.17.0-rc4.kcov #1 + Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 + RIP: 0010:qdisc_hash_add+0x26/0xa0 + RSP: 0018:ffff8800589cf470 EFLAGS: 00010203 + RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff824ad971 + RDX: 0000000000000007 RSI: ffffc9000ce9f000 RDI: 000000000000003c + RBP: 0000000000000001 R08: ffffed000b139ea2 R09: ffff8800589cf4f0 + R10: ffff8800589cf50f R11: ffffed000b139ea2 R12: ffff880054019fc0 + R13: ffff880054019fb4 R14: ffff88005c0af600 R15: ffff880054019fb0 + FS: 00007fa6edcb1700(0000) GS:ffff88005ce00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000020000740 CR3: 000000000fc16000 CR4: 00000000000006f0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + 
red_change+0x2d2/0xed0 [sch_red] + qdisc_create+0x57e/0xef0 + tc_modify_qdisc+0x47f/0x14e0 + rtnetlink_rcv_msg+0x6a8/0x920 + netlink_rcv_skb+0x2a2/0x3c0 + netlink_unicast+0x511/0x740 + netlink_sendmsg+0x825/0xc30 + sock_sendmsg+0xc5/0x100 + ___sys_sendmsg+0x778/0x8e0 + __sys_sendmsg+0xf5/0x1b0 + do_syscall_64+0xbd/0x3b0 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + RIP: 0033:0x450869 + RSP: 002b:00007fa6edcb0c48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e + RAX: ffffffffffffffda RBX: 00007fa6edcb16b4 RCX: 0000000000450869 + RDX: 0000000000000000 RSI: 00000000200000c0 RDI: 0000000000000013 + RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000 + R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff + R13: 0000000000008778 R14: 0000000000702838 R15: 00007fa6edcb1700 + Code: e9 0b fe ff ff 0f 1f 44 00 00 55 53 48 89 fb 89 f5 e8 3f 07 f3 fe 48 8d 7b 3c 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 04 84 d2 75 51 + RIP: qdisc_hash_add+0x26/0xa0 RSP: ffff8800589cf470 + +When a red qdisc is updated with a 0 limit, the child qdisc is left +unmodified, no additional scheduler is created in red_change(), +the 'child' local variable is rightfully NULL and must not add it +to the hash table. + +This change addresses the above issue moving qdisc_hash_add() right +after the child qdisc creation. It additionally removes unneeded checks +for noop_qdisc. + +Reported-by: Hangbin Liu +Fixes: 49b499718fa1 ("net: sched: make default fifo qdiscs appear in the dump") +Signed-off-by: Paolo Abeni +Acked-by: Jiri Kosina +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_red.c | 5 +++-- + net/sched/sch_tbf.c | 5 +++-- + 2 files changed, 6 insertions(+), 4 deletions(-) + +--- a/net/sched/sch_red.c ++++ b/net/sched/sch_red.c +@@ -191,10 +191,11 @@ static int red_change(struct Qdisc *sch, + child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit); + if (IS_ERR(child)) + return PTR_ERR(child); +- } + +- if (child != &noop_qdisc) ++ /* child is fifo, no need to check for noop_qdisc */ + qdisc_hash_add(child, true); ++ } ++ + sch_tree_lock(sch); + q->flags = ctl->flags; + q->limit = ctl->limit; +--- a/net/sched/sch_tbf.c ++++ b/net/sched/sch_tbf.c +@@ -388,6 +388,9 @@ static int tbf_change(struct Qdisc *sch, + err = PTR_ERR(child); + goto done; + } ++ ++ /* child is fifo, no need to check for noop_qdisc */ ++ qdisc_hash_add(child, true); + } + + sch_tree_lock(sch); +@@ -396,8 +399,6 @@ static int tbf_change(struct Qdisc *sch, + q->qdisc->qstats.backlog); + qdisc_destroy(q->qdisc); + q->qdisc = child; +- if (child != &noop_qdisc) +- qdisc_hash_add(child, true); + } + q->limit = qopt->limit; + if (tb[TCA_TBF_PBURST]) diff --git a/queue-4.14/net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch b/queue-4.14/net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch new file mode 100644 index 00000000000..26a8a39f783 --- /dev/null +++ b/queue-4.14/net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch @@ -0,0 +1,132 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Eric Biggers +Date: Sun, 13 May 2018 17:01:30 -0700 +Subject: net/smc: check for missing nlattrs in SMC_PNETID messages + +From: Eric Biggers + +[ Upstream commit d49baa7e12ee70c0a7b821d088a770c94c02e494 ] + +It's possible to crash the kernel in several different ways by sending +messages to the SMC_PNETID generic netlink family that are missing the +expected attributes: + +- Missing SMC_PNETID_NAME => null pointer dereference when comparing + names. 
+- Missing SMC_PNETID_ETHNAME => null pointer dereference accessing + smc_pnetentry::ndev. +- Missing SMC_PNETID_IBNAME => null pointer dereference accessing + smc_pnetentry::smcibdev. +- Missing SMC_PNETID_IBPORT => out of bounds array access to + smc_ib_device::pattr[-1]. + +Fix it by validating that all expected attributes are present and that +SMC_PNETID_IBPORT is nonzero. + +Reported-by: syzbot+5cd61039dc9b8bfa6e47@syzkaller.appspotmail.com +Fixes: 6812baabf24d ("smc: establish pnet table management") +Cc: # v4.11+ +Signed-off-by: Eric Biggers +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/smc/smc_pnet.c | 71 +++++++++++++++++++++++++++++------------------------ + 1 file changed, 40 insertions(+), 31 deletions(-) + +--- a/net/smc/smc_pnet.c ++++ b/net/smc/smc_pnet.c +@@ -245,40 +245,45 @@ out: + static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem, + struct nlattr *tb[]) + { +- char *string, *ibname = NULL; +- int rc = 0; ++ char *string, *ibname; ++ int rc; + + memset(pnetelem, 0, sizeof(*pnetelem)); + INIT_LIST_HEAD(&pnetelem->list); +- if (tb[SMC_PNETID_NAME]) { +- string = (char *)nla_data(tb[SMC_PNETID_NAME]); +- if (!smc_pnetid_valid(string, pnetelem->pnet_name)) { +- rc = -EINVAL; +- goto error; +- } +- } +- if (tb[SMC_PNETID_ETHNAME]) { +- string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); +- pnetelem->ndev = dev_get_by_name(net, string); +- if (!pnetelem->ndev) +- return -ENOENT; +- } +- if (tb[SMC_PNETID_IBNAME]) { +- ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); +- ibname = strim(ibname); +- pnetelem->smcibdev = smc_pnet_find_ib(ibname); +- if (!pnetelem->smcibdev) { +- rc = -ENOENT; +- goto error; +- } +- } +- if (tb[SMC_PNETID_IBPORT]) { +- pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); +- if (pnetelem->ib_port > SMC_MAX_PORTS) { +- rc = -EINVAL; +- goto error; +- } +- } ++ ++ rc = -EINVAL; ++ if (!tb[SMC_PNETID_NAME]) ++ goto error; ++ string = (char 
*)nla_data(tb[SMC_PNETID_NAME]); ++ if (!smc_pnetid_valid(string, pnetelem->pnet_name)) ++ goto error; ++ ++ rc = -EINVAL; ++ if (!tb[SMC_PNETID_ETHNAME]) ++ goto error; ++ rc = -ENOENT; ++ string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); ++ pnetelem->ndev = dev_get_by_name(net, string); ++ if (!pnetelem->ndev) ++ goto error; ++ ++ rc = -EINVAL; ++ if (!tb[SMC_PNETID_IBNAME]) ++ goto error; ++ rc = -ENOENT; ++ ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); ++ ibname = strim(ibname); ++ pnetelem->smcibdev = smc_pnet_find_ib(ibname); ++ if (!pnetelem->smcibdev) ++ goto error; ++ ++ rc = -EINVAL; ++ if (!tb[SMC_PNETID_IBPORT]) ++ goto error; ++ pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); ++ if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS) ++ goto error; ++ + return 0; + + error: +@@ -307,6 +312,8 @@ static int smc_pnet_get(struct sk_buff * + void *hdr; + int rc; + ++ if (!info->attrs[SMC_PNETID_NAME]) ++ return -EINVAL; + pnetelem = smc_pnet_find_pnetid( + (char *)nla_data(info->attrs[SMC_PNETID_NAME])); + if (!pnetelem) +@@ -359,6 +366,8 @@ static int smc_pnet_add(struct sk_buff * + + static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) + { ++ if (!info->attrs[SMC_PNETID_NAME]) ++ return -EINVAL; + return smc_pnet_remove_by_pnetid( + (char *)nla_data(info->attrs[SMC_PNETID_NAME])); + } diff --git a/queue-4.14/net-test-tailroom-before-appending-to-linear-skb.patch b/queue-4.14/net-test-tailroom-before-appending-to-linear-skb.patch new file mode 100644 index 00000000000..ca4f5ce4528 --- /dev/null +++ b/queue-4.14/net-test-tailroom-before-appending-to-linear-skb.patch @@ -0,0 +1,54 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Willem de Bruijn +Date: Thu, 17 May 2018 13:13:29 -0400 +Subject: net: test tailroom before appending to linear skb + +From: Willem de Bruijn + +[ Upstream commit 113f99c3358564a0647d444c2ae34e8b1abfd5b9 ] + +Device features may change during transmission. 
In particular with +corking, a device may toggle scatter-gather in between allocating +and writing to an skb. + +Do not unconditionally assume that !NETIF_F_SG at write time implies +that the same held at alloc time and thus the skb has sufficient +tailroom. + +This issue predates git history. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: Eric Dumazet +Signed-off-by: Willem de Bruijn +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_output.c | 3 ++- + net/ipv6/ip6_output.c | 3 ++- + 2 files changed, 4 insertions(+), 2 deletions(-) + +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -1040,7 +1040,8 @@ alloc_new_skb: + if (copy > length) + copy = length; + +- if (!(rt->dst.dev->features&NETIF_F_SG)) { ++ if (!(rt->dst.dev->features&NETIF_F_SG) && ++ skb_tailroom(skb) >= copy) { + unsigned int off; + + off = skb->len; +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1488,7 +1488,8 @@ alloc_new_skb: + if (copy > length) + copy = length; + +- if (!(rt->dst.dev->features&NETIF_F_SG)) { ++ if (!(rt->dst.dev->features&NETIF_F_SG) && ++ skb_tailroom(skb) >= copy) { + unsigned int off; + + off = skb->len; diff --git a/queue-4.14/packet-in-packet_snd-start-writing-at-link-layer-allocation.patch b/queue-4.14/packet-in-packet_snd-start-writing-at-link-layer-allocation.patch new file mode 100644 index 00000000000..897760b5e91 --- /dev/null +++ b/queue-4.14/packet-in-packet_snd-start-writing-at-link-layer-allocation.patch @@ -0,0 +1,56 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Willem de Bruijn +Date: Fri, 11 May 2018 13:24:25 -0400 +Subject: packet: in packet_snd start writing at link layer allocation + +From: Willem de Bruijn + +[ Upstream commit b84bbaf7a6c8cca24f8acf25a2c8e46913a947ba ] + +Packet sockets allow construction of packets shorter than +dev->hard_header_len to accommodate protocols with variable length +link layer headers. 
These packets are padded to dev->hard_header_len, +because some device drivers interpret that as a minimum packet size. + +packet_snd reserves dev->hard_header_len bytes on allocation. +SOCK_DGRAM sockets call skb_push in dev_hard_header() to ensure that +link layer headers are stored in the reserved range. SOCK_RAW sockets +do the same in tpacket_snd, but not in packet_snd. + +Syzbot was able to send a zero byte packet to a device with massive +116B link layer header, causing padding to cross over into skb_shinfo. +Fix this by writing from the start of the llheader reserved range also +in the case of packet_snd/SOCK_RAW. + +Update skb_set_network_header to the new offset. This also corrects +it for SOCK_DGRAM, where it incorrectly double counted reserve due to +the skb_push in dev_hard_header. + +Fixes: 9ed988cd5915 ("packet: validate variable length ll headers") +Reported-by: syzbot+71d74a5406d02057d559@syzkaller.appspotmail.com +Signed-off-by: Willem de Bruijn +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2912,13 +2912,15 @@ static int packet_snd(struct socket *soc + if (skb == NULL) + goto out_unlock; + +- skb_set_network_header(skb, reserve); ++ skb_reset_network_header(skb); + + err = -EINVAL; + if (sock->type == SOCK_DGRAM) { + offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); + if (unlikely(offset < 0)) + goto out_free; ++ } else if (reserve) { ++ skb_push(skb, reserve); + } + + /* Returns -EFAULT on error */ diff --git a/queue-4.14/series b/queue-4.14/series new file mode 100644 index 00000000000..f502f401830 --- /dev/null +++ b/queue-4.14/series @@ -0,0 +1,35 @@ +net-fix-a-bug-in-removing-queues-from-xps-map.patch +net-mlx4_core-fix-error-handling-in-mlx4_init_port_info.patch +net-sched-fix-refcnt-leak-in-the-error-path-of-tcf_vlan_init.patch +net-sched-red-avoid-hashing-null-child.patch +net-smc-check-for-missing-nlattrs-in-smc_pnetid-messages.patch +net-test-tailroom-before-appending-to-linear-skb.patch +packet-in-packet_snd-start-writing-at-link-layer-allocation.patch +sock_diag-fix-use-after-free-read-in-__sk_free.patch +tcp-purge-write-queue-in-tcp_connect_init.patch +vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch +vmxnet3-use-dma-memory-barriers-where-required.patch +hv_netvsc-fix-the-real-number-of-queues-of-non-vrss-cases.patch +hv_netvsc-rename-ind_table-to-rx_table.patch +hv_netvsc-rename-tx_send_table-to-tx_table.patch +hv_netvsc-add-initialization-of-tx_table-in-netvsc_device_add.patch +hv_netvsc-set-tx_table-to-equal-weight-after-subchannels-open.patch +hv_netvsc-netvsc_teardown_gpadl-split.patch +hv_netvsc-preserve-hw_features-on-mtu-channels-ringparam-changes.patch +hv_netvsc-empty-current-transmit-aggregation-if-flow-blocked.patch +hv_netvsc-use-the-num_online_cpus-for-channel-limit.patch 
+hv_netvsc-avoid-retry-on-send-during-shutdown.patch +hv_netvsc-only-wake-transmit-queue-if-link-is-up.patch +hv_netvsc-fix-error-unwind-handling-if-vmbus_open-fails.patch +hv_netvsc-cancel-subchannel-setup-before-halting-device.patch +hv_netvsc-fix-race-in-napi-poll-when-rescheduling.patch +hv_netvsc-defer-queue-selection-to-vf.patch +hv_netvsc-disable-napi-before-channel-close.patch +hv_netvsc-use-rcu-to-fix-concurrent-rx-and-queue-changes.patch +hv_netvsc-change-gpad-teardown-order-on-older-versions.patch +hv_netvsc-common-detach-logic.patch +hv_netvsc-use-windows-version-instead-of-nvsp-version-on-gpad-teardown.patch +hv_netvsc-split-netvsc_revoke_buf-and-netvsc_teardown_gpadl.patch +hv_netvsc-ensure-correct-teardown-message-sequence-order.patch +hv_netvsc-fix-net-device-attach-on-older-windows-hosts.patch +sparc-vio-use-put_device-instead-of-kfree.patch diff --git a/queue-4.14/sock_diag-fix-use-after-free-read-in-__sk_free.patch b/queue-4.14/sock_diag-fix-use-after-free-read-in-__sk_free.patch new file mode 100644 index 00000000000..377846c1d9d --- /dev/null +++ b/queue-4.14/sock_diag-fix-use-after-free-read-in-__sk_free.patch @@ -0,0 +1,128 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Eric Dumazet +Date: Fri, 18 May 2018 04:47:55 -0700 +Subject: sock_diag: fix use-after-free read in __sk_free + +From: Eric Dumazet + +[ Upstream commit 9709020c86f6bf8439ca3effc58cfca49a5de192 ] + +We must not call sock_diag_has_destroy_listeners(sk) on a socket +that has no reference on net structure. 
+ +BUG: KASAN: use-after-free in sock_diag_has_destroy_listeners include/linux/sock_diag.h:75 [inline] +BUG: KASAN: use-after-free in __sk_free+0x329/0x340 net/core/sock.c:1609 +Read of size 8 at addr ffff88018a02e3a0 by task swapper/1/0 + +CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.17.0-rc5+ #54 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1b9/0x294 lib/dump_stack.c:113 + print_address_description+0x6c/0x20b mm/kasan/report.c:256 + kasan_report_error mm/kasan/report.c:354 [inline] + kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412 + __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 + sock_diag_has_destroy_listeners include/linux/sock_diag.h:75 [inline] + __sk_free+0x329/0x340 net/core/sock.c:1609 + sk_free+0x42/0x50 net/core/sock.c:1623 + sock_put include/net/sock.h:1664 [inline] + reqsk_free include/net/request_sock.h:116 [inline] + reqsk_put include/net/request_sock.h:124 [inline] + inet_csk_reqsk_queue_drop_and_put net/ipv4/inet_connection_sock.c:672 [inline] + reqsk_timer_handler+0xe27/0x10e0 net/ipv4/inet_connection_sock.c:739 + call_timer_fn+0x230/0x940 kernel/time/timer.c:1326 + expire_timers kernel/time/timer.c:1363 [inline] + __run_timers+0x79e/0xc50 kernel/time/timer.c:1666 + run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692 + __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285 + invoke_softirq kernel/softirq.c:365 [inline] + irq_exit+0x1d1/0x200 kernel/softirq.c:405 + exiting_irq arch/x86/include/asm/apic.h:525 [inline] + smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052 + apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863 + +RIP: 0010:native_safe_halt+0x6/0x10 arch/x86/include/asm/irqflags.h:54 +RSP: 0018:ffff8801d9ae7c38 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff13 +RAX: dffffc0000000000 RBX: 1ffff1003b35cf8a RCX: 0000000000000000 +RDX: 1ffffffff11a30d0 RSI: 0000000000000001 RDI: 
ffffffff88d18680 +RBP: ffff8801d9ae7c38 R08: ffffed003b5e46c3 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001 +R13: ffff8801d9ae7cf0 R14: ffffffff897bef20 R15: 0000000000000000 + arch_safe_halt arch/x86/include/asm/paravirt.h:94 [inline] + default_idle+0xc2/0x440 arch/x86/kernel/process.c:354 + arch_cpu_idle+0x10/0x20 arch/x86/kernel/process.c:345 + default_idle_call+0x6d/0x90 kernel/sched/idle.c:93 + cpuidle_idle_call kernel/sched/idle.c:153 [inline] + do_idle+0x395/0x560 kernel/sched/idle.c:262 + cpu_startup_entry+0x104/0x120 kernel/sched/idle.c:368 + start_secondary+0x426/0x5b0 arch/x86/kernel/smpboot.c:269 + secondary_startup_64+0xa5/0xb0 arch/x86/kernel/head_64.S:242 + +Allocated by task 4557: + save_stack+0x43/0xd0 mm/kasan/kasan.c:448 + set_track mm/kasan/kasan.c:460 [inline] + kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553 + kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490 + kmem_cache_alloc+0x12e/0x760 mm/slab.c:3554 + kmem_cache_zalloc include/linux/slab.h:691 [inline] + net_alloc net/core/net_namespace.c:383 [inline] + copy_net_ns+0x159/0x4c0 net/core/net_namespace.c:423 + create_new_namespaces+0x69d/0x8f0 kernel/nsproxy.c:107 + unshare_nsproxy_namespaces+0xc3/0x1f0 kernel/nsproxy.c:206 + ksys_unshare+0x708/0xf90 kernel/fork.c:2408 + __do_sys_unshare kernel/fork.c:2476 [inline] + __se_sys_unshare kernel/fork.c:2474 [inline] + __x64_sys_unshare+0x31/0x40 kernel/fork.c:2474 + do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +Freed by task 69: + save_stack+0x43/0xd0 mm/kasan/kasan.c:448 + set_track mm/kasan/kasan.c:460 [inline] + __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521 + kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 + __cache_free mm/slab.c:3498 [inline] + kmem_cache_free+0x86/0x2d0 mm/slab.c:3756 + net_free net/core/net_namespace.c:399 [inline] + net_drop_ns.part.14+0x11a/0x130 net/core/net_namespace.c:406 + net_drop_ns net/core/net_namespace.c:405 [inline] + 
cleanup_net+0x6a1/0xb20 net/core/net_namespace.c:541 + process_one_work+0xc1e/0x1b50 kernel/workqueue.c:2145 + worker_thread+0x1cc/0x1440 kernel/workqueue.c:2279 + kthread+0x345/0x410 kernel/kthread.c:240 + ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412 + +The buggy address belongs to the object at ffff88018a02c140 + which belongs to the cache net_namespace of size 8832 +The buggy address is located 8800 bytes inside of + 8832-byte region [ffff88018a02c140, ffff88018a02e3c0) +The buggy address belongs to the page: +page:ffffea0006280b00 count:1 mapcount:0 mapping:ffff88018a02c140 index:0x0 compound_mapcount: 0 +flags: 0x2fffc0000008100(slab|head) +raw: 02fffc0000008100 ffff88018a02c140 0000000000000000 0000000100000001 +raw: ffffea00062a1320 ffffea0006268020 ffff8801d9bdde40 0000000000000000 +page dumped because: kasan: bad access detected + +Fixes: b922622ec6ef ("sock_diag: don't broadcast kernel sockets") +Signed-off-by: Eric Dumazet +Cc: Craig Gallek +Reported-by: syzbot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1595,7 +1595,7 @@ void sk_destruct(struct sock *sk) + + static void __sk_free(struct sock *sk) + { +- if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) ++ if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk))) + sock_diag_broadcast_destroy(sk); + else + sk_destruct(sk); diff --git a/queue-4.14/sparc-vio-use-put_device-instead-of-kfree.patch b/queue-4.14/sparc-vio-use-put_device-instead-of-kfree.patch new file mode 100644 index 00000000000..d8831d1e62e --- /dev/null +++ b/queue-4.14/sparc-vio-use-put_device-instead-of-kfree.patch @@ -0,0 +1,31 @@ +From 00ad691ab140b54ab9f5de5e74cb994f552e8124 Mon Sep 17 00:00:00 2001 +From: Arvind Yadav +Date: Wed, 25 Apr 2018 20:26:14 +0530 +Subject: sparc: vio: use put_device() instead of kfree() + +From: Arvind Yadav + +[ Upstream commit 00ad691ab140b54ab9f5de5e74cb994f552e8124 ] + +Never directly free @dev after calling device_register(), even +if it returned an error. Always use put_device() to give up the +reference initialized. + +Signed-off-by: Arvind Yadav +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/kernel/vio.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/sparc/kernel/vio.c ++++ b/arch/sparc/kernel/vio.c +@@ -403,7 +403,7 @@ static struct vio_dev *vio_create_one(st + if (err) { + printk(KERN_ERR "VIO: Could not register device %s, err=%d\n", + dev_name(&vdev->dev), err); +- kfree(vdev); ++ put_device(&vdev->dev); + return NULL; + } + if (vdev->dp) diff --git a/queue-4.14/tcp-purge-write-queue-in-tcp_connect_init.patch b/queue-4.14/tcp-purge-write-queue-in-tcp_connect_init.patch new file mode 100644 index 00000000000..ddae6673f3e --- /dev/null +++ b/queue-4.14/tcp-purge-write-queue-in-tcp_connect_init.patch @@ -0,0 +1,88 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: Eric Dumazet +Date: Mon, 14 May 2018 21:14:26 -0700 +Subject: tcp: purge write queue in tcp_connect_init() + +From: Eric Dumazet + +[ Upstream commit 7f582b248d0a86bae5788c548d7bb5bca6f7691a ] + +syzkaller found a reliable way to crash the host, hitting a BUG() +in __tcp_retransmit_skb() + +Malicious MSG_FASTOPEN is the root cause. We need to purge write queue +in tcp_connect_init() at the point we init snd_una/write_seq. + +This patch also replaces the BUG() by a less intrusive WARN_ON_ONCE() + +kernel BUG at net/ipv4/tcp_output.c:2837! 
+invalid opcode: 0000 [#1] SMP KASAN +Dumping ftrace buffer: + (ftrace buffer empty) +Modules linked in: +CPU: 0 PID: 5276 Comm: syz-executor0 Not tainted 4.17.0-rc3+ #51 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +RIP: 0010:__tcp_retransmit_skb+0x2992/0x2eb0 net/ipv4/tcp_output.c:2837 +RSP: 0000:ffff8801dae06ff8 EFLAGS: 00010206 +RAX: ffff8801b9fe61c0 RBX: 00000000ffc18a16 RCX: ffffffff864e1a49 +RDX: 0000000000000100 RSI: ffffffff864e2e12 RDI: 0000000000000005 +RBP: ffff8801dae073a0 R08: ffff8801b9fe61c0 R09: ffffed0039c40dd2 +R10: ffffed0039c40dd2 R11: ffff8801ce206e93 R12: 00000000421eeaad +R13: ffff8801ce206d4e R14: ffff8801ce206cc0 R15: ffff8801cd4f4a80 +FS: 0000000000000000(0000) GS:ffff8801dae00000(0063) knlGS:00000000096bc900 +CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 +CR2: 0000000020000000 CR3: 00000001c47b6000 CR4: 00000000001406f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + tcp_retransmit_skb+0x2e/0x250 net/ipv4/tcp_output.c:2923 + tcp_retransmit_timer+0xc50/0x3060 net/ipv4/tcp_timer.c:488 + tcp_write_timer_handler+0x339/0x960 net/ipv4/tcp_timer.c:573 + tcp_write_timer+0x111/0x1d0 net/ipv4/tcp_timer.c:593 + call_timer_fn+0x230/0x940 kernel/time/timer.c:1326 + expire_timers kernel/time/timer.c:1363 [inline] + __run_timers+0x79e/0xc50 kernel/time/timer.c:1666 + run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692 + __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285 + invoke_softirq kernel/softirq.c:365 [inline] + irq_exit+0x1d1/0x200 kernel/softirq.c:405 + exiting_irq arch/x86/include/asm/apic.h:525 [inline] + smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052 + apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863 + +Fixes: cf60af03ca4e ("net-tcp: Fast Open client - sendmsg(MSG_FASTOPEN)") +Signed-off-by: Eric Dumazet +Cc: Yuchung Cheng +Cc: Neal Cardwell +Reported-by: syzbot 
+Acked-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2814,8 +2814,10 @@ int __tcp_retransmit_skb(struct sock *sk + return -EBUSY; + + if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { +- if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) +- BUG(); ++ if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) { ++ WARN_ON_ONCE(1); ++ return -EINVAL; ++ } + if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) + return -ENOMEM; + } +@@ -3312,6 +3314,7 @@ static void tcp_connect_init(struct sock + sock_reset_flag(sk, SOCK_DONE); + tp->snd_wnd = 0; + tcp_init_wl(tp, 0); ++ tcp_write_queue_purge(sk); + tp->snd_una = tp->write_seq; + tp->snd_sml = tp->write_seq; + tp->snd_up = tp->write_seq; diff --git a/queue-4.14/vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch b/queue-4.14/vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch new file mode 100644 index 00000000000..e741b8e8d27 --- /dev/null +++ b/queue-4.14/vmxnet3-set-the-dma-mask-before-the-first-dma-map-operation.patch @@ -0,0 +1,134 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: "hpreg@vmware.com" +Date: Mon, 14 May 2018 08:14:34 -0400 +Subject: vmxnet3: set the DMA mask before the first DMA map operation + +From: "hpreg@vmware.com" + +[ Upstream commit 61aeecea40afb2b89933e27cd4adb10fc2e75cfd ] + +The DMA mask must be set before, not after, the first DMA map operation, or +the first DMA map operation could in theory fail on some systems. + +Fixes: b0eb57cb97e78 ("VMXNET3: Add support for virtual IOMMU") +Signed-off-by: Regis Duchesne +Acked-by: Ronak Doshi +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vmxnet3/vmxnet3_drv.c | 50 +++++++++++++++++++------------------- + 1 file changed, 25 insertions(+), 25 deletions(-) + +--- a/drivers/net/vmxnet3/vmxnet3_drv.c ++++ b/drivers/net/vmxnet3/vmxnet3_drv.c +@@ -2675,7 +2675,7 @@ vmxnet3_set_mac_addr(struct net_device * + /* ==================== initialization and cleanup routines ============ */ + + static int +-vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64) ++vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter) + { + int err; + unsigned long mmio_start, mmio_len; +@@ -2687,30 +2687,12 @@ vmxnet3_alloc_pci_resources(struct vmxne + return err; + } + +- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { +- if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) { +- dev_err(&pdev->dev, +- "pci_set_consistent_dma_mask failed\n"); +- err = -EIO; +- goto err_set_mask; +- } +- *dma64 = true; +- } else { +- if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) { +- dev_err(&pdev->dev, +- "pci_set_dma_mask failed\n"); +- err = -EIO; +- goto err_set_mask; +- } +- *dma64 = false; +- } +- + err = pci_request_selected_regions(pdev, (1 << 2) - 1, + vmxnet3_driver_name); + if (err) { + dev_err(&pdev->dev, + "Failed to request region for adapter: error %d\n", err); +- goto err_set_mask; ++ goto err_enable_device; + } + + pci_set_master(pdev); +@@ -2738,7 +2720,7 @@ err_bar1: + iounmap(adapter->hw_addr0); + err_ioremap: + pci_release_selected_regions(pdev, (1 << 2) - 1); +-err_set_mask: ++err_enable_device: + pci_disable_device(pdev); + return err; + } +@@ -3243,7 +3225,7 @@ vmxnet3_probe_device(struct pci_dev *pde + #endif + }; + int err; +- bool dma64 = false; /* stupid gcc */ ++ bool dma64; + u32 ver; + struct net_device *netdev; + struct vmxnet3_adapter *adapter; +@@ -3289,6 +3271,24 @@ vmxnet3_probe_device(struct pci_dev *pde + adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE; + adapter->rx_ring2_size = VMXNET3_DEF_RX_RING2_SIZE; 
+ ++ if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { ++ if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) { ++ dev_err(&pdev->dev, ++ "pci_set_consistent_dma_mask failed\n"); ++ err = -EIO; ++ goto err_set_mask; ++ } ++ dma64 = true; ++ } else { ++ if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) { ++ dev_err(&pdev->dev, ++ "pci_set_dma_mask failed\n"); ++ err = -EIO; ++ goto err_set_mask; ++ } ++ dma64 = false; ++ } ++ + spin_lock_init(&adapter->cmd_lock); + adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter, + sizeof(struct vmxnet3_adapter), +@@ -3296,7 +3296,7 @@ vmxnet3_probe_device(struct pci_dev *pde + if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) { + dev_err(&pdev->dev, "Failed to map dma\n"); + err = -EFAULT; +- goto err_dma_map; ++ goto err_set_mask; + } + adapter->shared = dma_alloc_coherent( + &adapter->pdev->dev, +@@ -3347,7 +3347,7 @@ vmxnet3_probe_device(struct pci_dev *pde + } + #endif /* VMXNET3_RSS */ + +- err = vmxnet3_alloc_pci_resources(adapter, &dma64); ++ err = vmxnet3_alloc_pci_resources(adapter); + if (err < 0) + goto err_alloc_pci; + +@@ -3493,7 +3493,7 @@ err_alloc_queue_desc: + err_alloc_shared: + dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa, + sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE); +-err_dma_map: ++err_set_mask: + free_netdev(netdev); + return err; + } diff --git a/queue-4.14/vmxnet3-use-dma-memory-barriers-where-required.patch b/queue-4.14/vmxnet3-use-dma-memory-barriers-where-required.patch new file mode 100644 index 00000000000..d6418f9f723 --- /dev/null +++ b/queue-4.14/vmxnet3-use-dma-memory-barriers-where-required.patch @@ -0,0 +1,73 @@ +From foo@baz Tue May 22 20:10:42 CEST 2018 +From: "hpreg@vmware.com" +Date: Mon, 14 May 2018 08:14:49 -0400 +Subject: vmxnet3: use DMA memory barriers where required + +From: "hpreg@vmware.com" + +[ Upstream commit f3002c1374fb2367c9d8dbb28852791ef90d2bac ] + +The gen bits must be read first from (resp. 
written last to) DMA memory. +The proper way to enforce this on Linux is to call dma_rmb() (resp. +dma_wmb()). + +Signed-off-by: Regis Duchesne +Acked-by: Ronak Doshi +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vmxnet3/vmxnet3_drv.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +--- a/drivers/net/vmxnet3/vmxnet3_drv.c ++++ b/drivers/net/vmxnet3/vmxnet3_drv.c +@@ -369,6 +369,11 @@ vmxnet3_tq_tx_complete(struct vmxnet3_tx + + gdesc = tq->comp_ring.base + tq->comp_ring.next2proc; + while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) { ++ /* Prevent any &gdesc->tcd field from being (speculatively) ++ * read before (&gdesc->tcd)->gen is read. ++ */ ++ dma_rmb(); ++ + completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX( + &gdesc->tcd), tq, adapter->pdev, + adapter); +@@ -1099,6 +1104,11 @@ vmxnet3_tq_xmit(struct sk_buff *skb, str + gdesc->txd.tci = skb_vlan_tag_get(skb); + } + ++ /* Ensure that the write to (&gdesc->txd)->gen will be observed after ++ * all other writes to &gdesc->txd. ++ */ ++ dma_wmb(); ++ + /* finally flips the GEN bit of the SOP desc. */ + gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^ + VMXNET3_TXD_GEN); +@@ -1286,6 +1296,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx + */ + break; + } ++ ++ /* Prevent any rcd field from being (speculatively) read before ++ * rcd->gen is read. ++ */ ++ dma_rmb(); ++ + BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2 && + rcd->rqID != rq->dataRingQid); + idx = rcd->rxdIdx; +@@ -1515,6 +1531,12 @@ rcd_done: + ring->next2comp = idx; + num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring); + ring = rq->rx_ring + ring_idx; ++ ++ /* Ensure that the writes to rxd->gen bits will be observed ++ * after all other writes to rxd objects. ++ */ ++ dma_wmb(); ++ + while (num_to_alloc) { + vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd, + &rxCmdDesc);