From a7b0967db425d27b349d436bf83e4a18143f6fe3 Mon Sep 17 00:00:00 2001 From: Michael Tremer Date: Tue, 15 Dec 2015 18:30:56 +0000 Subject: [PATCH] linux: Backport Hyper-V network driver Signed-off-by: Michael Tremer --- lfs/linux | 28 +- ...rv-Get-rid-of-the-rndis_filter_packe.patch | 129 +++ ...rt-for-virtual-Receive-Side-Scaling-.patch | 917 ++++++++++++++++++ ...-net-hyperv-Cleanup-the-receive-path.patch | 112 +++ ...hyperv-Remove-recv_pkt_list-and-lock.patch | 384 ++++++++ ...rv-Cleanup-the-netvsc-receive-callba.patch | 101 ++ ...mplify-the-send_completion-variables.patch | 105 ++ ...t-NetVSP-versions-to-auto-negotiatio.patch | 171 ++++ ...e-sendbuf-mechanism-on-the-send-path.patch | 407 ++++++++ ...netvsc-build-without-CONFIG_SYSFS-fl.patch | 42 + ...net-hyperv-Enable-scatter-gather-I-O.patch | 212 ++++ ...ers-net-hyperv-Cleanup-the-send-path.patch | 266 +++++ .../0006-net-get-rid-of-SET_ETHTOOL_OPS.patch | 44 + ...t-hyperv-Enable-offloads-on-the-host.patch | 196 ++++ ...ash-value-into-RNDIS-Per-packet-info.patch | 93 ++ ...rv-Enable-receive-side-IP-checksum-o.patch | 147 +++ ...ent-cut-n-paste-error-in-send-path-t.patch | 32 + ...rv-Enable-send-side-checksum-offload.patch | 140 +++ ...error-return-code-in-netvsc_init_buf.patch | 34 + ...net-hyperv-Enable-large-send-offload.patch | 153 +++ ...yperv-Fix-a-bug-in-netvsc_start_xmit.patch | 47 + ...-receive-buffer-size-for-legacy-hos.patch} | 14 +- ...0011-hyperv-Fix-a-bug-in-netvsc_send.patch | 68 ++ ...rv-Allocate-memory-for-all-possible-.patch | 34 + ...rv-Negotiate-suitable-ndis-version-f.patch | 32 + ...t-hyperv-Address-UDP-checksum-issues.patch | 111 +++ ...erv-Properly-handle-checksum-offload.patch | 42 + ...rt-for-virtual-Receive-Side-Scaling-.patch | 917 ++++++++++++++++++ ...hyperv-Remove-recv_pkt_list-and-lock.patch | 384 ++++++++ ...mplify-the-send_completion-variables.patch | 105 ++ ...e-sendbuf-mechanism-on-the-send-path.patch | 407 ++++++++ ...netvsc-build-without-CONFIG_SYSFS-fl.patch | 42 + 
...ash-value-into-RNDIS-Per-packet-info.patch | 93 ++ ...ent-cut-n-paste-error-in-send-path-t.patch | 32 + ...error-return-code-in-netvsc_init_buf.patch | 34 + ...0024-hyperv-Fix-a-bug-in-netvsc_send.patch | 68 ++ ...bus-Support-per-channel-driver-state.patch | 51 + 37 files changed, 6187 insertions(+), 7 deletions(-) create mode 100644 src/patches/linux/0001-Drivers-net-hyperv-Get-rid-of-the-rndis_filter_packe.patch create mode 100644 src/patches/linux/0001-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch create mode 100644 src/patches/linux/0002-Drivers-net-hyperv-Cleanup-the-receive-path.patch create mode 100644 src/patches/linux/0002-hyperv-Remove-recv_pkt_list-and-lock.patch create mode 100644 src/patches/linux/0003-Drivers-net-hyperv-Cleanup-the-netvsc-receive-callba.patch create mode 100644 src/patches/linux/0003-hyperv-Simplify-the-send_completion-variables.patch create mode 100644 src/patches/linux/0004-hyperv-Add-latest-NetVSP-versions-to-auto-negotiatio.patch create mode 100644 src/patches/linux/0004-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch create mode 100644 src/patches/linux/0005-Add-support-for-netvsc-build-without-CONFIG_SYSFS-fl.patch create mode 100644 src/patches/linux/0005-Drivers-net-hyperv-Enable-scatter-gather-I-O.patch create mode 100644 src/patches/linux/0006-Drivers-net-hyperv-Cleanup-the-send-path.patch create mode 100644 src/patches/linux/0006-net-get-rid-of-SET_ETHTOOL_OPS.patch create mode 100644 src/patches/linux/0007-Drivers-net-hyperv-Enable-offloads-on-the-host.patch create mode 100644 src/patches/linux/0007-hyperv-Add-hash-value-into-RNDIS-Per-packet-info.patch create mode 100644 src/patches/linux/0008-Drivers-net-hyperv-Enable-receive-side-IP-checksum-o.patch create mode 100644 src/patches/linux/0008-hyperv-fix-apparent-cut-n-paste-error-in-send-path-t.patch create mode 100644 src/patches/linux/0009-Drivers-net-hyperv-Enable-send-side-checksum-offload.patch create mode 100644 
src/patches/linux/0009-hyperv-Fix-error-return-code-in-netvsc_init_buf.patch create mode 100644 src/patches/linux/0010-Drivers-net-hyperv-Enable-large-send-offload.patch create mode 100644 src/patches/linux/0010-hyperv-Fix-a-bug-in-netvsc_start_xmit.patch rename src/patches/{linux-3.14.x-hyperv-2008-fix.patch => linux/0011-hyperv-Change-the-receive-buffer-size-for-legacy-hos.patch} (81%) create mode 100644 src/patches/linux/0011-hyperv-Fix-a-bug-in-netvsc_send.patch create mode 100644 src/patches/linux/0012-Drivers-net-hyperv-Allocate-memory-for-all-possible-.patch create mode 100644 src/patches/linux/0013-Drivers-net-hyperv-Negotiate-suitable-ndis-version-f.patch create mode 100644 src/patches/linux/0014-Drivers-net-hyperv-Address-UDP-checksum-issues.patch create mode 100644 src/patches/linux/0015-hyperv-Properly-handle-checksum-offload.patch create mode 100644 src/patches/linux/0016-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch create mode 100644 src/patches/linux/0017-hyperv-Remove-recv_pkt_list-and-lock.patch create mode 100644 src/patches/linux/0018-hyperv-Simplify-the-send_completion-variables.patch create mode 100644 src/patches/linux/0019-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch create mode 100644 src/patches/linux/0020-Add-support-for-netvsc-build-without-CONFIG_SYSFS-fl.patch create mode 100644 src/patches/linux/0021-hyperv-Add-hash-value-into-RNDIS-Per-packet-info.patch create mode 100644 src/patches/linux/0022-hyperv-fix-apparent-cut-n-paste-error-in-send-path-t.patch create mode 100644 src/patches/linux/0023-hyperv-Fix-error-return-code-in-netvsc_init_buf.patch create mode 100644 src/patches/linux/0024-hyperv-Fix-a-bug-in-netvsc_send.patch create mode 100644 src/patches/linux/0025-Drivers-hv-vmbus-Support-per-channel-driver-state.patch diff --git a/lfs/linux b/lfs/linux index e1d6a4ecb9..7f9c36cebf 100644 --- a/lfs/linux +++ b/lfs/linux @@ -171,8 +171,32 @@ endif # update the queued trim blacklist from kernel 4.2rc1 cd 
$(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-3.14.43_new_qtrim_blacklist.patch - # HyperV 2008 patch - cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-3.14.x-hyperv-2008-fix.patch + # Hyper-V patches + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0001-Drivers-net-hyperv-Get-rid-of-the-rndis_filter_packe.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0002-Drivers-net-hyperv-Cleanup-the-receive-path.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0003-Drivers-net-hyperv-Cleanup-the-netvsc-receive-callba.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0004-hyperv-Add-latest-NetVSP-versions-to-auto-negotiatio.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0005-Drivers-net-hyperv-Enable-scatter-gather-I-O.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0006-Drivers-net-hyperv-Cleanup-the-send-path.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0007-Drivers-net-hyperv-Enable-offloads-on-the-host.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0008-Drivers-net-hyperv-Enable-receive-side-IP-checksum-o.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0009-Drivers-net-hyperv-Enable-send-side-checksum-offload.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0010-Drivers-net-hyperv-Enable-large-send-offload.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0011-hyperv-Change-the-receive-buffer-size-for-legacy-hos.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0012-Drivers-net-hyperv-Allocate-memory-for-all-possible-.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0013-Drivers-net-hyperv-Negotiate-suitable-ndis-version-f.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0014-Drivers-net-hyperv-Address-UDP-checksum-issues.patch + cd $(DIR_APP) && patch -Np1 < 
$(DIR_SRC)/src/patches/linux/0015-hyperv-Properly-handle-checksum-offload.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0016-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0017-hyperv-Remove-recv_pkt_list-and-lock.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0018-hyperv-Simplify-the-send_completion-variables.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0019-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0020-Add-support-for-netvsc-build-without-CONFIG_SYSFS-fl.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0021-hyperv-Add-hash-value-into-RNDIS-Per-packet-info.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0022-hyperv-fix-apparent-cut-n-paste-error-in-send-path-t.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0023-hyperv-Fix-error-return-code-in-netvsc_init_buf.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0024-hyperv-Fix-a-bug-in-netvsc_send.patch + cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0025-Drivers-hv-vmbus-Support-per-channel-driver-state.patch # fix empty symbol crc's cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-genksyms_fix_typeof_handling.patch diff --git a/src/patches/linux/0001-Drivers-net-hyperv-Get-rid-of-the-rndis_filter_packe.patch b/src/patches/linux/0001-Drivers-net-hyperv-Get-rid-of-the-rndis_filter_packe.patch new file mode 100644 index 0000000000..2e0039217e --- /dev/null +++ b/src/patches/linux/0001-Drivers-net-hyperv-Get-rid-of-the-rndis_filter_packe.patch @@ -0,0 +1,129 @@ +From f3f885fa684ff18fa4d223dc22b782f5e5d32560 Mon Sep 17 00:00:00 2001 +From: KY Srinivasan +Date: Sun, 16 Feb 2014 16:38:43 -0800 +Subject: [PATCH 01/25] Drivers: net: hyperv: Get rid of the + rndis_filter_packet structure + +This structure is redundant; get 
rid of it make the code little more efficient - +get rid of the unnecessary indirection. + +Signed-off-by: K. Y. Srinivasan +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/hyperv_net.h | 6 ------ + drivers/net/hyperv/netvsc_drv.c | 2 +- + drivers/net/hyperv/rndis_filter.c | 41 +++------------------------------------ + 3 files changed, 4 insertions(+), 45 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index 7b594ce3f21d..7645ba38bde8 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -846,12 +846,6 @@ struct rndis_message { + }; + + +-struct rndis_filter_packet { +- void *completion_ctx; +- void (*completion)(void *context); +- struct rndis_message msg; +-}; +- + /* Handy macros */ + + /* get the size of an RNDIS message. Pass in the message type, */ +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 3c1c33ceffba..28020f83ba6f 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -156,7 +156,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + /* Allocate a netvsc packet based on # of frags. 
*/ + packet = kzalloc(sizeof(struct hv_netvsc_packet) + + (num_pages * sizeof(struct hv_page_buffer)) + +- sizeof(struct rndis_filter_packet) + ++ sizeof(struct rndis_message) + + NDIS_VLAN_PPI_SIZE, GFP_ATOMIC); + if (!packet) { + /* out of memory, drop packet */ +diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c +index b54fd257652b..6a9f6021f09c 100644 +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -58,9 +58,6 @@ struct rndis_request { + u8 request_ext[RNDIS_EXT_LEN]; + }; + +-static void rndis_filter_send_completion(void *ctx); +- +- + static struct rndis_device *get_rndis_device(void) + { + struct rndis_device *device; +@@ -297,7 +294,7 @@ static void rndis_filter_receive_response(struct rndis_device *dev, + "rndis response buffer overflow " + "detected (size %u max %zu)\n", + resp->msg_len, +- sizeof(struct rndis_filter_packet)); ++ sizeof(struct rndis_message)); + + if (resp->ndis_msg_type == + RNDIS_MSG_RESET_C) { +@@ -917,17 +914,14 @@ int rndis_filter_close(struct hv_device *dev) + int rndis_filter_send(struct hv_device *dev, + struct hv_netvsc_packet *pkt) + { +- int ret; +- struct rndis_filter_packet *filter_pkt; + struct rndis_message *rndis_msg; + struct rndis_packet *rndis_pkt; + u32 rndis_msg_size; + bool isvlan = pkt->vlan_tci & VLAN_TAG_PRESENT; + + /* Add the rndis header */ +- filter_pkt = (struct rndis_filter_packet *)pkt->extension; ++ rndis_msg = (struct rndis_message *)pkt->extension; + +- rndis_msg = &filter_pkt->msg; + rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet); + if (isvlan) + rndis_msg_size += NDIS_VLAN_PPI_SIZE; +@@ -980,34 +974,5 @@ int rndis_filter_send(struct hv_device *dev, + pkt->page_buf[1].len = rndis_msg_size - pkt->page_buf[0].len; + } + +- /* Save the packet send completion and context */ +- filter_pkt->completion = pkt->completion.send.send_completion; +- filter_pkt->completion_ctx = +- pkt->completion.send.send_completion_ctx; +- +- /* Use 
ours */ +- pkt->completion.send.send_completion = rndis_filter_send_completion; +- pkt->completion.send.send_completion_ctx = filter_pkt; +- +- ret = netvsc_send(dev, pkt); +- if (ret != 0) { +- /* +- * Reset the completion to originals to allow retries from +- * above +- */ +- pkt->completion.send.send_completion = +- filter_pkt->completion; +- pkt->completion.send.send_completion_ctx = +- filter_pkt->completion_ctx; +- } +- +- return ret; +-} +- +-static void rndis_filter_send_completion(void *ctx) +-{ +- struct rndis_filter_packet *filter_pkt = ctx; +- +- /* Pass it back to the original handler */ +- filter_pkt->completion(filter_pkt->completion_ctx); ++ return netvsc_send(dev, pkt); + } +-- +2.4.3 + diff --git a/src/patches/linux/0001-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch b/src/patches/linux/0001-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch new file mode 100644 index 0000000000..52c4da6561 --- /dev/null +++ b/src/patches/linux/0001-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch @@ -0,0 +1,917 @@ +From 5b54dac856cb5bd6f33f4159012773e4a33704f7 Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang +Date: Mon, 21 Apr 2014 10:20:28 -0700 +Subject: [PATCH 01/11] hyperv: Add support for virtual Receive Side Scaling + (vRSS) + +This feature allows multiple channels to be used by each virtual NIC. +It is available on Hyper-V host 2012 R2. + +Signed-off-by: Haiyang Zhang +Reviewed-by: K. Y. Srinivasan +Signed-off-by: David S. 
Miller +--- + drivers/net/hyperv/hyperv_net.h | 110 +++++++++++++++++++++- + drivers/net/hyperv/netvsc.c | 136 +++++++++++++++++++++------ + drivers/net/hyperv/netvsc_drv.c | 103 ++++++++++++++++++++- + drivers/net/hyperv/rndis_filter.c | 189 +++++++++++++++++++++++++++++++++++++- + 4 files changed, 504 insertions(+), 34 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index d18f711d0b0c..57eb3f906d64 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -28,6 +28,96 @@ + #include + #include + ++/* RSS related */ ++#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */ ++#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204 /* query and set */ ++ ++#define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88 ++#define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89 ++ ++#define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2 ++#define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2 ++ ++struct ndis_obj_header { ++ u8 type; ++ u8 rev; ++ u16 size; ++} __packed; ++ ++/* ndis_recv_scale_cap/cap_flag */ ++#define NDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000 ++#define NDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000 ++#define NDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000 ++#define NDIS_RSS_CAPS_USING_MSI_X 0x08000000 ++#define NDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000 ++#define NDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000 ++#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100 ++#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200 ++#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400 ++ ++struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */ ++ struct ndis_obj_header hdr; ++ u32 cap_flag; ++ u32 num_int_msg; ++ u32 num_recv_que; ++ u16 num_indirect_tabent; ++} __packed; ++ ++ ++/* ndis_recv_scale_param flags */ ++#define NDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001 ++#define NDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002 ++#define NDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004 ++#define 
NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008 ++#define NDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010 ++ ++/* Hash info bits */ ++#define NDIS_HASH_FUNC_TOEPLITZ 0x00000001 ++#define NDIS_HASH_IPV4 0x00000100 ++#define NDIS_HASH_TCP_IPV4 0x00000200 ++#define NDIS_HASH_IPV6 0x00000400 ++#define NDIS_HASH_IPV6_EX 0x00000800 ++#define NDIS_HASH_TCP_IPV6 0x00001000 ++#define NDIS_HASH_TCP_IPV6_EX 0x00002000 ++ ++#define NDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4) ++#define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40 ++ ++#define ITAB_NUM 128 ++#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 ++extern u8 netvsc_hash_key[]; ++ ++struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */ ++ struct ndis_obj_header hdr; ++ ++ /* Qualifies the rest of the information */ ++ u16 flag; ++ ++ /* The base CPU number to do receive processing. not used */ ++ u16 base_cpu_number; ++ ++ /* This describes the hash function and type being enabled */ ++ u32 hashinfo; ++ ++ /* The size of indirection table array */ ++ u16 indirect_tabsize; ++ ++ /* The offset of the indirection table from the beginning of this ++ * structure ++ */ ++ u32 indirect_taboffset; ++ ++ /* The size of the hash secret key */ ++ u16 hashkey_size; ++ ++ /* The offset of the secret key from the beginning of this structure */ ++ u32 kashkey_offset; ++ ++ u32 processor_masks_offset; ++ u32 num_processor_masks; ++ u32 processor_masks_entry_size; ++}; ++ + /* Fwd declaration */ + struct hv_netvsc_packet; + struct ndis_tcp_ip_checksum_info; +@@ -39,6 +129,8 @@ struct xferpage_packet { + + /* # of netvsc packets this xfer packet contains */ + u32 count; ++ ++ struct vmbus_channel *channel; + }; + + /* +@@ -54,6 +146,9 @@ struct hv_netvsc_packet { + bool is_data_pkt; + u16 vlan_tci; + ++ u16 q_idx; ++ struct vmbus_channel *channel; ++ + /* + * Valid only for receives when we break a xfer page packet + * into multiple netvsc packets +@@ -120,6 +215,7 @@ void 
netvsc_linkstatus_callback(struct hv_device *device_obj, + int netvsc_recv_callback(struct hv_device *device_obj, + struct hv_netvsc_packet *packet, + struct ndis_tcp_ip_checksum_info *csum_info); ++void netvsc_channel_cb(void *context); + int rndis_filter_open(struct hv_device *dev); + int rndis_filter_close(struct hv_device *dev); + int rndis_filter_device_add(struct hv_device *dev, +@@ -522,6 +618,8 @@ struct nvsp_message { + + #define NETVSC_PACKET_SIZE 2048 + ++#define VRSS_SEND_TAB_SIZE 16 ++ + /* Per netvsc channel-specific */ + struct netvsc_device { + struct hv_device *dev; +@@ -555,10 +653,20 @@ struct netvsc_device { + + struct net_device *ndev; + ++ struct vmbus_channel *chn_table[NR_CPUS]; ++ u32 send_table[VRSS_SEND_TAB_SIZE]; ++ u32 num_chn; ++ atomic_t queue_sends[NR_CPUS]; ++ + /* Holds rndis device info */ + void *extension; +- /* The recive buffer for this device */ ++ ++ int ring_size; ++ ++ /* The primary channel callback buffer */ + unsigned char cb_buffer[NETVSC_PACKET_SIZE]; ++ /* The sub channel callback buffer */ ++ unsigned char *sub_cb_buf; + }; + + /* NdisInitialize message */ +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index f7629ecefa84..e7e77f12bc38 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -422,6 +422,9 @@ int netvsc_device_remove(struct hv_device *device) + kfree(netvsc_packet); + } + ++ if (net_device->sub_cb_buf) ++ vfree(net_device->sub_cb_buf); ++ + kfree(net_device); + return 0; + } +@@ -461,7 +464,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device, + (nvsp_packet->hdr.msg_type == + NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) || + (nvsp_packet->hdr.msg_type == +- NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE)) { ++ NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) || ++ (nvsp_packet->hdr.msg_type == ++ NVSP_MSG5_TYPE_SUBCHANNEL)) { + /* Copy the response back */ + memcpy(&net_device->channel_init_pkt, nvsp_packet, + sizeof(struct nvsp_message)); +@@ -469,28 
+474,37 @@ static void netvsc_send_completion(struct netvsc_device *net_device, + } else if (nvsp_packet->hdr.msg_type == + NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) { + int num_outstanding_sends; ++ u16 q_idx = 0; ++ struct vmbus_channel *channel = device->channel; ++ int queue_sends; + + /* Get the send context */ + nvsc_packet = (struct hv_netvsc_packet *)(unsigned long) + packet->trans_id; + + /* Notify the layer above us */ +- if (nvsc_packet) ++ if (nvsc_packet) { ++ q_idx = nvsc_packet->q_idx; ++ channel = nvsc_packet->channel; + nvsc_packet->completion.send.send_completion( + nvsc_packet->completion.send. + send_completion_ctx); ++ } + + num_outstanding_sends = + atomic_dec_return(&net_device->num_outstanding_sends); ++ queue_sends = atomic_dec_return(&net_device-> ++ queue_sends[q_idx]); + + if (net_device->destroy && num_outstanding_sends == 0) + wake_up(&net_device->wait_drain); + +- if (netif_queue_stopped(ndev) && !net_device->start_remove && +- (hv_ringbuf_avail_percent(&device->channel->outbound) +- > RING_AVAIL_PERCENT_HIWATER || +- num_outstanding_sends < 1)) +- netif_wake_queue(ndev); ++ if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && ++ !net_device->start_remove && ++ (hv_ringbuf_avail_percent(&channel->outbound) > ++ RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) ++ netif_tx_wake_queue(netdev_get_tx_queue( ++ ndev, q_idx)); + } else { + netdev_err(ndev, "Unknown send completion packet type- " + "%d received!!\n", nvsp_packet->hdr.msg_type); +@@ -505,6 +519,7 @@ int netvsc_send(struct hv_device *device, + int ret = 0; + struct nvsp_message sendMessage; + struct net_device *ndev; ++ struct vmbus_channel *out_channel = NULL; + u64 req_id; + + net_device = get_outbound_net_device(device); +@@ -531,15 +546,20 @@ int netvsc_send(struct hv_device *device, + else + req_id = 0; + ++ out_channel = net_device->chn_table[packet->q_idx]; ++ if (out_channel == NULL) ++ out_channel = device->channel; ++ packet->channel = out_channel; ++ + if 
(packet->page_buf_cnt) { +- ret = vmbus_sendpacket_pagebuffer(device->channel, ++ ret = vmbus_sendpacket_pagebuffer(out_channel, + packet->page_buf, + packet->page_buf_cnt, + &sendMessage, + sizeof(struct nvsp_message), + req_id); + } else { +- ret = vmbus_sendpacket(device->channel, &sendMessage, ++ ret = vmbus_sendpacket(out_channel, &sendMessage, + sizeof(struct nvsp_message), + req_id, + VM_PKT_DATA_INBAND, +@@ -548,17 +568,24 @@ int netvsc_send(struct hv_device *device, + + if (ret == 0) { + atomic_inc(&net_device->num_outstanding_sends); +- if (hv_ringbuf_avail_percent(&device->channel->outbound) < ++ atomic_inc(&net_device->queue_sends[packet->q_idx]); ++ ++ if (hv_ringbuf_avail_percent(&out_channel->outbound) < + RING_AVAIL_PERCENT_LOWATER) { +- netif_stop_queue(ndev); ++ netif_tx_stop_queue(netdev_get_tx_queue( ++ ndev, packet->q_idx)); ++ + if (atomic_read(&net_device-> +- num_outstanding_sends) < 1) +- netif_wake_queue(ndev); ++ queue_sends[packet->q_idx]) < 1) ++ netif_tx_wake_queue(netdev_get_tx_queue( ++ ndev, packet->q_idx)); + } + } else if (ret == -EAGAIN) { +- netif_stop_queue(ndev); +- if (atomic_read(&net_device->num_outstanding_sends) < 1) { +- netif_wake_queue(ndev); ++ netif_tx_stop_queue(netdev_get_tx_queue( ++ ndev, packet->q_idx)); ++ if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) { ++ netif_tx_wake_queue(netdev_get_tx_queue( ++ ndev, packet->q_idx)); + ret = -ENOSPC; + } + } else { +@@ -570,6 +597,7 @@ int netvsc_send(struct hv_device *device, + } + + static void netvsc_send_recv_completion(struct hv_device *device, ++ struct vmbus_channel *channel, + struct netvsc_device *net_device, + u64 transaction_id, u32 status) + { +@@ -587,7 +615,7 @@ static void netvsc_send_recv_completion(struct hv_device *device, + + retry_send_cmplt: + /* Send the completion */ +- ret = vmbus_sendpacket(device->channel, &recvcompMessage, ++ ret = vmbus_sendpacket(channel, &recvcompMessage, + sizeof(struct nvsp_message), transaction_id, + 
VM_PKT_COMP, 0); + if (ret == 0) { +@@ -618,6 +646,7 @@ static void netvsc_receive_completion(void *context) + { + struct hv_netvsc_packet *packet = context; + struct hv_device *device = packet->device; ++ struct vmbus_channel *channel; + struct netvsc_device *net_device; + u64 transaction_id = 0; + bool fsend_receive_comp = false; +@@ -649,6 +678,7 @@ static void netvsc_receive_completion(void *context) + */ + if (packet->xfer_page_pkt->count == 0) { + fsend_receive_comp = true; ++ channel = packet->xfer_page_pkt->channel; + transaction_id = packet->completion.recv.recv_completion_tid; + status = packet->xfer_page_pkt->status; + list_add_tail(&packet->xfer_page_pkt->list_ent, +@@ -662,12 +692,13 @@ static void netvsc_receive_completion(void *context) + + /* Send a receive completion for the xfer page packet */ + if (fsend_receive_comp) +- netvsc_send_recv_completion(device, net_device, transaction_id, +- status); ++ netvsc_send_recv_completion(device, channel, net_device, ++ transaction_id, status); + + } + + static void netvsc_receive(struct netvsc_device *net_device, ++ struct vmbus_channel *channel, + struct hv_device *device, + struct vmpacket_descriptor *packet) + { +@@ -748,7 +779,7 @@ static void netvsc_receive(struct netvsc_device *net_device, + spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, + flags); + +- netvsc_send_recv_completion(device, net_device, ++ netvsc_send_recv_completion(device, channel, net_device, + vmxferpage_packet->d.trans_id, + NVSP_STAT_FAIL); + +@@ -759,6 +790,7 @@ static void netvsc_receive(struct netvsc_device *net_device, + xferpage_packet = (struct xferpage_packet *)listHead.next; + list_del(&xferpage_packet->list_ent); + xferpage_packet->status = NVSP_STAT_SUCCESS; ++ xferpage_packet->channel = channel; + + /* This is how much we can satisfy */ + xferpage_packet->count = count - 1; +@@ -800,10 +832,45 @@ static void netvsc_receive(struct netvsc_device *net_device, + + } + +-static void netvsc_channel_cb(void *context) ++ 
++static void netvsc_send_table(struct hv_device *hdev, ++ struct vmpacket_descriptor *vmpkt) ++{ ++ struct netvsc_device *nvscdev; ++ struct net_device *ndev; ++ struct nvsp_message *nvmsg; ++ int i; ++ u32 count, *tab; ++ ++ nvscdev = get_outbound_net_device(hdev); ++ if (!nvscdev) ++ return; ++ ndev = nvscdev->ndev; ++ ++ nvmsg = (struct nvsp_message *)((unsigned long)vmpkt + ++ (vmpkt->offset8 << 3)); ++ ++ if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE) ++ return; ++ ++ count = nvmsg->msg.v5_msg.send_table.count; ++ if (count != VRSS_SEND_TAB_SIZE) { ++ netdev_err(ndev, "Received wrong send-table size:%u\n", count); ++ return; ++ } ++ ++ tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table + ++ nvmsg->msg.v5_msg.send_table.offset); ++ ++ for (i = 0; i < count; i++) ++ nvscdev->send_table[i] = tab[i]; ++} ++ ++void netvsc_channel_cb(void *context) + { + int ret; +- struct hv_device *device = context; ++ struct vmbus_channel *channel = (struct vmbus_channel *)context; ++ struct hv_device *device; + struct netvsc_device *net_device; + u32 bytes_recvd; + u64 request_id; +@@ -812,14 +879,19 @@ static void netvsc_channel_cb(void *context) + int bufferlen = NETVSC_PACKET_SIZE; + struct net_device *ndev; + ++ if (channel->primary_channel != NULL) ++ device = channel->primary_channel->device_obj; ++ else ++ device = channel->device_obj; ++ + net_device = get_inbound_net_device(device); + if (!net_device) + return; + ndev = net_device->ndev; +- buffer = net_device->cb_buffer; ++ buffer = get_per_channel_state(channel); + + do { +- ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen, ++ ret = vmbus_recvpacket_raw(channel, buffer, bufferlen, + &bytes_recvd, &request_id); + if (ret == 0) { + if (bytes_recvd > 0) { +@@ -831,8 +903,12 @@ static void netvsc_channel_cb(void *context) + break; + + case VM_PKT_DATA_USING_XFER_PAGES: +- netvsc_receive(net_device, +- device, desc); ++ netvsc_receive(net_device, channel, ++ device, desc); ++ break; 
++ ++ case VM_PKT_DATA_INBAND: ++ netvsc_send_table(device, desc); + break; + + default: +@@ -893,6 +969,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) + goto cleanup; + } + ++ net_device->ring_size = ring_size; ++ + /* + * Coming into this function, struct net_device * is + * registered as the driver private data. +@@ -917,10 +995,12 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) + } + init_completion(&net_device->channel_init_wait); + ++ set_per_channel_state(device->channel, net_device->cb_buffer); ++ + /* Open the channel */ + ret = vmbus_open(device->channel, ring_size * PAGE_SIZE, + ring_size * PAGE_SIZE, NULL, 0, +- netvsc_channel_cb, device); ++ netvsc_channel_cb, device->channel); + + if (ret != 0) { + netdev_err(ndev, "unable to open channel: %d\n", ret); +@@ -930,6 +1010,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) + /* Channel is opened */ + pr_info("hv_netvsc channel opened successfully\n"); + ++ net_device->chn_table[0] = device->channel; ++ + /* Connect with the NetVsp */ + ret = netvsc_connect_vsp(device); + if (ret != 0) { +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 31e55fba7cad..093cf3fc46b8 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -101,7 +101,7 @@ static int netvsc_open(struct net_device *net) + return ret; + } + +- netif_start_queue(net); ++ netif_tx_start_all_queues(net); + + nvdev = hv_get_drvdata(device_obj); + rdev = nvdev->extension; +@@ -149,6 +149,88 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, + return ppi; + } + ++union sub_key { ++ u64 k; ++ struct { ++ u8 pad[3]; ++ u8 kb; ++ u32 ka; ++ }; ++}; ++ ++/* Toeplitz hash function ++ * data: network byte order ++ * return: host byte order ++ */ ++static u32 comp_hash(u8 *key, int klen, u8 *data, int dlen) ++{ ++ union sub_key subk; ++ int k_next = 4; ++ u8 dt; ++ int i, j; ++ u32 ret = 0; 
++ ++ subk.k = 0; ++ subk.ka = ntohl(*(u32 *)key); ++ ++ for (i = 0; i < dlen; i++) { ++ subk.kb = key[k_next]; ++ k_next = (k_next + 1) % klen; ++ dt = data[i]; ++ for (j = 0; j < 8; j++) { ++ if (dt & 0x80) ++ ret ^= subk.ka; ++ dt <<= 1; ++ subk.k <<= 1; ++ } ++ } ++ ++ return ret; ++} ++ ++static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb) ++{ ++ struct iphdr *iphdr; ++ int data_len; ++ bool ret = false; ++ ++ if (eth_hdr(skb)->h_proto != htons(ETH_P_IP)) ++ return false; ++ ++ iphdr = ip_hdr(skb); ++ ++ if (iphdr->version == 4) { ++ if (iphdr->protocol == IPPROTO_TCP) ++ data_len = 12; ++ else ++ data_len = 8; ++ *hash = comp_hash(netvsc_hash_key, HASH_KEYLEN, ++ (u8 *)&iphdr->saddr, data_len); ++ ret = true; ++ } ++ ++ return ret; ++} ++ ++static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, ++ void *accel_priv, select_queue_fallback_t fallback) ++{ ++ struct net_device_context *net_device_ctx = netdev_priv(ndev); ++ struct hv_device *hdev = net_device_ctx->device_ctx; ++ struct netvsc_device *nvsc_dev = hv_get_drvdata(hdev); ++ u32 hash; ++ u16 q_idx = 0; ++ ++ if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1) ++ return 0; ++ ++ if (netvsc_set_hash(&hash, skb)) ++ q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] % ++ ndev->real_num_tx_queues; ++ ++ return q_idx; ++} ++ + static void netvsc_xmit_completion(void *context) + { + struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; +@@ -333,6 +415,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + + packet->vlan_tci = skb->vlan_tci; + ++ packet->q_idx = skb_get_queue_mapping(skb); ++ + packet->is_data_pkt = true; + packet->total_data_buflen = skb->len; + +@@ -554,6 +638,10 @@ int netvsc_recv_callback(struct hv_device *device_obj, + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + packet->vlan_tci); + ++ skb_record_rx_queue(skb, packet->xfer_page_pkt->channel-> ++ offermsg.offer.sub_channel_index % ++ 
net->real_num_rx_queues); ++ + net->stats.rx_packets++; + net->stats.rx_bytes += packet->total_data_buflen; + +@@ -602,7 +690,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) + hv_set_drvdata(hdev, ndev); + device_info.ring_size = ring_size; + rndis_filter_device_add(hdev, &device_info); +- netif_wake_queue(ndev); ++ netif_tx_wake_all_queues(ndev); + + return 0; + } +@@ -648,6 +736,7 @@ static const struct net_device_ops device_ops = { + .ndo_change_mtu = netvsc_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = netvsc_set_mac_addr, ++ .ndo_select_queue = netvsc_select_queue, + }; + + /* +@@ -694,9 +783,11 @@ static int netvsc_probe(struct hv_device *dev, + struct net_device *net = NULL; + struct net_device_context *net_device_ctx; + struct netvsc_device_info device_info; ++ struct netvsc_device *nvdev; + int ret; + +- net = alloc_etherdev(sizeof(struct net_device_context)); ++ net = alloc_etherdev_mq(sizeof(struct net_device_context), ++ num_online_cpus()); + if (!net) + return -ENOMEM; + +@@ -729,6 +820,12 @@ static int netvsc_probe(struct hv_device *dev, + } + memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + ++ nvdev = hv_get_drvdata(dev); ++ netif_set_real_num_tx_queues(net, nvdev->num_chn); ++ netif_set_real_num_rx_queues(net, nvdev->num_chn); ++ dev_info(&dev->device, "real num tx,rx queues:%u, %u\n", ++ net->real_num_tx_queues, net->real_num_rx_queues); ++ + ret = register_netdev(net); + if (ret != 0) { + pr_err("Unable to register netdev.\n"); +diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c +index 143a98caf618..d92cfbe43410 100644 +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -31,7 +31,7 @@ + #include "hyperv_net.h" + + +-#define RNDIS_EXT_LEN 100 ++#define RNDIS_EXT_LEN PAGE_SIZE + struct rndis_request { + struct list_head list_ent; + struct completion wait_event; +@@ -94,6 +94,8 @@ static struct rndis_request 
*get_rndis_request(struct rndis_device *dev, + rndis_msg->ndis_msg_type = msg_type; + rndis_msg->msg_len = msg_len; + ++ request->pkt.q_idx = 0; ++ + /* + * Set the request id. This field is always after the rndis header for + * request/response packet types so we just used the SetRequest as a +@@ -509,6 +511,19 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid, + query->info_buflen = 0; + query->dev_vc_handle = 0; + ++ if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) { ++ struct ndis_recv_scale_cap *cap; ++ ++ request->request_msg.msg_len += ++ sizeof(struct ndis_recv_scale_cap); ++ query->info_buflen = sizeof(struct ndis_recv_scale_cap); ++ cap = (struct ndis_recv_scale_cap *)((unsigned long)query + ++ query->info_buf_offset); ++ cap->hdr.type = NDIS_OBJECT_TYPE_RSS_CAPABILITIES; ++ cap->hdr.rev = NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2; ++ cap->hdr.size = sizeof(struct ndis_recv_scale_cap); ++ } ++ + ret = rndis_filter_send_request(dev, request); + if (ret != 0) + goto cleanup; +@@ -695,6 +710,89 @@ cleanup: + return ret; + } + ++u8 netvsc_hash_key[HASH_KEYLEN] = { ++ 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, ++ 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, ++ 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, ++ 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, ++ 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa ++}; ++ ++int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue) ++{ ++ struct net_device *ndev = rdev->net_dev->ndev; ++ struct rndis_request *request; ++ struct rndis_set_request *set; ++ struct rndis_set_complete *set_complete; ++ u32 extlen = sizeof(struct ndis_recv_scale_param) + ++ 4*ITAB_NUM + HASH_KEYLEN; ++ struct ndis_recv_scale_param *rssp; ++ u32 *itab; ++ u8 *keyp; ++ int i, t, ret; ++ ++ request = get_rndis_request( ++ rdev, RNDIS_MSG_SET, ++ RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen); ++ if (!request) ++ return -ENOMEM; ++ ++ set = &request->request_msg.msg.set_req; ++ set->oid = 
OID_GEN_RECEIVE_SCALE_PARAMETERS; ++ set->info_buflen = extlen; ++ set->info_buf_offset = sizeof(struct rndis_set_request); ++ set->dev_vc_handle = 0; ++ ++ rssp = (struct ndis_recv_scale_param *)(set + 1); ++ rssp->hdr.type = NDIS_OBJECT_TYPE_RSS_PARAMETERS; ++ rssp->hdr.rev = NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2; ++ rssp->hdr.size = sizeof(struct ndis_recv_scale_param); ++ rssp->flag = 0; ++ rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 | ++ NDIS_HASH_TCP_IPV4; ++ rssp->indirect_tabsize = 4*ITAB_NUM; ++ rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param); ++ rssp->hashkey_size = HASH_KEYLEN; ++ rssp->kashkey_offset = rssp->indirect_taboffset + ++ rssp->indirect_tabsize; ++ ++ /* Set indirection table entries */ ++ itab = (u32 *)(rssp + 1); ++ for (i = 0; i < ITAB_NUM; i++) ++ itab[i] = i % num_queue; ++ ++ /* Set hask key values */ ++ keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset); ++ for (i = 0; i < HASH_KEYLEN; i++) ++ keyp[i] = netvsc_hash_key[i]; ++ ++ ++ ret = rndis_filter_send_request(rdev, request); ++ if (ret != 0) ++ goto cleanup; ++ ++ t = wait_for_completion_timeout(&request->wait_event, 5*HZ); ++ if (t == 0) { ++ netdev_err(ndev, "timeout before we got a set response...\n"); ++ /* can't put_rndis_request, since we may still receive a ++ * send-completion. 
++ */ ++ return -ETIMEDOUT; ++ } else { ++ set_complete = &request->response_msg.msg.set_complete; ++ if (set_complete->status != RNDIS_STATUS_SUCCESS) { ++ netdev_err(ndev, "Fail to set RSS parameters:0x%x\n", ++ set_complete->status); ++ ret = -EINVAL; ++ } ++ } ++ ++cleanup: ++ put_rndis_request(rdev, request); ++ return ret; ++} ++ ++ + static int rndis_filter_query_device_link_status(struct rndis_device *dev) + { + u32 size = sizeof(u32); +@@ -886,6 +984,28 @@ static int rndis_filter_close_device(struct rndis_device *dev) + return ret; + } + ++static void netvsc_sc_open(struct vmbus_channel *new_sc) ++{ ++ struct netvsc_device *nvscdev; ++ u16 chn_index = new_sc->offermsg.offer.sub_channel_index; ++ int ret; ++ ++ nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj); ++ ++ if (chn_index >= nvscdev->num_chn) ++ return; ++ ++ set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) * ++ NETVSC_PACKET_SIZE); ++ ++ ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE, ++ nvscdev->ring_size * PAGE_SIZE, NULL, 0, ++ netvsc_channel_cb, new_sc); ++ ++ if (ret == 0) ++ nvscdev->chn_table[chn_index] = new_sc; ++} ++ + int rndis_filter_device_add(struct hv_device *dev, + void *additional_info) + { +@@ -894,6 +1014,10 @@ int rndis_filter_device_add(struct hv_device *dev, + struct rndis_device *rndis_device; + struct netvsc_device_info *device_info = additional_info; + struct ndis_offload_params offloads; ++ struct nvsp_message *init_packet; ++ int t; ++ struct ndis_recv_scale_cap rsscap; ++ u32 rsscap_size = sizeof(struct ndis_recv_scale_cap); + + rndis_device = get_rndis_device(); + if (!rndis_device) +@@ -913,6 +1037,7 @@ int rndis_filter_device_add(struct hv_device *dev, + + /* Initialize the rndis device */ + net_device = hv_get_drvdata(dev); ++ net_device->num_chn = 1; + + net_device->extension = rndis_device; + rndis_device->net_dev = net_device; +@@ -952,7 +1077,6 @@ int rndis_filter_device_add(struct hv_device *dev, + if (ret) + goto 
err_dev_remv; + +- + rndis_filter_query_device_link_status(rndis_device); + + device_info->link_state = rndis_device->link_state; +@@ -961,7 +1085,66 @@ int rndis_filter_device_add(struct hv_device *dev, + rndis_device->hw_mac_adr, + device_info->link_state ? "down" : "up"); + +- return ret; ++ if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5) ++ return 0; ++ ++ /* vRSS setup */ ++ memset(&rsscap, 0, rsscap_size); ++ ret = rndis_filter_query_device(rndis_device, ++ OID_GEN_RECEIVE_SCALE_CAPABILITIES, ++ &rsscap, &rsscap_size); ++ if (ret || rsscap.num_recv_que < 2) ++ goto out; ++ ++ net_device->num_chn = (num_online_cpus() < rsscap.num_recv_que) ? ++ num_online_cpus() : rsscap.num_recv_que; ++ if (net_device->num_chn == 1) ++ goto out; ++ ++ net_device->sub_cb_buf = vzalloc((net_device->num_chn - 1) * ++ NETVSC_PACKET_SIZE); ++ if (!net_device->sub_cb_buf) { ++ net_device->num_chn = 1; ++ dev_info(&dev->device, "No memory for subchannels.\n"); ++ goto out; ++ } ++ ++ vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); ++ ++ init_packet = &net_device->channel_init_pkt; ++ memset(init_packet, 0, sizeof(struct nvsp_message)); ++ init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL; ++ init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE; ++ init_packet->msg.v5_msg.subchn_req.num_subchannels = ++ net_device->num_chn - 1; ++ ret = vmbus_sendpacket(dev->channel, init_packet, ++ sizeof(struct nvsp_message), ++ (unsigned long)init_packet, ++ VM_PKT_DATA_INBAND, ++ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); ++ if (ret) ++ goto out; ++ t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ); ++ if (t == 0) { ++ ret = -ETIMEDOUT; ++ goto out; ++ } ++ if (init_packet->msg.v5_msg.subchn_comp.status != ++ NVSP_STAT_SUCCESS) { ++ ret = -ENODEV; ++ goto out; ++ } ++ net_device->num_chn = 1 + ++ init_packet->msg.v5_msg.subchn_comp.num_subchannels; ++ ++ vmbus_are_subchannels_present(dev->channel); ++ ++ ret = 
rndis_filter_set_rss_param(rndis_device, net_device->num_chn); ++ ++out: ++ if (ret) ++ net_device->num_chn = 1; ++ return 0; /* return 0 because primary channel can be used alone */ + + err_dev_remv: + rndis_filter_device_remove(dev); +-- +2.4.3 + diff --git a/src/patches/linux/0002-Drivers-net-hyperv-Cleanup-the-receive-path.patch b/src/patches/linux/0002-Drivers-net-hyperv-Cleanup-the-receive-path.patch new file mode 100644 index 0000000000..79485b3982 --- /dev/null +++ b/src/patches/linux/0002-Drivers-net-hyperv-Cleanup-the-receive-path.patch @@ -0,0 +1,112 @@ +From 348a5d691d84759dda8cdd3cbf9f071115c1240e Mon Sep 17 00:00:00 2001 +From: KY Srinivasan +Date: Sun, 16 Feb 2014 16:38:44 -0800 +Subject: [PATCH 02/25] Drivers: net: hyperv: Cleanup the receive path + +Make the receive path a little more efficient by parameterizing the +required state rather than re-establishing that state. + +Signed-off-by: K. Y. Srinivasan +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/netvsc.c | 29 +++++++++++++---------------- + 1 file changed, 13 insertions(+), 16 deletions(-) + +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index 03a2c6e17158..7fa2bbade327 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -432,17 +432,14 @@ static inline u32 hv_ringbuf_avail_percent( + return avail_write * 100 / ring_info->ring_datasize; + } + +-static void netvsc_send_completion(struct hv_device *device, ++static void netvsc_send_completion(struct netvsc_device *net_device, ++ struct hv_device *device, + struct vmpacket_descriptor *packet) + { +- struct netvsc_device *net_device; + struct nvsp_message *nvsp_packet; + struct hv_netvsc_packet *nvsc_packet; + struct net_device *ndev; + +- net_device = get_inbound_net_device(device); +- if (!net_device) +- return; + ndev = net_device->ndev; + + nvsp_packet = (struct nvsp_message *)((unsigned long)packet + +@@ -561,13 +558,13 @@ int netvsc_send(struct 
hv_device *device, + } + + static void netvsc_send_recv_completion(struct hv_device *device, ++ struct netvsc_device *net_device, + u64 transaction_id, u32 status) + { + struct nvsp_message recvcompMessage; + int retries = 0; + int ret; + struct net_device *ndev; +- struct netvsc_device *net_device = hv_get_drvdata(device); + + ndev = net_device->ndev; + +@@ -653,14 +650,15 @@ static void netvsc_receive_completion(void *context) + + /* Send a receive completion for the xfer page packet */ + if (fsend_receive_comp) +- netvsc_send_recv_completion(device, transaction_id, status); ++ netvsc_send_recv_completion(device, net_device, transaction_id, ++ status); + + } + +-static void netvsc_receive(struct hv_device *device, +- struct vmpacket_descriptor *packet) ++static void netvsc_receive(struct netvsc_device *net_device, ++ struct hv_device *device, ++ struct vmpacket_descriptor *packet) + { +- struct netvsc_device *net_device; + struct vmtransfer_page_packet_header *vmxferpage_packet; + struct nvsp_message *nvsp_packet; + struct hv_netvsc_packet *netvsc_packet = NULL; +@@ -673,9 +671,6 @@ static void netvsc_receive(struct hv_device *device, + + LIST_HEAD(listHead); + +- net_device = get_inbound_net_device(device); +- if (!net_device) +- return; + ndev = net_device->ndev; + + /* +@@ -741,7 +736,7 @@ static void netvsc_receive(struct hv_device *device, + spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, + flags); + +- netvsc_send_recv_completion(device, ++ netvsc_send_recv_completion(device, net_device, + vmxferpage_packet->d.trans_id, + NVSP_STAT_FAIL); + +@@ -825,11 +820,13 @@ static void netvsc_channel_cb(void *context) + desc = (struct vmpacket_descriptor *)buffer; + switch (desc->type) { + case VM_PKT_COMP: +- netvsc_send_completion(device, desc); ++ netvsc_send_completion(net_device, ++ device, desc); + break; + + case VM_PKT_DATA_USING_XFER_PAGES: +- netvsc_receive(device, desc); ++ netvsc_receive(net_device, ++ device, desc); + break; + + default: +-- 
+2.4.3 + diff --git a/src/patches/linux/0002-hyperv-Remove-recv_pkt_list-and-lock.patch b/src/patches/linux/0002-hyperv-Remove-recv_pkt_list-and-lock.patch new file mode 100644 index 0000000000..bbe0e5279c --- /dev/null +++ b/src/patches/linux/0002-hyperv-Remove-recv_pkt_list-and-lock.patch @@ -0,0 +1,384 @@ +From 4baab26129e0540746744232022110dbe9e011e7 Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang +Date: Mon, 21 Apr 2014 14:54:43 -0700 +Subject: [PATCH 02/11] hyperv: Remove recv_pkt_list and lock + +Removed recv_pkt_list and lock, and updated related code, so that +the locking overhead is reduced especially when multiple channels +are in use. + +The recv_pkt_list isn't actually necessary because the packets are +processed sequentially in each channel. It has been replaced by a +local variable, and the related lock for this list is also removed. +The is_data_pkt field is not used in receive path, so its assignment +is cleaned up. + +Signed-off-by: Haiyang Zhang +Reviewed-by: K. Y. Srinivasan +Signed-off-by: David S. 
Miller +--- + drivers/net/hyperv/hyperv_net.h | 33 -------- + drivers/net/hyperv/netvsc.c | 174 +++----------------------------------- + drivers/net/hyperv/netvsc_drv.c | 2 +- + drivers/net/hyperv/rndis_filter.c | 2 - + 4 files changed, 13 insertions(+), 198 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index 57eb3f906d64..a1af0f7711e2 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -119,27 +119,14 @@ struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */ + }; + + /* Fwd declaration */ +-struct hv_netvsc_packet; + struct ndis_tcp_ip_checksum_info; + +-/* Represent the xfer page packet which contains 1 or more netvsc packet */ +-struct xferpage_packet { +- struct list_head list_ent; +- u32 status; +- +- /* # of netvsc packets this xfer packet contains */ +- u32 count; +- +- struct vmbus_channel *channel; +-}; +- + /* + * Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame + * within the RNDIS + */ + struct hv_netvsc_packet { + /* Bookkeeping stuff */ +- struct list_head list_ent; + u32 status; + + struct hv_device *device; +@@ -149,19 +136,8 @@ struct hv_netvsc_packet { + u16 q_idx; + struct vmbus_channel *channel; + +- /* +- * Valid only for receives when we break a xfer page packet +- * into multiple netvsc packets +- */ +- struct xferpage_packet *xfer_page_pkt; +- + union { + struct { +- u64 recv_completion_tid; +- void *recv_completion_ctx; +- void (*recv_completion)(void *context); +- } recv; +- struct { + u64 send_completion_tid; + void *send_completion_ctx; + void (*send_completion)(void *context); +@@ -613,9 +589,6 @@ struct nvsp_message { + + #define NETVSC_RECEIVE_BUFFER_ID 0xcafe + +-/* Preallocated receive packets */ +-#define NETVSC_RECEIVE_PACKETLIST_COUNT 256 +- + #define NETVSC_PACKET_SIZE 2048 + + #define VRSS_SEND_TAB_SIZE 16 +@@ -630,12 +603,6 @@ struct netvsc_device { + wait_queue_head_t wait_drain; + bool start_remove; + bool 
destroy; +- /* +- * List of free preallocated hv_netvsc_packet to represent receive +- * packet +- */ +- struct list_head recv_pkt_list; +- spinlock_t recv_pkt_list_lock; + + /* Receive buffer allocated by us but manages by NetVSP */ + void *recv_buf; +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index e7e77f12bc38..b10334773b32 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -387,7 +387,6 @@ static void netvsc_disconnect_vsp(struct netvsc_device *net_device) + int netvsc_device_remove(struct hv_device *device) + { + struct netvsc_device *net_device; +- struct hv_netvsc_packet *netvsc_packet, *pos; + unsigned long flags; + + net_device = hv_get_drvdata(device); +@@ -416,12 +415,6 @@ int netvsc_device_remove(struct hv_device *device) + vmbus_close(device->channel); + + /* Release all resources */ +- list_for_each_entry_safe(netvsc_packet, pos, +- &net_device->recv_pkt_list, list_ent) { +- list_del(&netvsc_packet->list_ent); +- kfree(netvsc_packet); +- } +- + if (net_device->sub_cb_buf) + vfree(net_device->sub_cb_buf); + +@@ -641,62 +634,6 @@ retry_send_cmplt: + } + } + +-/* Send a receive completion packet to RNDIS device (ie NetVsp) */ +-static void netvsc_receive_completion(void *context) +-{ +- struct hv_netvsc_packet *packet = context; +- struct hv_device *device = packet->device; +- struct vmbus_channel *channel; +- struct netvsc_device *net_device; +- u64 transaction_id = 0; +- bool fsend_receive_comp = false; +- unsigned long flags; +- struct net_device *ndev; +- u32 status = NVSP_STAT_NONE; +- +- /* +- * Even though it seems logical to do a GetOutboundNetDevice() here to +- * send out receive completion, we are using GetInboundNetDevice() +- * since we may have disable outbound traffic already. +- */ +- net_device = get_inbound_net_device(device); +- if (!net_device) +- return; +- ndev = net_device->ndev; +- +- /* Overloading use of the lock. 
*/ +- spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags); +- +- if (packet->status != NVSP_STAT_SUCCESS) +- packet->xfer_page_pkt->status = NVSP_STAT_FAIL; +- +- packet->xfer_page_pkt->count--; +- +- /* +- * Last one in the line that represent 1 xfer page packet. +- * Return the xfer page packet itself to the freelist +- */ +- if (packet->xfer_page_pkt->count == 0) { +- fsend_receive_comp = true; +- channel = packet->xfer_page_pkt->channel; +- transaction_id = packet->completion.recv.recv_completion_tid; +- status = packet->xfer_page_pkt->status; +- list_add_tail(&packet->xfer_page_pkt->list_ent, +- &net_device->recv_pkt_list); +- +- } +- +- /* Put the packet back */ +- list_add_tail(&packet->list_ent, &net_device->recv_pkt_list); +- spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags); +- +- /* Send a receive completion for the xfer page packet */ +- if (fsend_receive_comp) +- netvsc_send_recv_completion(device, channel, net_device, +- transaction_id, status); +- +-} +- + static void netvsc_receive(struct netvsc_device *net_device, + struct vmbus_channel *channel, + struct hv_device *device, +@@ -704,16 +641,13 @@ static void netvsc_receive(struct netvsc_device *net_device, + { + struct vmtransfer_page_packet_header *vmxferpage_packet; + struct nvsp_message *nvsp_packet; +- struct hv_netvsc_packet *netvsc_packet = NULL; +- /* struct netvsc_driver *netvscDriver; */ +- struct xferpage_packet *xferpage_packet = NULL; ++ struct hv_netvsc_packet nv_pkt; ++ struct hv_netvsc_packet *netvsc_packet = &nv_pkt; ++ u32 status = NVSP_STAT_SUCCESS; + int i; + int count = 0; +- unsigned long flags; + struct net_device *ndev; + +- LIST_HEAD(listHead); +- + ndev = net_device->ndev; + + /* +@@ -746,78 +680,14 @@ static void netvsc_receive(struct netvsc_device *net_device, + return; + } + +- /* +- * Grab free packets (range count + 1) to represent this xfer +- * page packet. +1 to represent the xfer page packet itself. 
+- * We grab it here so that we know exactly how many we can +- * fulfil +- */ +- spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags); +- while (!list_empty(&net_device->recv_pkt_list)) { +- list_move_tail(net_device->recv_pkt_list.next, &listHead); +- if (++count == vmxferpage_packet->range_cnt + 1) +- break; +- } +- spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags); +- +- /* +- * We need at least 2 netvsc pkts (1 to represent the xfer +- * page and at least 1 for the range) i.e. we can handled +- * some of the xfer page packet ranges... +- */ +- if (count < 2) { +- netdev_err(ndev, "Got only %d netvsc pkt...needed " +- "%d pkts. Dropping this xfer page packet completely!\n", +- count, vmxferpage_packet->range_cnt + 1); +- +- /* Return it to the freelist */ +- spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags); +- for (i = count; i != 0; i--) { +- list_move_tail(listHead.next, +- &net_device->recv_pkt_list); +- } +- spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, +- flags); +- +- netvsc_send_recv_completion(device, channel, net_device, +- vmxferpage_packet->d.trans_id, +- NVSP_STAT_FAIL); +- +- return; +- } +- +- /* Remove the 1st packet to represent the xfer page packet itself */ +- xferpage_packet = (struct xferpage_packet *)listHead.next; +- list_del(&xferpage_packet->list_ent); +- xferpage_packet->status = NVSP_STAT_SUCCESS; +- xferpage_packet->channel = channel; +- +- /* This is how much we can satisfy */ +- xferpage_packet->count = count - 1; +- +- if (xferpage_packet->count != vmxferpage_packet->range_cnt) { +- netdev_err(ndev, "Needed %d netvsc pkts to satisfy " +- "this xfer page...got %d\n", +- vmxferpage_packet->range_cnt, xferpage_packet->count); +- } ++ count = vmxferpage_packet->range_cnt; ++ netvsc_packet->device = device; ++ netvsc_packet->channel = channel; + + /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */ +- for (i = 0; i < (count - 1); i++) { +- netvsc_packet = (struct hv_netvsc_packet 
*)listHead.next; +- list_del(&netvsc_packet->list_ent); +- ++ for (i = 0; i < count; i++) { + /* Initialize the netvsc packet */ + netvsc_packet->status = NVSP_STAT_SUCCESS; +- netvsc_packet->xfer_page_pkt = xferpage_packet; +- netvsc_packet->completion.recv.recv_completion = +- netvsc_receive_completion; +- netvsc_packet->completion.recv.recv_completion_ctx = +- netvsc_packet; +- netvsc_packet->device = device; +- /* Save this so that we can send it back */ +- netvsc_packet->completion.recv.recv_completion_tid = +- vmxferpage_packet->d.trans_id; +- + netvsc_packet->data = (void *)((unsigned long)net_device-> + recv_buf + vmxferpage_packet->ranges[i].byte_offset); + netvsc_packet->total_data_buflen = +@@ -826,10 +696,12 @@ static void netvsc_receive(struct netvsc_device *net_device, + /* Pass it to the upper layer */ + rndis_filter_receive(device, netvsc_packet); + +- netvsc_receive_completion(netvsc_packet-> +- completion.recv.recv_completion_ctx); ++ if (netvsc_packet->status != NVSP_STAT_SUCCESS) ++ status = NVSP_STAT_FAIL; + } + ++ netvsc_send_recv_completion(device, channel, net_device, ++ vmxferpage_packet->d.trans_id, status); + } + + +@@ -956,11 +828,9 @@ void netvsc_channel_cb(void *context) + int netvsc_device_add(struct hv_device *device, void *additional_info) + { + int ret = 0; +- int i; + int ring_size = + ((struct netvsc_device_info *)additional_info)->ring_size; + struct netvsc_device *net_device; +- struct hv_netvsc_packet *packet, *pos; + struct net_device *ndev; + + net_device = alloc_net_device(device); +@@ -981,18 +851,6 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) + ndev = net_device->ndev; + + /* Initialize the NetVSC channel extension */ +- spin_lock_init(&net_device->recv_pkt_list_lock); +- +- INIT_LIST_HEAD(&net_device->recv_pkt_list); +- +- for (i = 0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) { +- packet = kzalloc(sizeof(struct hv_netvsc_packet), GFP_KERNEL); +- if (!packet) +- break; +- +- 
list_add_tail(&packet->list_ent, +- &net_device->recv_pkt_list); +- } + init_completion(&net_device->channel_init_wait); + + set_per_channel_state(device->channel, net_device->cb_buffer); +@@ -1028,16 +886,8 @@ close: + + cleanup: + +- if (net_device) { +- list_for_each_entry_safe(packet, pos, +- &net_device->recv_pkt_list, +- list_ent) { +- list_del(&packet->list_ent); +- kfree(packet); +- } +- ++ if (net_device) + kfree(net_device); +- } + + return ret; + } +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 093cf3fc46b8..8f6d53a2ed95 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -638,7 +638,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + packet->vlan_tci); + +- skb_record_rx_queue(skb, packet->xfer_page_pkt->channel-> ++ skb_record_rx_queue(skb, packet->channel-> + offermsg.offer.sub_channel_index % + net->real_num_rx_queues); + +diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c +index d92cfbe43410..48f5a0fbd674 100644 +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -401,8 +401,6 @@ static void rndis_filter_receive_data(struct rndis_device *dev, + pkt->total_data_buflen = rndis_pkt->data_len; + pkt->data = (void *)((unsigned long)pkt->data + data_offset); + +- pkt->is_data_pkt = true; +- + vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO); + if (vlan) { + pkt->vlan_tci = VLAN_TAG_PRESENT | vlan->vlanid | +-- +2.4.3 + diff --git a/src/patches/linux/0003-Drivers-net-hyperv-Cleanup-the-netvsc-receive-callba.patch b/src/patches/linux/0003-Drivers-net-hyperv-Cleanup-the-netvsc-receive-callba.patch new file mode 100644 index 0000000000..e0b82eadbb --- /dev/null +++ b/src/patches/linux/0003-Drivers-net-hyperv-Cleanup-the-netvsc-receive-callba.patch @@ -0,0 +1,101 @@ +From c9f2db35ac4f789930522d9d36200cb71b442bed Mon Sep 17 00:00:00 2001 +From: KY Srinivasan 
+Date: Sun, 16 Feb 2014 16:38:45 -0800 +Subject: [PATCH 03/25] Drivers: net: hyperv: Cleanup the netvsc receive + callback functio + +Get rid of the buffer allocation in the receive path for normal packets. + +Signed-off-by: K. Y. Srinivasan +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/hyperv_net.h | 2 ++ + drivers/net/hyperv/netvsc.c | 33 ++++++++++----------------------- + 2 files changed, 12 insertions(+), 23 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index 7645ba38bde8..01a16ea77a5a 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -506,6 +506,8 @@ struct netvsc_device { + + /* Holds rndis device info */ + void *extension; ++ /* The recive buffer for this device */ ++ unsigned char cb_buffer[NETVSC_PACKET_SIZE]; + }; + + /* NdisInitialize message */ +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index 7fa2bbade327..9a0e9c6f1414 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -795,22 +795,16 @@ static void netvsc_channel_cb(void *context) + struct netvsc_device *net_device; + u32 bytes_recvd; + u64 request_id; +- unsigned char *packet; + struct vmpacket_descriptor *desc; + unsigned char *buffer; + int bufferlen = NETVSC_PACKET_SIZE; + struct net_device *ndev; + +- packet = kzalloc(NETVSC_PACKET_SIZE * sizeof(unsigned char), +- GFP_ATOMIC); +- if (!packet) +- return; +- buffer = packet; +- + net_device = get_inbound_net_device(device); + if (!net_device) +- goto out; ++ return; + ndev = net_device->ndev; ++ buffer = net_device->cb_buffer; + + do { + ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen, +@@ -838,23 +832,16 @@ static void netvsc_channel_cb(void *context) + break; + } + +- /* reset */ +- if (bufferlen > NETVSC_PACKET_SIZE) { +- kfree(buffer); +- buffer = packet; +- bufferlen = NETVSC_PACKET_SIZE; +- } + } else { +- /* reset */ +- if (bufferlen > NETVSC_PACKET_SIZE) { +- 
kfree(buffer); +- buffer = packet; +- bufferlen = NETVSC_PACKET_SIZE; +- } +- ++ /* ++ * We are done for this pass. ++ */ + break; + } ++ + } else if (ret == -ENOBUFS) { ++ if (bufferlen > NETVSC_PACKET_SIZE) ++ kfree(buffer); + /* Handle large packet */ + buffer = kmalloc(bytes_recvd, GFP_ATOMIC); + if (buffer == NULL) { +@@ -869,8 +856,8 @@ static void netvsc_channel_cb(void *context) + } + } while (1); + +-out: +- kfree(buffer); ++ if (bufferlen > NETVSC_PACKET_SIZE) ++ kfree(buffer); + return; + } + +-- +2.4.3 + diff --git a/src/patches/linux/0003-hyperv-Simplify-the-send_completion-variables.patch b/src/patches/linux/0003-hyperv-Simplify-the-send_completion-variables.patch new file mode 100644 index 0000000000..dcab56f23b --- /dev/null +++ b/src/patches/linux/0003-hyperv-Simplify-the-send_completion-variables.patch @@ -0,0 +1,105 @@ +From 893f66277799cd46bdf97429cc5d16a815a51273 Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang +Date: Mon, 21 Apr 2014 14:54:44 -0700 +Subject: [PATCH 03/11] hyperv: Simplify the send_completion variables + +The union contains only one member now, so we use the variables in it directly. + +Signed-off-by: Haiyang Zhang +Reviewed-by: K. Y. Srinivasan +Signed-off-by: David S. 
Miller +--- + drivers/net/hyperv/hyperv_net.h | 10 +++------- + drivers/net/hyperv/netvsc.c | 7 +++---- + drivers/net/hyperv/netvsc_drv.c | 8 ++++---- + drivers/net/hyperv/rndis_filter.c | 2 +- + 4 files changed, 11 insertions(+), 16 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index a1af0f7711e2..d1f7826aa75f 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -136,13 +136,9 @@ struct hv_netvsc_packet { + u16 q_idx; + struct vmbus_channel *channel; + +- union { +- struct { +- u64 send_completion_tid; +- void *send_completion_ctx; +- void (*send_completion)(void *context); +- } send; +- } completion; ++ u64 send_completion_tid; ++ void *send_completion_ctx; ++ void (*send_completion)(void *context); + + /* This points to the memory after page_buf */ + struct rndis_message *rndis_msg; +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index b10334773b32..bbee44635035 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -479,9 +479,8 @@ static void netvsc_send_completion(struct netvsc_device *net_device, + if (nvsc_packet) { + q_idx = nvsc_packet->q_idx; + channel = nvsc_packet->channel; +- nvsc_packet->completion.send.send_completion( +- nvsc_packet->completion.send. 
+- send_completion_ctx); ++ nvsc_packet->send_completion(nvsc_packet-> ++ send_completion_ctx); + } + + num_outstanding_sends = +@@ -534,7 +533,7 @@ int netvsc_send(struct hv_device *device, + 0xFFFFFFFF; + sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0; + +- if (packet->completion.send.send_completion) ++ if (packet->send_completion) + req_id = (ulong)packet; + else + req_id = 0; +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 8f6d53a2ed95..c76b66515e92 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -235,7 +235,7 @@ static void netvsc_xmit_completion(void *context) + { + struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; + struct sk_buff *skb = (struct sk_buff *) +- (unsigned long)packet->completion.send.send_completion_tid; ++ (unsigned long)packet->send_completion_tid; + + kfree(packet); + +@@ -425,9 +425,9 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + (num_data_pgs * sizeof(struct hv_page_buffer))); + + /* Set the completion routine */ +- packet->completion.send.send_completion = netvsc_xmit_completion; +- packet->completion.send.send_completion_ctx = packet; +- packet->completion.send.send_completion_tid = (unsigned long)skb; ++ packet->send_completion = netvsc_xmit_completion; ++ packet->send_completion_ctx = packet; ++ packet->send_completion_tid = (unsigned long)skb; + + isvlan = packet->vlan_tci & VLAN_TAG_PRESENT; + +diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c +index 48f5a0fbd674..99c527adae5b 100644 +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -236,7 +236,7 @@ static int rndis_filter_send_request(struct rndis_device *dev, + packet->page_buf[0].len; + } + +- packet->completion.send.send_completion = NULL; ++ packet->send_completion = NULL; + + ret = netvsc_send(dev->net_dev->dev, packet); + return ret; +-- +2.4.3 + diff --git 
a/src/patches/linux/0004-hyperv-Add-latest-NetVSP-versions-to-auto-negotiatio.patch b/src/patches/linux/0004-hyperv-Add-latest-NetVSP-versions-to-auto-negotiatio.patch new file mode 100644 index 0000000000..f9cd8278e2 --- /dev/null +++ b/src/patches/linux/0004-hyperv-Add-latest-NetVSP-versions-to-auto-negotiatio.patch @@ -0,0 +1,171 @@ +From 3c2a271d9681cc017947c5e027acc64707c30dee Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang +Date: Wed, 19 Feb 2014 15:49:45 -0800 +Subject: [PATCH 04/25] hyperv: Add latest NetVSP versions to auto negotiation + +It auto negotiates the highest NetVSP version supported by both guest and host. + +Signed-off-by: Haiyang Zhang +Reviewed-by: K. Y. Srinivasan +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/hyperv_net.h | 53 +++++++++++++++++++++++++++++++++++++++++ + drivers/net/hyperv/netvsc.c | 25 ++++++++++++------- + drivers/net/hyperv/netvsc_drv.c | 2 +- + 3 files changed, 70 insertions(+), 10 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index 01a16ea77a5a..39fc230f5c20 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -139,6 +139,8 @@ int rndis_filter_set_device_mac(struct hv_device *hdev, char *mac); + + #define NVSP_PROTOCOL_VERSION_1 2 + #define NVSP_PROTOCOL_VERSION_2 0x30002 ++#define NVSP_PROTOCOL_VERSION_4 0x40000 ++#define NVSP_PROTOCOL_VERSION_5 0x50000 + + enum { + NVSP_MSG_TYPE_NONE = 0, +@@ -193,6 +195,23 @@ enum { + + NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE, + NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE_COMP, ++ ++ NVSP_MSG2_MAX = NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE_COMP, ++ ++ /* Version 4 messages */ ++ NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION, ++ NVSP_MSG4_TYPE_SWITCH_DATA_PATH, ++ NVSP_MSG4_TYPE_UPLINK_CONNECT_STATE_DEPRECATED, ++ ++ NVSP_MSG4_MAX = NVSP_MSG4_TYPE_UPLINK_CONNECT_STATE_DEPRECATED, ++ ++ /* Version 5 messages */ ++ NVSP_MSG5_TYPE_OID_QUERY_EX, ++ NVSP_MSG5_TYPE_OID_QUERY_EX_COMP, ++ NVSP_MSG5_TYPE_SUBCHANNEL, ++ 
NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE, ++ ++ NVSP_MSG5_MAX = NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE, + }; + + enum { +@@ -447,10 +466,44 @@ union nvsp_2_message_uber { + struct nvsp_2_free_rxbuf free_rxbuf; + } __packed; + ++enum nvsp_subchannel_operation { ++ NVSP_SUBCHANNEL_NONE = 0, ++ NVSP_SUBCHANNEL_ALLOCATE, ++ NVSP_SUBCHANNEL_MAX ++}; ++ ++struct nvsp_5_subchannel_request { ++ u32 op; ++ u32 num_subchannels; ++} __packed; ++ ++struct nvsp_5_subchannel_complete { ++ u32 status; ++ u32 num_subchannels; /* Actual number of subchannels allocated */ ++} __packed; ++ ++struct nvsp_5_send_indirect_table { ++ /* The number of entries in the send indirection table */ ++ u32 count; ++ ++ /* The offset of the send indireciton table from top of this struct. ++ * The send indirection table tells which channel to put the send ++ * traffic on. Each entry is a channel number. ++ */ ++ u32 offset; ++} __packed; ++ ++union nvsp_5_message_uber { ++ struct nvsp_5_subchannel_request subchn_req; ++ struct nvsp_5_subchannel_complete subchn_comp; ++ struct nvsp_5_send_indirect_table send_table; ++} __packed; ++ + union nvsp_all_messages { + union nvsp_message_init_uber init_msg; + union nvsp_1_message_uber v1_msg; + union nvsp_2_message_uber v2_msg; ++ union nvsp_5_message_uber v5_msg; + } __packed; + + /* ALL Messages */ +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index 9a0e9c6f1414..1a0280dcba7e 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -290,7 +290,7 @@ static int negotiate_nvsp_ver(struct hv_device *device, + NVSP_STAT_SUCCESS) + return -EINVAL; + +- if (nvsp_ver != NVSP_PROTOCOL_VERSION_2) ++ if (nvsp_ver == NVSP_PROTOCOL_VERSION_1) + return 0; + + /* NVSPv2 only: Send NDIS config */ +@@ -314,6 +314,9 @@ static int netvsc_connect_vsp(struct hv_device *device) + struct nvsp_message *init_packet; + int ndis_version; + struct net_device *ndev; ++ u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, 
++ NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 }; ++ int i, num_ver = 4; /* number of different NVSP versions */ + + net_device = get_outbound_net_device(device); + if (!net_device) +@@ -323,13 +326,14 @@ static int netvsc_connect_vsp(struct hv_device *device) + init_packet = &net_device->channel_init_pkt; + + /* Negotiate the latest NVSP protocol supported */ +- if (negotiate_nvsp_ver(device, net_device, init_packet, +- NVSP_PROTOCOL_VERSION_2) == 0) { +- net_device->nvsp_version = NVSP_PROTOCOL_VERSION_2; +- } else if (negotiate_nvsp_ver(device, net_device, init_packet, +- NVSP_PROTOCOL_VERSION_1) == 0) { +- net_device->nvsp_version = NVSP_PROTOCOL_VERSION_1; +- } else { ++ for (i = num_ver - 1; i >= 0; i--) ++ if (negotiate_nvsp_ver(device, net_device, init_packet, ++ ver_list[i]) == 0) { ++ net_device->nvsp_version = ver_list[i]; ++ break; ++ } ++ ++ if (i < 0) { + ret = -EPROTO; + goto cleanup; + } +@@ -339,7 +343,10 @@ static int netvsc_connect_vsp(struct hv_device *device) + /* Send the ndis version */ + memset(init_packet, 0, sizeof(struct nvsp_message)); + +- ndis_version = 0x00050001; ++ if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4) ++ ndis_version = 0x00050001; ++ else ++ ndis_version = 0x0006001e; + + init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER; + init_packet->msg.v1_msg. 
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 28020f83ba6f..8e3a0b00099b 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -328,7 +328,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) + if (nvdev == NULL || nvdev->destroy) + return -ENODEV; + +- if (nvdev->nvsp_version == NVSP_PROTOCOL_VERSION_2) ++ if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) + limit = NETVSC_MTU; + + if (mtu < 68 || mtu > limit) +-- +2.4.3 + diff --git a/src/patches/linux/0004-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch b/src/patches/linux/0004-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch new file mode 100644 index 0000000000..bc09122243 --- /dev/null +++ b/src/patches/linux/0004-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch @@ -0,0 +1,407 @@ +From c25aaf814a63f9d9c4e45416f13d70ef0aa0be2e Mon Sep 17 00:00:00 2001 +From: KY Srinivasan +Date: Wed, 30 Apr 2014 10:14:31 -0700 +Subject: [PATCH 04/11] hyperv: Enable sendbuf mechanism on the send path + +We send packets using a copy-free mechanism (this is the Guest to Host transport +via VMBUS). While this is obviously optimal for large packets, +it may not be optimal for small packets. Hyper-V host supports +a second mechanism for sending packets that is "copy based". We implement that +mechanism in this patch. + +In this version of the patch I have addressed a comment from David Miller. + +With this patch (and all of the other offload and VRSS patches), we are now able +to almost saturate a 10G interface between Linux VMs on Hyper-V +on different hosts - close to 9 Gbps as measured via iperf. + +Signed-off-by: K. Y. Srinivasan +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. 
Miller +--- + drivers/net/hyperv/hyperv_net.h | 14 +++ + drivers/net/hyperv/netvsc.c | 226 ++++++++++++++++++++++++++++++++++++++-- + drivers/net/hyperv/netvsc_drv.c | 3 +- + 3 files changed, 234 insertions(+), 9 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index d1f7826aa75f..4b7df5a5c966 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -140,6 +140,8 @@ struct hv_netvsc_packet { + void *send_completion_ctx; + void (*send_completion)(void *context); + ++ u32 send_buf_index; ++ + /* This points to the memory after page_buf */ + struct rndis_message *rndis_msg; + +@@ -582,6 +584,9 @@ struct nvsp_message { + + #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ + #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ ++#define NETVSC_SEND_BUFFER_SIZE (1024 * 1024) /* 1MB */ ++#define NETVSC_INVALID_INDEX -1 ++ + + #define NETVSC_RECEIVE_BUFFER_ID 0xcafe + +@@ -607,6 +612,15 @@ struct netvsc_device { + u32 recv_section_cnt; + struct nvsp_1_receive_buffer_section *recv_section; + ++ /* Send buffer allocated by us */ ++ void *send_buf; ++ u32 send_buf_size; ++ u32 send_buf_gpadl_handle; ++ u32 send_section_cnt; ++ u32 send_section_size; ++ unsigned long *send_section_map; ++ int map_words; ++ + /* Used for NetVSP initialization protocol */ + struct completion channel_init_wait; + struct nvsp_message channel_init_pkt; +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index bbee44635035..c041f63a6d30 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + #include "hyperv_net.h" + +@@ -80,7 +81,7 @@ get_in_err: + } + + +-static int netvsc_destroy_recv_buf(struct netvsc_device *net_device) ++static int netvsc_destroy_buf(struct netvsc_device *net_device) + { + struct nvsp_message *revoke_packet; + int ret = 0; +@@ -146,10 +147,62 @@ static int 
netvsc_destroy_recv_buf(struct netvsc_device *net_device) + net_device->recv_section = NULL; + } + ++ /* Deal with the send buffer we may have setup. ++ * If we got a send section size, it means we received a ++ * SendsendBufferComplete msg (ie sent ++ * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need ++ * to send a revoke msg here ++ */ ++ if (net_device->send_section_size) { ++ /* Send the revoke receive buffer */ ++ revoke_packet = &net_device->revoke_packet; ++ memset(revoke_packet, 0, sizeof(struct nvsp_message)); ++ ++ revoke_packet->hdr.msg_type = ++ NVSP_MSG1_TYPE_REVOKE_SEND_BUF; ++ revoke_packet->msg.v1_msg.revoke_recv_buf.id = 0; ++ ++ ret = vmbus_sendpacket(net_device->dev->channel, ++ revoke_packet, ++ sizeof(struct nvsp_message), ++ (unsigned long)revoke_packet, ++ VM_PKT_DATA_INBAND, 0); ++ /* If we failed here, we might as well return and ++ * have a leak rather than continue and a bugchk ++ */ ++ if (ret != 0) { ++ netdev_err(ndev, "unable to send " ++ "revoke send buffer to netvsp\n"); ++ return ret; ++ } ++ } ++ /* Teardown the gpadl on the vsp end */ ++ if (net_device->send_buf_gpadl_handle) { ++ ret = vmbus_teardown_gpadl(net_device->dev->channel, ++ net_device->send_buf_gpadl_handle); ++ ++ /* If we failed here, we might as well return and have a leak ++ * rather than continue and a bugchk ++ */ ++ if (ret != 0) { ++ netdev_err(ndev, ++ "unable to teardown send buffer's gpadl\n"); ++ return ret; ++ } ++ net_device->recv_buf_gpadl_handle = 0; ++ } ++ if (net_device->send_buf) { ++ /* Free up the receive buffer */ ++ free_pages((unsigned long)net_device->send_buf, ++ get_order(net_device->send_buf_size)); ++ net_device->send_buf = NULL; ++ } ++ kfree(net_device->send_section_map); ++ + return ret; + } + +-static int netvsc_init_recv_buf(struct hv_device *device) ++static int netvsc_init_buf(struct hv_device *device) + { + int ret = 0; + int t; +@@ -248,10 +301,90 @@ static int netvsc_init_recv_buf(struct hv_device *device) + goto 
cleanup; + } + ++ /* Now setup the send buffer. ++ */ ++ net_device->send_buf = ++ (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, ++ get_order(net_device->send_buf_size)); ++ if (!net_device->send_buf) { ++ netdev_err(ndev, "unable to allocate send " ++ "buffer of size %d\n", net_device->send_buf_size); ++ ret = -ENOMEM; ++ goto cleanup; ++ } ++ ++ /* Establish the gpadl handle for this buffer on this ++ * channel. Note: This call uses the vmbus connection rather ++ * than the channel to establish the gpadl handle. ++ */ ++ ret = vmbus_establish_gpadl(device->channel, net_device->send_buf, ++ net_device->send_buf_size, ++ &net_device->send_buf_gpadl_handle); ++ if (ret != 0) { ++ netdev_err(ndev, ++ "unable to establish send buffer's gpadl\n"); ++ goto cleanup; ++ } ++ ++ /* Notify the NetVsp of the gpadl handle */ ++ init_packet = &net_device->channel_init_pkt; ++ memset(init_packet, 0, sizeof(struct nvsp_message)); ++ init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF; ++ init_packet->msg.v1_msg.send_recv_buf.gpadl_handle = ++ net_device->send_buf_gpadl_handle; ++ init_packet->msg.v1_msg.send_recv_buf.id = 0; ++ ++ /* Send the gpadl notification request */ ++ ret = vmbus_sendpacket(device->channel, init_packet, ++ sizeof(struct nvsp_message), ++ (unsigned long)init_packet, ++ VM_PKT_DATA_INBAND, ++ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); ++ if (ret != 0) { ++ netdev_err(ndev, ++ "unable to send send buffer's gpadl to netvsp\n"); ++ goto cleanup; ++ } ++ ++ t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ); ++ BUG_ON(t == 0); ++ ++ /* Check the response */ ++ if (init_packet->msg.v1_msg. ++ send_send_buf_complete.status != NVSP_STAT_SUCCESS) { ++ netdev_err(ndev, "Unable to complete send buffer " ++ "initialization with NetVsp - status %d\n", ++ init_packet->msg.v1_msg. ++ send_recv_buf_complete.status); ++ ret = -EINVAL; ++ goto cleanup; ++ } ++ ++ /* Parse the response */ ++ net_device->send_section_size = init_packet->msg. 
++ v1_msg.send_send_buf_complete.section_size; ++ ++ /* Section count is simply the size divided by the section size. ++ */ ++ net_device->send_section_cnt = ++ net_device->send_buf_size/net_device->send_section_size; ++ ++ dev_info(&device->device, "Send section size: %d, Section count:%d\n", ++ net_device->send_section_size, net_device->send_section_cnt); ++ ++ /* Setup state for managing the send buffer. */ ++ net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt, ++ BITS_PER_LONG); ++ ++ net_device->send_section_map = ++ kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL); ++ if (net_device->send_section_map == NULL) ++ goto cleanup; ++ + goto exit; + + cleanup: +- netvsc_destroy_recv_buf(net_device); ++ netvsc_destroy_buf(net_device); + + exit: + return ret; +@@ -369,8 +502,9 @@ static int netvsc_connect_vsp(struct hv_device *device) + net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; + else + net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; ++ net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE; + +- ret = netvsc_init_recv_buf(device); ++ ret = netvsc_init_buf(device); + + cleanup: + return ret; +@@ -378,7 +512,7 @@ cleanup: + + static void netvsc_disconnect_vsp(struct netvsc_device *net_device) + { +- netvsc_destroy_recv_buf(net_device); ++ netvsc_destroy_buf(net_device); + } + + /* +@@ -440,6 +574,12 @@ static inline u32 hv_ringbuf_avail_percent( + return avail_write * 100 / ring_info->ring_datasize; + } + ++static inline void netvsc_free_send_slot(struct netvsc_device *net_device, ++ u32 index) ++{ ++ sync_change_bit(index, net_device->send_section_map); ++} ++ + static void netvsc_send_completion(struct netvsc_device *net_device, + struct hv_device *device, + struct vmpacket_descriptor *packet) +@@ -447,6 +587,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device, + struct nvsp_message *nvsp_packet; + struct hv_netvsc_packet *nvsc_packet; + struct net_device *ndev; ++ u32 send_index; + + ndev = 
net_device->ndev; + +@@ -477,6 +618,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device, + + /* Notify the layer above us */ + if (nvsc_packet) { ++ send_index = nvsc_packet->send_buf_index; ++ if (send_index != NETVSC_INVALID_INDEX) ++ netvsc_free_send_slot(net_device, send_index); + q_idx = nvsc_packet->q_idx; + channel = nvsc_packet->channel; + nvsc_packet->send_completion(nvsc_packet-> +@@ -504,6 +648,52 @@ static void netvsc_send_completion(struct netvsc_device *net_device, + + } + ++static u32 netvsc_get_next_send_section(struct netvsc_device *net_device) ++{ ++ unsigned long index; ++ u32 max_words = net_device->map_words; ++ unsigned long *map_addr = (unsigned long *)net_device->send_section_map; ++ u32 section_cnt = net_device->send_section_cnt; ++ int ret_val = NETVSC_INVALID_INDEX; ++ int i; ++ int prev_val; ++ ++ for (i = 0; i < max_words; i++) { ++ if (!~(map_addr[i])) ++ continue; ++ index = ffz(map_addr[i]); ++ prev_val = sync_test_and_set_bit(index, &map_addr[i]); ++ if (prev_val) ++ continue; ++ if ((index + (i * BITS_PER_LONG)) >= section_cnt) ++ break; ++ ret_val = (index + (i * BITS_PER_LONG)); ++ break; ++ } ++ return ret_val; ++} ++ ++u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, ++ unsigned int section_index, ++ struct hv_netvsc_packet *packet) ++{ ++ char *start = net_device->send_buf; ++ char *dest = (start + (section_index * net_device->send_section_size)); ++ int i; ++ u32 msg_size = 0; ++ ++ for (i = 0; i < packet->page_buf_cnt; i++) { ++ char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT); ++ u32 offset = packet->page_buf[i].offset; ++ u32 len = packet->page_buf[i].len; ++ ++ memcpy(dest, (src + offset), len); ++ msg_size += len; ++ dest += len; ++ } ++ return msg_size; ++} ++ + int netvsc_send(struct hv_device *device, + struct hv_netvsc_packet *packet) + { +@@ -513,6 +703,10 @@ int netvsc_send(struct hv_device *device, + struct net_device *ndev; + struct vmbus_channel *out_channel = 
NULL; + u64 req_id; ++ unsigned int section_index = NETVSC_INVALID_INDEX; ++ u32 msg_size = 0; ++ struct sk_buff *skb; ++ + + net_device = get_outbound_net_device(device); + if (!net_device) +@@ -528,10 +722,26 @@ int netvsc_send(struct hv_device *device, + sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1; + } + +- /* Not using send buffer section */ ++ /* Attempt to send via sendbuf */ ++ if (packet->total_data_buflen < net_device->send_section_size) { ++ section_index = netvsc_get_next_send_section(net_device); ++ if (section_index != NETVSC_INVALID_INDEX) { ++ msg_size = netvsc_copy_to_send_buf(net_device, ++ section_index, ++ packet); ++ skb = (struct sk_buff *) ++ (unsigned long)packet->send_completion_tid; ++ if (skb) ++ dev_kfree_skb_any(skb); ++ packet->page_buf_cnt = 0; ++ } ++ } ++ packet->send_buf_index = section_index; ++ ++ + sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index = +- 0xFFFFFFFF; +- sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0; ++ section_index; ++ sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size; + + if (packet->send_completion) + req_id = (ulong)packet; +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index c76b66515e92..939e3af60ec4 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -236,10 +236,11 @@ static void netvsc_xmit_completion(void *context) + struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; + struct sk_buff *skb = (struct sk_buff *) + (unsigned long)packet->send_completion_tid; ++ u32 index = packet->send_buf_index; + + kfree(packet); + +- if (skb) ++ if (skb && (index == NETVSC_INVALID_INDEX)) + dev_kfree_skb_any(skb); + } + +-- +2.4.3 + diff --git a/src/patches/linux/0005-Add-support-for-netvsc-build-without-CONFIG_SYSFS-fl.patch b/src/patches/linux/0005-Add-support-for-netvsc-build-without-CONFIG_SYSFS-fl.patch new file mode 100644 index 0000000000..4d00ad2111 --- /dev/null +++ 
b/src/patches/linux/0005-Add-support-for-netvsc-build-without-CONFIG_SYSFS-fl.patch @@ -0,0 +1,42 @@ +From e565e803d437b36c4fb4ced5e346827981183284 Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang +Date: Thu, 8 May 2014 15:14:10 -0700 +Subject: [PATCH 05/11] Add support for netvsc build without CONFIG_SYSFS flag + +This change ensures the driver can be built successfully without the +CONFIG_SYSFS flag. +MS-TFS: 182270 + +Signed-off-by: Haiyang Zhang +Reviewed-by: K. Y. Srinivasan +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/netvsc_drv.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 939e3af60ec4..083d084396d3 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -640,8 +640,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, + packet->vlan_tci); + + skb_record_rx_queue(skb, packet->channel-> +- offermsg.offer.sub_channel_index % +- net->real_num_rx_queues); ++ offermsg.offer.sub_channel_index); + + net->stats.rx_packets++; + net->stats.rx_bytes += packet->total_data_buflen; +@@ -824,8 +823,6 @@ static int netvsc_probe(struct hv_device *dev, + nvdev = hv_get_drvdata(dev); + netif_set_real_num_tx_queues(net, nvdev->num_chn); + netif_set_real_num_rx_queues(net, nvdev->num_chn); +- dev_info(&dev->device, "real num tx,rx queues:%u, %u\n", +- net->real_num_tx_queues, net->real_num_rx_queues); + + ret = register_netdev(net); + if (ret != 0) { +-- +2.4.3 + diff --git a/src/patches/linux/0005-Drivers-net-hyperv-Enable-scatter-gather-I-O.patch b/src/patches/linux/0005-Drivers-net-hyperv-Enable-scatter-gather-I-O.patch new file mode 100644 index 0000000000..27fb2d98ca --- /dev/null +++ b/src/patches/linux/0005-Drivers-net-hyperv-Enable-scatter-gather-I-O.patch @@ -0,0 +1,212 @@ +From 4c06034001e20ff9f6e2a1a3dfa155bf3f31440c Mon Sep 17 00:00:00 2001 +From: KY Srinivasan +Date: Sat, 8 Mar 2014 19:23:13 -0800 +Subject: [PATCH 
05/25] Drivers: net: hyperv: Enable scatter gather I/O + +Cleanup the code and enable scatter gather I/O. + +Signed-off-by: K. Y. Srinivasan +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/netvsc_drv.c | 153 ++++++++++++++++++++++++++++++---------- + 1 file changed, 114 insertions(+), 39 deletions(-) + +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 8e3a0b00099b..72961741be54 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -140,22 +140,124 @@ static void netvsc_xmit_completion(void *context) + dev_kfree_skb_any(skb); + } + ++static u32 fill_pg_buf(struct page *page, u32 offset, u32 len, ++ struct hv_page_buffer *pb) ++{ ++ int j = 0; ++ ++ /* Deal with compund pages by ignoring unused part ++ * of the page. ++ */ ++ page += (offset >> PAGE_SHIFT); ++ offset &= ~PAGE_MASK; ++ ++ while (len > 0) { ++ unsigned long bytes; ++ ++ bytes = PAGE_SIZE - offset; ++ if (bytes > len) ++ bytes = len; ++ pb[j].pfn = page_to_pfn(page); ++ pb[j].offset = offset; ++ pb[j].len = bytes; ++ ++ offset += bytes; ++ len -= bytes; ++ ++ if (offset == PAGE_SIZE && len) { ++ page++; ++ offset = 0; ++ j++; ++ } ++ } ++ ++ return j + 1; ++} ++ ++static void init_page_array(void *hdr, u32 len, struct sk_buff *skb, ++ struct hv_page_buffer *pb) ++{ ++ u32 slots_used = 0; ++ char *data = skb->data; ++ int frags = skb_shinfo(skb)->nr_frags; ++ int i; ++ ++ /* The packet is laid out thus: ++ * 1. hdr ++ * 2. skb linear data ++ * 3. 
skb fragment data ++ */ ++ if (hdr != NULL) ++ slots_used += fill_pg_buf(virt_to_page(hdr), ++ offset_in_page(hdr), ++ len, &pb[slots_used]); ++ ++ slots_used += fill_pg_buf(virt_to_page(data), ++ offset_in_page(data), ++ skb_headlen(skb), &pb[slots_used]); ++ ++ for (i = 0; i < frags; i++) { ++ skb_frag_t *frag = skb_shinfo(skb)->frags + i; ++ ++ slots_used += fill_pg_buf(skb_frag_page(frag), ++ frag->page_offset, ++ skb_frag_size(frag), &pb[slots_used]); ++ } ++} ++ ++static int count_skb_frag_slots(struct sk_buff *skb) ++{ ++ int i, frags = skb_shinfo(skb)->nr_frags; ++ int pages = 0; ++ ++ for (i = 0; i < frags; i++) { ++ skb_frag_t *frag = skb_shinfo(skb)->frags + i; ++ unsigned long size = skb_frag_size(frag); ++ unsigned long offset = frag->page_offset; ++ ++ /* Skip unused frames from start of page */ ++ offset &= ~PAGE_MASK; ++ pages += PFN_UP(offset + size); ++ } ++ return pages; ++} ++ ++static int netvsc_get_slots(struct sk_buff *skb) ++{ ++ char *data = skb->data; ++ unsigned int offset = offset_in_page(data); ++ unsigned int len = skb_headlen(skb); ++ int slots; ++ int frag_slots; ++ ++ slots = DIV_ROUND_UP(offset + len, PAGE_SIZE); ++ frag_slots = count_skb_frag_slots(skb); ++ return slots + frag_slots; ++} ++ + static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + { + struct net_device_context *net_device_ctx = netdev_priv(net); + struct hv_netvsc_packet *packet; + int ret; +- unsigned int i, num_pages, npg_data; ++ unsigned int num_data_pages; + u32 skb_length = skb->len; + +- /* Add multipages for skb->data and additional 2 for RNDIS */ +- npg_data = (((unsigned long)skb->data + skb_headlen(skb) - 1) +- >> PAGE_SHIFT) - ((unsigned long)skb->data >> PAGE_SHIFT) + 1; +- num_pages = skb_shinfo(skb)->nr_frags + npg_data + 2; ++ /* We will atmost need two pages to describe the rndis ++ * header. We can only transmit MAX_PAGE_BUFFER_COUNT number ++ * of pages in a single packet. 
++ */ ++ num_data_pages = netvsc_get_slots(skb) + 2; ++ if (num_data_pages > MAX_PAGE_BUFFER_COUNT) { ++ netdev_err(net, "Packet too big: %u\n", skb->len); ++ dev_kfree_skb(skb); ++ net->stats.tx_dropped++; ++ return NETDEV_TX_OK; ++ } + + /* Allocate a netvsc packet based on # of frags. */ + packet = kzalloc(sizeof(struct hv_netvsc_packet) + +- (num_pages * sizeof(struct hv_page_buffer)) + ++ (num_data_pages * sizeof(struct hv_page_buffer)) + + sizeof(struct rndis_message) + + NDIS_VLAN_PPI_SIZE, GFP_ATOMIC); + if (!packet) { +@@ -170,44 +272,17 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + packet->vlan_tci = skb->vlan_tci; + + packet->extension = (void *)(unsigned long)packet + +- sizeof(struct hv_netvsc_packet) + +- (num_pages * sizeof(struct hv_page_buffer)); ++ sizeof(struct hv_netvsc_packet) + ++ (num_data_pages * sizeof(struct hv_page_buffer)); + + /* If the rndis msg goes beyond 1 page, we will add 1 later */ +- packet->page_buf_cnt = num_pages - 1; ++ packet->page_buf_cnt = num_data_pages - 1; + + /* Initialize it from the skb */ + packet->total_data_buflen = skb->len; + + /* Start filling in the page buffers starting after RNDIS buffer. 
*/ +- packet->page_buf[1].pfn = virt_to_phys(skb->data) >> PAGE_SHIFT; +- packet->page_buf[1].offset +- = (unsigned long)skb->data & (PAGE_SIZE - 1); +- if (npg_data == 1) +- packet->page_buf[1].len = skb_headlen(skb); +- else +- packet->page_buf[1].len = PAGE_SIZE +- - packet->page_buf[1].offset; +- +- for (i = 2; i <= npg_data; i++) { +- packet->page_buf[i].pfn = virt_to_phys(skb->data +- + PAGE_SIZE * (i-1)) >> PAGE_SHIFT; +- packet->page_buf[i].offset = 0; +- packet->page_buf[i].len = PAGE_SIZE; +- } +- if (npg_data > 1) +- packet->page_buf[npg_data].len = (((unsigned long)skb->data +- + skb_headlen(skb) - 1) & (PAGE_SIZE - 1)) + 1; +- +- /* Additional fragments are after SKB data */ +- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { +- const skb_frag_t *f = &skb_shinfo(skb)->frags[i]; +- +- packet->page_buf[i+npg_data+1].pfn = +- page_to_pfn(skb_frag_page(f)); +- packet->page_buf[i+npg_data+1].offset = f->page_offset; +- packet->page_buf[i+npg_data+1].len = skb_frag_size(f); +- } ++ init_page_array(NULL, 0, skb, &packet->page_buf[1]); + + /* Set the completion routine */ + packet->completion.send.send_completion = netvsc_xmit_completion; +@@ -454,8 +529,8 @@ static int netvsc_probe(struct hv_device *dev, + net->netdev_ops = &device_ops; + + /* TODO: Add GSO and Checksum offload */ +- net->hw_features = 0; +- net->features = NETIF_F_HW_VLAN_CTAG_TX; ++ net->hw_features = NETIF_F_SG; ++ net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG; + + SET_ETHTOOL_OPS(net, ðtool_ops); + SET_NETDEV_DEV(net, &dev->device); +-- +2.4.3 + diff --git a/src/patches/linux/0006-Drivers-net-hyperv-Cleanup-the-send-path.patch b/src/patches/linux/0006-Drivers-net-hyperv-Cleanup-the-send-path.patch new file mode 100644 index 0000000000..20fcb3f607 --- /dev/null +++ b/src/patches/linux/0006-Drivers-net-hyperv-Cleanup-the-send-path.patch @@ -0,0 +1,266 @@ +From d972eb71fb95660fe74616901b55b0d7a336daed Mon Sep 17 00:00:00 2001 +From: KY Srinivasan +Date: Sat, 8 Mar 2014 19:23:14 -0800 
+Subject: [PATCH 06/25] Drivers: net: hyperv: Cleanup the send path + +In preparation for enabling offloads, cleanup the send path. + +Signed-off-by: K. Y. Srinivasan +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/hyperv_net.h | 7 +--- + drivers/net/hyperv/netvsc_drv.c | 88 +++++++++++++++++++++++++++++++-------- + drivers/net/hyperv/rndis_filter.c | 66 ----------------------------- + 3 files changed, 71 insertions(+), 90 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index 39fc230f5c20..694bf7cada90 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -73,7 +73,7 @@ struct hv_netvsc_packet { + } completion; + + /* This points to the memory after page_buf */ +- void *extension; ++ struct rndis_message *rndis_msg; + + u32 total_data_buflen; + /* Points to the send/receive buffer where the ethernet frame is */ +@@ -126,11 +126,6 @@ void rndis_filter_device_remove(struct hv_device *dev); + int rndis_filter_receive(struct hv_device *dev, + struct hv_netvsc_packet *pkt); + +- +- +-int rndis_filter_send(struct hv_device *dev, +- struct hv_netvsc_packet *pkt); +- + int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter); + int rndis_filter_set_device_mac(struct hv_device *hdev, char *mac); + +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 72961741be54..87293a15e470 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -128,6 +128,27 @@ static int netvsc_close(struct net_device *net) + return ret; + } + ++static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, ++ int pkt_type) ++{ ++ struct rndis_packet *rndis_pkt; ++ struct rndis_per_packet_info *ppi; ++ ++ rndis_pkt = &msg->msg.pkt; ++ rndis_pkt->data_offset += ppi_size; ++ ++ ppi = (struct rndis_per_packet_info *)((void *)rndis_pkt + ++ rndis_pkt->per_pkt_info_offset + 
rndis_pkt->per_pkt_info_len); ++ ++ ppi->size = ppi_size; ++ ppi->type = pkt_type; ++ ppi->ppi_offset = sizeof(struct rndis_per_packet_info); ++ ++ rndis_pkt->per_pkt_info_len += ppi_size; ++ ++ return ppi; ++} ++ + static void netvsc_xmit_completion(void *context) + { + struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; +@@ -174,8 +195,8 @@ static u32 fill_pg_buf(struct page *page, u32 offset, u32 len, + return j + 1; + } + +-static void init_page_array(void *hdr, u32 len, struct sk_buff *skb, +- struct hv_page_buffer *pb) ++static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, ++ struct hv_page_buffer *pb) + { + u32 slots_used = 0; + char *data = skb->data; +@@ -203,6 +224,7 @@ static void init_page_array(void *hdr, u32 len, struct sk_buff *skb, + frag->page_offset, + skb_frag_size(frag), &pb[slots_used]); + } ++ return slots_used; + } + + static int count_skb_frag_slots(struct sk_buff *skb) +@@ -240,15 +262,20 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + struct net_device_context *net_device_ctx = netdev_priv(net); + struct hv_netvsc_packet *packet; + int ret; +- unsigned int num_data_pages; + u32 skb_length = skb->len; ++ unsigned int num_data_pgs; ++ struct rndis_message *rndis_msg; ++ struct rndis_packet *rndis_pkt; ++ u32 rndis_msg_size; ++ bool isvlan; ++ struct rndis_per_packet_info *ppi; + + /* We will atmost need two pages to describe the rndis + * header. We can only transmit MAX_PAGE_BUFFER_COUNT number + * of pages in a single packet. + */ +- num_data_pages = netvsc_get_slots(skb) + 2; +- if (num_data_pages > MAX_PAGE_BUFFER_COUNT) { ++ num_data_pgs = netvsc_get_slots(skb) + 2; ++ if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) { + netdev_err(net, "Packet too big: %u\n", skb->len); + dev_kfree_skb(skb); + net->stats.tx_dropped++; +@@ -257,7 +284,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + + /* Allocate a netvsc packet based on # of frags. 
*/ + packet = kzalloc(sizeof(struct hv_netvsc_packet) + +- (num_data_pages * sizeof(struct hv_page_buffer)) + ++ (num_data_pgs * sizeof(struct hv_page_buffer)) + + sizeof(struct rndis_message) + + NDIS_VLAN_PPI_SIZE, GFP_ATOMIC); + if (!packet) { +@@ -271,26 +298,51 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + + packet->vlan_tci = skb->vlan_tci; + +- packet->extension = (void *)(unsigned long)packet + +- sizeof(struct hv_netvsc_packet) + +- (num_data_pages * sizeof(struct hv_page_buffer)); +- +- /* If the rndis msg goes beyond 1 page, we will add 1 later */ +- packet->page_buf_cnt = num_data_pages - 1; +- +- /* Initialize it from the skb */ ++ packet->is_data_pkt = true; + packet->total_data_buflen = skb->len; + +- /* Start filling in the page buffers starting after RNDIS buffer. */ +- init_page_array(NULL, 0, skb, &packet->page_buf[1]); ++ packet->rndis_msg = (struct rndis_message *)((unsigned long)packet + ++ sizeof(struct hv_netvsc_packet) + ++ (num_data_pgs * sizeof(struct hv_page_buffer))); + + /* Set the completion routine */ + packet->completion.send.send_completion = netvsc_xmit_completion; + packet->completion.send.send_completion_ctx = packet; + packet->completion.send.send_completion_tid = (unsigned long)skb; + +- ret = rndis_filter_send(net_device_ctx->device_ctx, +- packet); ++ isvlan = packet->vlan_tci & VLAN_TAG_PRESENT; ++ ++ /* Add the rndis header */ ++ rndis_msg = packet->rndis_msg; ++ rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET; ++ rndis_msg->msg_len = packet->total_data_buflen; ++ rndis_pkt = &rndis_msg->msg.pkt; ++ rndis_pkt->data_offset = sizeof(struct rndis_packet); ++ rndis_pkt->data_len = packet->total_data_buflen; ++ rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet); ++ ++ rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet); ++ ++ if (isvlan) { ++ struct ndis_pkt_8021q_info *vlan; ++ ++ rndis_msg_size += NDIS_VLAN_PPI_SIZE; ++ ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE, ++ 
IEEE_8021Q_INFO); ++ vlan = (struct ndis_pkt_8021q_info *)((void *)ppi + ++ ppi->ppi_offset); ++ vlan->vlanid = packet->vlan_tci & VLAN_VID_MASK; ++ vlan->pri = (packet->vlan_tci & VLAN_PRIO_MASK) >> ++ VLAN_PRIO_SHIFT; ++ } ++ ++ /* Start filling in the page buffers with the rndis hdr */ ++ rndis_msg->msg_len += rndis_msg_size; ++ packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, ++ skb, &packet->page_buf[0]); ++ ++ ret = netvsc_send(net_device_ctx->device_ctx, packet); ++ + if (ret == 0) { + net->stats.tx_bytes += skb_length; + net->stats.tx_packets++; +diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c +index 6a9f6021f09c..dcbf144ea7da 100644 +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -910,69 +910,3 @@ int rndis_filter_close(struct hv_device *dev) + + return rndis_filter_close_device(nvdev->extension); + } +- +-int rndis_filter_send(struct hv_device *dev, +- struct hv_netvsc_packet *pkt) +-{ +- struct rndis_message *rndis_msg; +- struct rndis_packet *rndis_pkt; +- u32 rndis_msg_size; +- bool isvlan = pkt->vlan_tci & VLAN_TAG_PRESENT; +- +- /* Add the rndis header */ +- rndis_msg = (struct rndis_message *)pkt->extension; +- +- rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet); +- if (isvlan) +- rndis_msg_size += NDIS_VLAN_PPI_SIZE; +- +- rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET; +- rndis_msg->msg_len = pkt->total_data_buflen + +- rndis_msg_size; +- +- rndis_pkt = &rndis_msg->msg.pkt; +- rndis_pkt->data_offset = sizeof(struct rndis_packet); +- if (isvlan) +- rndis_pkt->data_offset += NDIS_VLAN_PPI_SIZE; +- rndis_pkt->data_len = pkt->total_data_buflen; +- +- if (isvlan) { +- struct rndis_per_packet_info *ppi; +- struct ndis_pkt_8021q_info *vlan; +- +- rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet); +- rndis_pkt->per_pkt_info_len = NDIS_VLAN_PPI_SIZE; +- +- ppi = (struct rndis_per_packet_info *)((ulong)rndis_pkt + +- rndis_pkt->per_pkt_info_offset); +- 
ppi->size = NDIS_VLAN_PPI_SIZE; +- ppi->type = IEEE_8021Q_INFO; +- ppi->ppi_offset = sizeof(struct rndis_per_packet_info); +- +- vlan = (struct ndis_pkt_8021q_info *)((ulong)ppi + +- ppi->ppi_offset); +- vlan->vlanid = pkt->vlan_tci & VLAN_VID_MASK; +- vlan->pri = (pkt->vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; +- } +- +- pkt->is_data_pkt = true; +- pkt->page_buf[0].pfn = virt_to_phys(rndis_msg) >> PAGE_SHIFT; +- pkt->page_buf[0].offset = +- (unsigned long)rndis_msg & (PAGE_SIZE-1); +- pkt->page_buf[0].len = rndis_msg_size; +- +- /* Add one page_buf if the rndis msg goes beyond page boundary */ +- if (pkt->page_buf[0].offset + rndis_msg_size > PAGE_SIZE) { +- int i; +- for (i = pkt->page_buf_cnt; i > 1; i--) +- pkt->page_buf[i] = pkt->page_buf[i-1]; +- pkt->page_buf_cnt++; +- pkt->page_buf[0].len = PAGE_SIZE - pkt->page_buf[0].offset; +- pkt->page_buf[1].pfn = virt_to_phys((void *)((ulong) +- rndis_msg + pkt->page_buf[0].len)) >> PAGE_SHIFT; +- pkt->page_buf[1].offset = 0; +- pkt->page_buf[1].len = rndis_msg_size - pkt->page_buf[0].len; +- } +- +- return netvsc_send(dev, pkt); +-} +-- +2.4.3 + diff --git a/src/patches/linux/0006-net-get-rid-of-SET_ETHTOOL_OPS.patch b/src/patches/linux/0006-net-get-rid-of-SET_ETHTOOL_OPS.patch new file mode 100644 index 0000000000..e4a7a4699c --- /dev/null +++ b/src/patches/linux/0006-net-get-rid-of-SET_ETHTOOL_OPS.patch @@ -0,0 +1,44 @@ +From 7ad24ea4bf620a32631d7b3069c3e30c078b0c3e Mon Sep 17 00:00:00 2001 +From: Wilfried Klaebe +Date: Sun, 11 May 2014 00:12:32 +0000 +Subject: [PATCH 06/11] net: get rid of SET_ETHTOOL_OPS + +net: get rid of SET_ETHTOOL_OPS + +Dave Miller mentioned he'd like to see SET_ETHTOOL_OPS gone. +This does that. + +Mostly done via coccinelle script: +@@ +struct ethtool_ops *ops; +struct net_device *dev; +@@ +- SET_ETHTOOL_OPS(dev, ops); ++ dev->ethtool_ops = ops; + +Compile tested only, but I'd seriously wonder if this broke anything. 
+ +Suggested-by: Dave Miller +Signed-off-by: Wilfried Klaebe +Acked-by: Felipe Balbi +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/netvsc_drv.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 1de3ef5dd5d2..2e967a7bdb33 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -810,7 +810,7 @@ static int netvsc_probe(struct hv_device *dev, + net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM | + NETIF_F_IP_CSUM | NETIF_F_TSO; + +- SET_ETHTOOL_OPS(net, &ethtool_ops); ++ net->ethtool_ops = &ethtool_ops; + SET_NETDEV_DEV(net, &dev->device); + + /* Notify the netvsc driver of the new device */ +-- +2.4.3 + diff --git a/src/patches/linux/0007-Drivers-net-hyperv-Enable-offloads-on-the-host.patch b/src/patches/linux/0007-Drivers-net-hyperv-Enable-offloads-on-the-host.patch new file mode 100644 index 0000000000..a109f04b65 --- /dev/null +++ b/src/patches/linux/0007-Drivers-net-hyperv-Enable-offloads-on-the-host.patch @@ -0,0 +1,196 @@ +From 6b15b5f37e976a5f3840c7ea59560e10c6251250 Mon Sep 17 00:00:00 2001 +From: KY Srinivasan +Date: Sat, 8 Mar 2014 19:23:15 -0800 +Subject: [PATCH 07/25] Drivers: net: hyperv: Enable offloads on the host + +Prior to enabling guest side offloads, enable the offloads on the host. + +Signed-off-by: K. Y. Srinivasan +Reviewed-by: Haiyang Zhang +Signed-off-by: David S.
Miller +--- + drivers/net/hyperv/hyperv_net.h | 55 +++++++++++++++++++++++++++ + drivers/net/hyperv/rndis_filter.c | 80 +++++++++++++++++++++++++++++++++++++++ + 2 files changed, 135 insertions(+) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index 694bf7cada90..8bc4e766589b 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -721,6 +721,61 @@ struct ndis_pkt_8021q_info { + }; + }; + ++struct ndis_oject_header { ++ u8 type; ++ u8 revision; ++ u16 size; ++}; ++ ++#define NDIS_OBJECT_TYPE_DEFAULT 0x80 ++#define NDIS_OFFLOAD_PARAMETERS_REVISION_3 3 ++#define NDIS_OFFLOAD_PARAMETERS_NO_CHANGE 0 ++#define NDIS_OFFLOAD_PARAMETERS_LSOV2_DISABLED 1 ++#define NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED 2 ++#define NDIS_OFFLOAD_PARAMETERS_LSOV1_ENABLED 2 ++#define NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED 1 ++#define NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED 2 ++#define NDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED 1 ++#define NDIS_OFFLOAD_PARAMETERS_TX_ENABLED_RX_DISABLED 2 ++#define NDIS_OFFLOAD_PARAMETERS_RX_ENABLED_TX_DISABLED 3 ++#define NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED 4 ++ ++/* ++ * New offload OIDs for NDIS 6 ++ */ ++#define OID_TCP_OFFLOAD_CURRENT_CONFIG 0xFC01020B /* query only */ ++#define OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C /* set only */ ++#define OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D/* query only */ ++#define OID_TCP_CONNECTION_OFFLOAD_CURRENT_CONFIG 0xFC01020E /* query only */ ++#define OID_TCP_CONNECTION_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020F /* query */ ++#define OID_OFFLOAD_ENCAPSULATION 0x0101010A /* set/query */ ++ ++struct ndis_offload_params { ++ struct ndis_oject_header header; ++ u8 ip_v4_csum; ++ u8 tcp_ip_v4_csum; ++ u8 udp_ip_v4_csum; ++ u8 tcp_ip_v6_csum; ++ u8 udp_ip_v6_csum; ++ u8 lso_v1; ++ u8 ip_sec_v1; ++ u8 lso_v2_ipv4; ++ u8 lso_v2_ipv6; ++ u8 tcp_connection_ip_v4; ++ u8 tcp_connection_ip_v6; ++ u32 flags; ++ u8 ip_sec_v2; ++ u8 ip_sec_v2_ip_v4; ++ struct { ++ u8 
rsc_ip_v4; ++ u8 rsc_ip_v6; ++ }; ++ struct { ++ u8 encapsulated_packet_task_offload; ++ u8 encapsulation_types; ++ }; ++}; ++ + #define NDIS_VLAN_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ + sizeof(struct ndis_pkt_8021q_info)) + +diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c +index dcbf144ea7da..9b02f21097a7 100644 +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -627,6 +627,61 @@ cleanup: + return ret; + } + ++int rndis_filter_set_offload_params(struct hv_device *hdev, ++ struct ndis_offload_params *req_offloads) ++{ ++ struct netvsc_device *nvdev = hv_get_drvdata(hdev); ++ struct rndis_device *rdev = nvdev->extension; ++ struct net_device *ndev = nvdev->ndev; ++ struct rndis_request *request; ++ struct rndis_set_request *set; ++ struct ndis_offload_params *offload_params; ++ struct rndis_set_complete *set_complete; ++ u32 extlen = sizeof(struct ndis_offload_params); ++ int ret, t; ++ ++ request = get_rndis_request(rdev, RNDIS_MSG_SET, ++ RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen); ++ if (!request) ++ return -ENOMEM; ++ ++ set = &request->request_msg.msg.set_req; ++ set->oid = OID_TCP_OFFLOAD_PARAMETERS; ++ set->info_buflen = extlen; ++ set->info_buf_offset = sizeof(struct rndis_set_request); ++ set->dev_vc_handle = 0; ++ ++ offload_params = (struct ndis_offload_params *)((ulong)set + ++ set->info_buf_offset); ++ *offload_params = *req_offloads; ++ offload_params->header.type = NDIS_OBJECT_TYPE_DEFAULT; ++ offload_params->header.revision = NDIS_OFFLOAD_PARAMETERS_REVISION_3; ++ offload_params->header.size = extlen; ++ ++ ret = rndis_filter_send_request(rdev, request); ++ if (ret != 0) ++ goto cleanup; ++ ++ t = wait_for_completion_timeout(&request->wait_event, 5*HZ); ++ if (t == 0) { ++ netdev_err(ndev, "timeout before we got aOFFLOAD set response...\n"); ++ /* can't put_rndis_request, since we may still receive a ++ * send-completion. 
++ */ ++ return -EBUSY; ++ } else { ++ set_complete = &request->response_msg.msg.set_complete; ++ if (set_complete->status != RNDIS_STATUS_SUCCESS) { ++ netdev_err(ndev, "Fail to set MAC on host side:0x%x\n", ++ set_complete->status); ++ ret = -EINVAL; ++ } ++ } ++ ++cleanup: ++ put_rndis_request(rdev, request); ++ return ret; ++} + + static int rndis_filter_query_device_link_status(struct rndis_device *dev) + { +@@ -826,6 +881,7 @@ int rndis_filter_device_add(struct hv_device *dev, + struct netvsc_device *net_device; + struct rndis_device *rndis_device; + struct netvsc_device_info *device_info = additional_info; ++ struct ndis_offload_params offloads; + + rndis_device = get_rndis_device(); + if (!rndis_device) +@@ -865,6 +921,26 @@ int rndis_filter_device_add(struct hv_device *dev, + + memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN); + ++ /* Turn on the offloads; the host supports all of the relevant ++ * offloads. ++ */ ++ memset(&offloads, 0, sizeof(struct ndis_offload_params)); ++ /* A value of zero means "no change"; now turn on what we ++ * want. ++ */ ++ offloads.ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; ++ offloads.tcp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; ++ offloads.udp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; ++ offloads.tcp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; ++ offloads.udp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; ++ offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; ++ ++ ++ ret = rndis_filter_set_offload_params(dev, &offloads); ++ if (ret) ++ goto err_dev_remv; ++ ++ + rndis_filter_query_device_link_status(rndis_device); + + device_info->link_state = rndis_device->link_state; +@@ -874,6 +950,10 @@ int rndis_filter_device_add(struct hv_device *dev, + device_info->link_state ? 
"down" : "up"); + + return ret; ++ ++err_dev_remv: ++ rndis_filter_device_remove(dev); ++ return ret; + } + + void rndis_filter_device_remove(struct hv_device *dev) +-- +2.4.3 + diff --git a/src/patches/linux/0007-hyperv-Add-hash-value-into-RNDIS-Per-packet-info.patch b/src/patches/linux/0007-hyperv-Add-hash-value-into-RNDIS-Per-packet-info.patch new file mode 100644 index 0000000000..0db5f72a2b --- /dev/null +++ b/src/patches/linux/0007-hyperv-Add-hash-value-into-RNDIS-Per-packet-info.patch @@ -0,0 +1,93 @@ +From 307f099520b66504cf6c5638f3f404c48b9fb45b Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang +Date: Wed, 21 May 2014 12:55:39 -0700 +Subject: [PATCH 07/11] hyperv: Add hash value into RNDIS Per-packet info + +It passes the hash value as the RNDIS Per-packet info to the Hyper-V host, +so that the send completion notices can be spread across multiple channels. +MS-TFS: 140273 + +Signed-off-by: Haiyang Zhang +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/hyperv_net.h | 4 ++++ + drivers/net/hyperv/netvsc_drv.c | 18 ++++++++++++++---- + 2 files changed, 18 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index 4b7df5a5c966..6cc37c15e0bf 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -791,6 +791,7 @@ enum ndis_per_pkt_info_type { + IEEE_8021Q_INFO, + ORIGINAL_PKTINFO, + PACKET_CANCEL_ID, ++ NBL_HASH_VALUE = PACKET_CANCEL_ID, + ORIGINAL_NET_BUFLIST, + CACHED_NET_BUFLIST, + SHORT_PKT_PADINFO, +@@ -937,6 +938,9 @@ struct ndis_tcp_lso_info { + #define NDIS_LSO_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ + sizeof(struct ndis_tcp_lso_info)) + ++#define NDIS_HASH_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ ++ sizeof(u32)) ++ + /* Format of Information buffer passed in a SetRequest for the OID */ + /* OID_GEN_RNDIS_CONFIG_PARAMETER. 
*/ + struct rndis_config_parameter_info { +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 2e967a7bdb33..4fd71b75e666 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -224,9 +224,11 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, + if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1) + return 0; + +- if (netvsc_set_hash(&hash, skb)) ++ if (netvsc_set_hash(&hash, skb)) { + q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] % + ndev->real_num_tx_queues; ++ skb_set_hash(skb, hash, PKT_HASH_TYPE_L3); ++ } + + return q_idx; + } +@@ -384,6 +386,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + struct ndis_tcp_lso_info *lso_info; + int hdr_offset; + u32 net_trans_info; ++ u32 hash; + + + /* We will atmost need two pages to describe the rndis +@@ -402,9 +405,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + packet = kzalloc(sizeof(struct hv_netvsc_packet) + + (num_data_pgs * sizeof(struct hv_page_buffer)) + + sizeof(struct rndis_message) + +- NDIS_VLAN_PPI_SIZE + +- NDIS_CSUM_PPI_SIZE + +- NDIS_LSO_PPI_SIZE, GFP_ATOMIC); ++ NDIS_VLAN_PPI_SIZE + NDIS_CSUM_PPI_SIZE + ++ NDIS_LSO_PPI_SIZE + NDIS_HASH_PPI_SIZE, GFP_ATOMIC); + if (!packet) { + /* out of memory, drop packet */ + netdev_err(net, "unable to allocate hv_netvsc_packet\n"); +@@ -443,6 +445,14 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + + rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet); + ++ hash = skb_get_hash_raw(skb); ++ if (hash != 0 && net->real_num_tx_queues > 1) { ++ rndis_msg_size += NDIS_HASH_PPI_SIZE; ++ ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE, ++ NBL_HASH_VALUE); ++ *(u32 *)((void *)ppi + ppi->ppi_offset) = hash; ++ } ++ + if (isvlan) { + struct ndis_pkt_8021q_info *vlan; + +-- +2.4.3 + diff --git 
a/src/patches/linux/0008-Drivers-net-hyperv-Enable-receive-side-IP-checksum-o.patch b/src/patches/linux/0008-Drivers-net-hyperv-Enable-receive-side-IP-checksum-o.patch new file mode 100644 index 0000000000..e77e9f2c41 --- /dev/null +++ b/src/patches/linux/0008-Drivers-net-hyperv-Enable-receive-side-IP-checksum-o.patch @@ -0,0 +1,147 @@ +From 59e0c70c618668522a9431686f7e3a69ef396dff Mon Sep 17 00:00:00 2001 +From: KY Srinivasan +Date: Sat, 8 Mar 2014 19:23:16 -0800 +Subject: [PATCH 08/25] Drivers: net: hyperv: Enable receive side IP checksum + offload + +Enable receive side checksum offload. + +Signed-off-by: K. Y. Srinivasan +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/hyperv_net.h | 33 ++++++++++++++++++++++++++++++++- + drivers/net/hyperv/netvsc_drv.c | 19 +++++++++++++++---- + drivers/net/hyperv/rndis_filter.c | 4 +++- + 3 files changed, 50 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index 8bc4e766589b..faeb74623fbd 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -30,6 +30,7 @@ + + /* Fwd declaration */ + struct hv_netvsc_packet; ++struct ndis_tcp_ip_checksum_info; + + /* Represent the xfer page packet which contains 1 or more netvsc packet */ + struct xferpage_packet { +@@ -117,7 +118,8 @@ int netvsc_send(struct hv_device *device, + void netvsc_linkstatus_callback(struct hv_device *device_obj, + unsigned int status); + int netvsc_recv_callback(struct hv_device *device_obj, +- struct hv_netvsc_packet *packet); ++ struct hv_netvsc_packet *packet, ++ struct ndis_tcp_ip_checksum_info *csum_info); + int rndis_filter_open(struct hv_device *dev); + int rndis_filter_close(struct hv_device *dev); + int rndis_filter_device_add(struct hv_device *dev, +@@ -776,9 +778,38 @@ struct ndis_offload_params { + }; + }; + ++struct ndis_tcp_ip_checksum_info { ++ union { ++ struct { ++ u32 is_ipv4:1; ++ u32 is_ipv6:1; ++ u32 
tcp_checksum:1; ++ u32 udp_checksum:1; ++ u32 ip_header_checksum:1; ++ u32 reserved:11; ++ u32 tcp_header_offset:10; ++ } transmit; ++ struct { ++ u32 tcp_checksum_failed:1; ++ u32 udp_checksum_failed:1; ++ u32 ip_checksum_failed:1; ++ u32 tcp_checksum_succeeded:1; ++ u32 udp_checksum_succeeded:1; ++ u32 ip_checksum_succeeded:1; ++ u32 loopback:1; ++ u32 tcp_checksum_value_invalid:1; ++ u32 ip_checksum_value_invalid:1; ++ } receive; ++ u32 value; ++ }; ++}; ++ + #define NDIS_VLAN_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ + sizeof(struct ndis_pkt_8021q_info)) + ++#define NDIS_CSUM_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ ++ sizeof(struct ndis_tcp_ip_checksum_info)) ++ + /* Format of Information buffer passed in a SetRequest for the OID */ + /* OID_GEN_RNDIS_CONFIG_PARAMETER. */ + struct rndis_config_parameter_info { +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 87293a15e470..7438360b7a02 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -392,7 +392,8 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, + * "wire" on the specified device. + */ + int netvsc_recv_callback(struct hv_device *device_obj, +- struct hv_netvsc_packet *packet) ++ struct hv_netvsc_packet *packet, ++ struct ndis_tcp_ip_checksum_info *csum_info) + { + struct net_device *net; + struct sk_buff *skb; +@@ -419,7 +420,17 @@ int netvsc_recv_callback(struct hv_device *device_obj, + packet->total_data_buflen); + + skb->protocol = eth_type_trans(skb, net); +- skb->ip_summed = CHECKSUM_NONE; ++ if (csum_info) { ++ /* We only look at the IP checksum here. ++ * Should we be dropping the packet if checksum ++ * failed? How do we deal with other checksums - TCP/UDP? 
++ */ ++ if (csum_info->receive.ip_checksum_succeeded) ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ else ++ skb->ip_summed = CHECKSUM_NONE; ++ } ++ + if (packet->vlan_tci & VLAN_TAG_PRESENT) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + packet->vlan_tci); +@@ -581,8 +592,8 @@ static int netvsc_probe(struct hv_device *dev, + net->netdev_ops = &device_ops; + + /* TODO: Add GSO and Checksum offload */ +- net->hw_features = NETIF_F_SG; +- net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG; ++ net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG; ++ net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM; + + SET_ETHTOOL_OPS(net, &ethtool_ops); + SET_NETDEV_DEV(net, &dev->device); +diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c +index 9b02f21097a7..4a37e3db9e32 100644 +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -370,6 +370,7 @@ static void rndis_filter_receive_data(struct rndis_device *dev, + struct rndis_packet *rndis_pkt; + u32 data_offset; + struct ndis_pkt_8021q_info *vlan; ++ struct ndis_tcp_ip_checksum_info *csum_info; + + rndis_pkt = &msg->msg.pkt; + +@@ -408,7 +409,8 @@ static void rndis_filter_receive_data(struct rndis_device *dev, + pkt->vlan_tci = 0; + } + +- netvsc_recv_callback(dev->net_dev->dev, pkt); ++ csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO); ++ netvsc_recv_callback(dev->net_dev->dev, pkt, csum_info); + } + + int rndis_filter_receive(struct hv_device *dev, +-- +2.4.3 + diff --git a/src/patches/linux/0008-hyperv-fix-apparent-cut-n-paste-error-in-send-path-t.patch b/src/patches/linux/0008-hyperv-fix-apparent-cut-n-paste-error-in-send-path-t.patch new file mode 100644 index 0000000000..b774f817f3 --- /dev/null +++ b/src/patches/linux/0008-hyperv-fix-apparent-cut-n-paste-error-in-send-path-t.patch @@ -0,0 +1,32 @@ +From 2f18423d7ebf8044a9edaca840be5ae199fb32f6 Mon Sep 17 00:00:00 2001 +From: Dave Jones +Date: Mon, 16 Jun 2014 16:59:02 -0400 +Subject:
[PATCH 08/11] hyperv: fix apparent cut-n-paste error in send path + teardown + +c25aaf814a63: "hyperv: Enable sendbuf mechanism on the send path" added +some teardown code that looks like it was copied from the recieve path +above, but missed a variable name replacement. + +Signed-off-by: Dave Jones +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/netvsc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index c041f63a6d30..4ed38eaecea8 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -189,7 +189,7 @@ static int netvsc_destroy_buf(struct netvsc_device *net_device) + "unable to teardown send buffer's gpadl\n"); + return ret; + } +- net_device->recv_buf_gpadl_handle = 0; ++ net_device->send_buf_gpadl_handle = 0; + } + if (net_device->send_buf) { + /* Free up the receive buffer */ +-- +2.4.3 + diff --git a/src/patches/linux/0009-Drivers-net-hyperv-Enable-send-side-checksum-offload.patch b/src/patches/linux/0009-Drivers-net-hyperv-Enable-send-side-checksum-offload.patch new file mode 100644 index 0000000000..db421089c8 --- /dev/null +++ b/src/patches/linux/0009-Drivers-net-hyperv-Enable-send-side-checksum-offload.patch @@ -0,0 +1,140 @@ +From d2a0be7f4263eb669af84240c5424a72cce4cdb4 Mon Sep 17 00:00:00 2001 +From: KY Srinivasan +Date: Sat, 8 Mar 2014 19:23:17 -0800 +Subject: [PATCH 09/25] Drivers: net: hyperv: Enable send side checksum offload + +Enable send side checksum offload. + +Signed-off-by: K. Y. Srinivasan +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. 
Miller +--- + drivers/net/hyperv/hyperv_net.h | 10 ++++++ + drivers/net/hyperv/netvsc_drv.c | 69 +++++++++++++++++++++++++++++++++++++++-- + 2 files changed, 77 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index faeb74623fbd..4cf238234321 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -1035,6 +1035,16 @@ struct rndis_message { + #define NDIS_PACKET_TYPE_FUNCTIONAL 0x00000400 + #define NDIS_PACKET_TYPE_MAC_FRAME 0x00000800 + ++#define INFO_IPV4 2 ++#define INFO_IPV6 4 ++#define INFO_TCP 2 ++#define INFO_UDP 4 ++ ++#define TRANSPORT_INFO_NOT_IP 0 ++#define TRANSPORT_INFO_IPV4_TCP ((INFO_IPV4 << 16) | INFO_TCP) ++#define TRANSPORT_INFO_IPV4_UDP ((INFO_IPV4 << 16) | INFO_UDP) ++#define TRANSPORT_INFO_IPV6_TCP ((INFO_IPV6 << 16) | INFO_TCP) ++#define TRANSPORT_INFO_IPV6_UDP ((INFO_IPV6 << 16) | INFO_UDP) + + + #endif /* _HYPERV_NET_H */ +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 7438360b7a02..2fab69ac61ef 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -257,6 +257,35 @@ static int netvsc_get_slots(struct sk_buff *skb) + return slots + frag_slots; + } + ++static u32 get_net_transport_info(struct sk_buff *skb, u32 *trans_off) ++{ ++ u32 ret_val = TRANSPORT_INFO_NOT_IP; ++ ++ if ((eth_hdr(skb)->h_proto != htons(ETH_P_IP)) && ++ (eth_hdr(skb)->h_proto != htons(ETH_P_IPV6))) { ++ goto not_ip; ++ } ++ ++ *trans_off = skb_transport_offset(skb); ++ ++ if ((eth_hdr(skb)->h_proto == htons(ETH_P_IP))) { ++ struct iphdr *iphdr = ip_hdr(skb); ++ ++ if (iphdr->protocol == IPPROTO_TCP) ++ ret_val = TRANSPORT_INFO_IPV4_TCP; ++ else if (iphdr->protocol == IPPROTO_UDP) ++ ret_val = TRANSPORT_INFO_IPV4_UDP; ++ } else { ++ if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) ++ ret_val = TRANSPORT_INFO_IPV6_TCP; ++ else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP) ++ ret_val = TRANSPORT_INFO_IPV6_UDP; ++ } ++ 
++not_ip: ++ return ret_val; ++} ++ + static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + { + struct net_device_context *net_device_ctx = netdev_priv(net); +@@ -269,6 +298,10 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + u32 rndis_msg_size; + bool isvlan; + struct rndis_per_packet_info *ppi; ++ struct ndis_tcp_ip_checksum_info *csum_info; ++ int hdr_offset; ++ u32 net_trans_info; ++ + + /* We will atmost need two pages to describe the rndis + * header. We can only transmit MAX_PAGE_BUFFER_COUNT number +@@ -336,6 +369,37 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + VLAN_PRIO_SHIFT; + } + ++ net_trans_info = get_net_transport_info(skb, &hdr_offset); ++ if (net_trans_info == TRANSPORT_INFO_NOT_IP) ++ goto do_send; ++ ++ /* ++ * Setup the sendside checksum offload only if this is not a ++ * GSO packet. ++ */ ++ if (skb_is_gso(skb)) ++ goto do_send; ++ ++ rndis_msg_size += NDIS_CSUM_PPI_SIZE; ++ ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE, ++ TCPIP_CHKSUM_PKTINFO); ++ ++ csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi + ++ ppi->ppi_offset); ++ ++ if (net_trans_info & (INFO_IPV4 << 16)) ++ csum_info->transmit.is_ipv4 = 1; ++ else ++ csum_info->transmit.is_ipv6 = 1; ++ ++ if (net_trans_info & INFO_TCP) { ++ csum_info->transmit.tcp_checksum = 1; ++ csum_info->transmit.tcp_header_offset = hdr_offset; ++ } else if (net_trans_info & INFO_UDP) { ++ csum_info->transmit.udp_checksum = 1; ++ } ++ ++do_send: + /* Start filling in the page buffers with the rndis hdr */ + rndis_msg->msg_len += rndis_msg_size; + packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, +@@ -592,8 +656,9 @@ static int netvsc_probe(struct hv_device *dev, + net->netdev_ops = &device_ops; + + /* TODO: Add GSO and Checksum offload */ +- net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG; +- net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM; ++ net->hw_features = 
NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM; ++ net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM | ++ NETIF_F_IP_CSUM; + + SET_ETHTOOL_OPS(net, &ethtool_ops); + SET_NETDEV_DEV(net, &dev->device); +-- +2.4.3 + diff --git a/src/patches/linux/0009-hyperv-Fix-error-return-code-in-netvsc_init_buf.patch b/src/patches/linux/0009-hyperv-Fix-error-return-code-in-netvsc_init_buf.patch new file mode 100644 index 0000000000..fcae5312e4 --- /dev/null +++ b/src/patches/linux/0009-hyperv-Fix-error-return-code-in-netvsc_init_buf.patch @@ -0,0 +1,34 @@ +From dd1d3f8f9920926aa426589e542eed6bf58b7354 Mon Sep 17 00:00:00 2001 +From: Wei Yongjun +Date: Wed, 23 Jul 2014 09:00:35 +0800 +Subject: [PATCH 09/11] hyperv: Fix error return code in netvsc_init_buf() + +Fix to return -ENOMEM from the kalloc error handling +case instead of 0. + +Signed-off-by: Wei Yongjun +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/netvsc.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index 4ed38eaecea8..d97d5f39a04e 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -378,8 +378,10 @@ static int netvsc_init_buf(struct hv_device *device) + + net_device->send_section_map = + kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL); +- if (net_device->send_section_map == NULL) ++ if (net_device->send_section_map == NULL) { ++ ret = -ENOMEM; + goto cleanup; ++ } + + goto exit; + +-- +2.4.3 + diff --git a/src/patches/linux/0010-Drivers-net-hyperv-Enable-large-send-offload.patch b/src/patches/linux/0010-Drivers-net-hyperv-Enable-large-send-offload.patch new file mode 100644 index 0000000000..36be2a87a0 --- /dev/null +++ b/src/patches/linux/0010-Drivers-net-hyperv-Enable-large-send-offload.patch @@ -0,0 +1,153 @@ +From a4ec4f58017b456281ee17c35fb82dfe4eab2193 Mon Sep 17 00:00:00 2001 +From: KY Srinivasan +Date: Sat, 8 Mar 2014 19:23:18 -0800
+Subject: [PATCH 10/25] Drivers: net: hyperv: Enable large send offload + +Enable segmentation offload. + +Signed-off-by: K. Y. Srinivasan +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/hyperv_net.h | 40 ++++++++++++++++++++++++++++++++++++++++ + drivers/net/hyperv/netvsc_drv.c | 38 ++++++++++++++++++++++++++++++++++---- + 2 files changed, 74 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index 4cf238234321..7d06b4959383 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -742,6 +742,10 @@ struct ndis_oject_header { + #define NDIS_OFFLOAD_PARAMETERS_RX_ENABLED_TX_DISABLED 3 + #define NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED 4 + ++#define NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE 1 ++#define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4 0 ++#define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6 1 ++ + /* + * New offload OIDs for NDIS 6 + */ +@@ -804,12 +808,48 @@ struct ndis_tcp_ip_checksum_info { + }; + }; + ++struct ndis_tcp_lso_info { ++ union { ++ struct { ++ u32 unused:30; ++ u32 type:1; ++ u32 reserved2:1; ++ } transmit; ++ struct { ++ u32 mss:20; ++ u32 tcp_header_offset:10; ++ u32 type:1; ++ u32 reserved2:1; ++ } lso_v1_transmit; ++ struct { ++ u32 tcp_payload:30; ++ u32 type:1; ++ u32 reserved2:1; ++ } lso_v1_transmit_complete; ++ struct { ++ u32 mss:20; ++ u32 tcp_header_offset:10; ++ u32 type:1; ++ u32 ip_version:1; ++ } lso_v2_transmit; ++ struct { ++ u32 reserved:30; ++ u32 type:1; ++ u32 reserved2:1; ++ } lso_v2_transmit_complete; ++ u32 value; ++ }; ++}; ++ + #define NDIS_VLAN_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ + sizeof(struct ndis_pkt_8021q_info)) + + #define NDIS_CSUM_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ + sizeof(struct ndis_tcp_ip_checksum_info)) + ++#define NDIS_LSO_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ ++ sizeof(struct ndis_tcp_lso_info)) ++ + /* Format of Information buffer passed in a SetRequest 
for the OID */ + /* OID_GEN_RNDIS_CONFIG_PARAMETER. */ + struct rndis_config_parameter_info { +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 2fab69ac61ef..5baa1fa7e692 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -299,6 +299,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + bool isvlan; + struct rndis_per_packet_info *ppi; + struct ndis_tcp_ip_checksum_info *csum_info; ++ struct ndis_tcp_lso_info *lso_info; + int hdr_offset; + u32 net_trans_info; + +@@ -378,7 +379,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + * GSO packet. + */ + if (skb_is_gso(skb)) +- goto do_send; ++ goto do_lso; + + rndis_msg_size += NDIS_CSUM_PPI_SIZE; + ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE, +@@ -398,6 +399,35 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + } else if (net_trans_info & INFO_UDP) { + csum_info->transmit.udp_checksum = 1; + } ++ goto do_send; ++ ++do_lso: ++ rndis_msg_size += NDIS_LSO_PPI_SIZE; ++ ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, ++ TCP_LARGESEND_PKTINFO); ++ ++ lso_info = (struct ndis_tcp_lso_info *)((void *)ppi + ++ ppi->ppi_offset); ++ ++ lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; ++ if (net_trans_info & (INFO_IPV4 << 16)) { ++ lso_info->lso_v2_transmit.ip_version = ++ NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; ++ ip_hdr(skb)->tot_len = 0; ++ ip_hdr(skb)->check = 0; ++ tcp_hdr(skb)->check = ++ ~csum_tcpudp_magic(ip_hdr(skb)->saddr, ++ ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); ++ } else { ++ lso_info->lso_v2_transmit.ip_version = ++ NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; ++ ipv6_hdr(skb)->payload_len = 0; ++ tcp_hdr(skb)->check = ++ ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, ++ &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); ++ } ++ lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset; ++ lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size; + + do_send: + /* 
Start filling in the page buffers with the rndis hdr */ +@@ -655,10 +685,10 @@ static int netvsc_probe(struct hv_device *dev, + + net->netdev_ops = &device_ops; + +- /* TODO: Add GSO and Checksum offload */ +- net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM; ++ net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM | ++ NETIF_F_TSO; + net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM | +- NETIF_F_IP_CSUM; ++ NETIF_F_IP_CSUM | NETIF_F_TSO; + + SET_ETHTOOL_OPS(net, &ethtool_ops); + SET_NETDEV_DEV(net, &dev->device); +-- +2.4.3 + diff --git a/src/patches/linux/0010-hyperv-Fix-a-bug-in-netvsc_start_xmit.patch b/src/patches/linux/0010-hyperv-Fix-a-bug-in-netvsc_start_xmit.patch new file mode 100644 index 0000000000..5e08805265 --- /dev/null +++ b/src/patches/linux/0010-hyperv-Fix-a-bug-in-netvsc_start_xmit.patch @@ -0,0 +1,47 @@ +From b3e774263908a834c1c0d5abf3a7658280e42fc7 Mon Sep 17 00:00:00 2001 +From: KY Srinivasan +Date: Sun, 28 Sep 2014 22:16:43 -0700 +Subject: [PATCH 10/11] hyperv: Fix a bug in netvsc_start_xmit() + +[ Upstream commit dedb845ded56ded1c62f5398a94ffa8615d4592d ] + +After the packet is successfully sent, we should not touch the skb +as it may have been freed. This patch is based on the work done by +Long Li . + +In this version of the patch I have fixed issues pointed out by David. +David, please queue this up for stable. + +Signed-off-by: K. Y. Srinivasan +Tested-by: Long Li +Tested-by: Sitsofe Wheeler +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc_drv.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 4fd71b75e666..f15297201777 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -387,6 +387,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + int hdr_offset; + u32 net_trans_info; + u32 hash; ++ u32 skb_length = skb->len; + + + /* We will atmost need two pages to describe the rndis +@@ -562,7 +563,7 @@ do_send: + + drop: + if (ret == 0) { +- net->stats.tx_bytes += skb->len; ++ net->stats.tx_bytes += skb_length; + net->stats.tx_packets++; + } else { + kfree(packet); +-- +2.4.3 + diff --git a/src/patches/linux-3.14.x-hyperv-2008-fix.patch b/src/patches/linux/0011-hyperv-Change-the-receive-buffer-size-for-legacy-hos.patch similarity index 81% rename from src/patches/linux-3.14.x-hyperv-2008-fix.patch rename to src/patches/linux/0011-hyperv-Change-the-receive-buffer-size-for-legacy-hos.patch index e538e08e5c..e3ee8cbc99 100644 --- a/src/patches/linux-3.14.x-hyperv-2008-fix.patch +++ b/src/patches/linux/0011-hyperv-Change-the-receive-buffer-size-for-legacy-hos.patch @@ -1,16 +1,20 @@ -From 99d3016de4f2a29635f5382b0e9bd0e5f2151487 Mon Sep 17 00:00:00 2001 +From 56b3f72d3fed7f9b8d17dcf5d81455fa1b4327d7 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Sun, 9 Mar 2014 16:10:59 -0700 -Subject: hyperv: Change the receive buffer size for legacy hosts +Subject: [PATCH 11/25] hyperv: Change the receive buffer size for legacy hosts Due to a bug in the Hyper-V host verion 2008R2, we need to use a slightly smaller receive buffer size, otherwise the buffer will not be accepted by the legacy hosts. Signed-off-by: Haiyang Zhang Signed-off-by: David S. 
Miller +--- + drivers/net/hyperv/hyperv_net.h | 1 + + drivers/net/hyperv/netvsc.c | 6 +++++- + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h -index 7d06b49..13010b4 100644 +index 7d06b4959383..13010b4dae5b 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -513,6 +513,7 @@ struct nvsp_message { @@ -22,7 +26,7 @@ index 7d06b49..13010b4 100644 #define NETVSC_RECEIVE_BUFFER_ID 0xcafe diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c -index 1a0280d..daddea2 100644 +index 1a0280dcba7e..daddea2654ce 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -365,6 +365,11 @@ static int netvsc_connect_vsp(struct hv_device *device) @@ -46,5 +50,5 @@ index 1a0280d..daddea2 100644 INIT_LIST_HEAD(&net_device->recv_pkt_list); -- -cgit v0.10.2 +2.4.3 diff --git a/src/patches/linux/0011-hyperv-Fix-a-bug-in-netvsc_send.patch b/src/patches/linux/0011-hyperv-Fix-a-bug-in-netvsc_send.patch new file mode 100644 index 0000000000..00f9c5d7b7 --- /dev/null +++ b/src/patches/linux/0011-hyperv-Fix-a-bug-in-netvsc_send.patch @@ -0,0 +1,68 @@ +From 26875bba869bd91a1d8fef9229a56a1e6d9fef2b Mon Sep 17 00:00:00 2001 +From: KY Srinivasan +Date: Sun, 5 Oct 2014 10:42:51 -0700 +Subject: [PATCH 11/11] hyperv: Fix a bug in netvsc_send() + +[ Upstream commit 3a67c9ccad926a168d8b7891537a452018368a5b ] + +After the packet is successfully sent, we should not touch the packet +as it may have been freed. This patch is based on the work done by +Long Li . + +David, please queue this up for stable. + +Signed-off-by: K. Y. Srinivasan +Reported-by: Sitsofe Wheeler +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index d97d5f39a04e..7edf976ecfa0 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -708,6 +708,7 @@ int netvsc_send(struct hv_device *device, + unsigned int section_index = NETVSC_INVALID_INDEX; + u32 msg_size = 0; + struct sk_buff *skb; ++ u16 q_idx = packet->q_idx; + + + net_device = get_outbound_net_device(device); +@@ -772,24 +773,24 @@ int netvsc_send(struct hv_device *device, + + if (ret == 0) { + atomic_inc(&net_device->num_outstanding_sends); +- atomic_inc(&net_device->queue_sends[packet->q_idx]); ++ atomic_inc(&net_device->queue_sends[q_idx]); + + if (hv_ringbuf_avail_percent(&out_channel->outbound) < + RING_AVAIL_PERCENT_LOWATER) { + netif_tx_stop_queue(netdev_get_tx_queue( +- ndev, packet->q_idx)); ++ ndev, q_idx)); + + if (atomic_read(&net_device-> +- queue_sends[packet->q_idx]) < 1) ++ queue_sends[q_idx]) < 1) + netif_tx_wake_queue(netdev_get_tx_queue( +- ndev, packet->q_idx)); ++ ndev, q_idx)); + } + } else if (ret == -EAGAIN) { + netif_tx_stop_queue(netdev_get_tx_queue( +- ndev, packet->q_idx)); +- if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) { ++ ndev, q_idx)); ++ if (atomic_read(&net_device->queue_sends[q_idx]) < 1) { + netif_tx_wake_queue(netdev_get_tx_queue( +- ndev, packet->q_idx)); ++ ndev, q_idx)); + ret = -ENOSPC; + } + } else { +-- +2.4.3 + diff --git a/src/patches/linux/0012-Drivers-net-hyperv-Allocate-memory-for-all-possible-.patch b/src/patches/linux/0012-Drivers-net-hyperv-Allocate-memory-for-all-possible-.patch new file mode 100644 index 0000000000..c626bb06ce --- /dev/null +++ b/src/patches/linux/0012-Drivers-net-hyperv-Allocate-memory-for-all-possible-.patch @@ -0,0 +1,34 @@ +From e0f6906eb7e5b395370da9499189d13b59020382 Mon Sep 17 00:00:00 2001 +From: KY Srinivasan 
+Date: Wed, 9 Apr 2014 15:00:45 -0700 +Subject: [PATCH 12/25] Drivers: net: hyperv: Allocate memory for all possible + per-pecket information + +An outgoing packet can potentially need per-packet information for +all the offloads and VLAN tagging. Fix this issue. + +Signed-off-by: K. Y. Srinivasan +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/netvsc_drv.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 5baa1fa7e692..c76c85176644 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -320,7 +320,9 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + packet = kzalloc(sizeof(struct hv_netvsc_packet) + + (num_data_pgs * sizeof(struct hv_page_buffer)) + + sizeof(struct rndis_message) + +- NDIS_VLAN_PPI_SIZE, GFP_ATOMIC); ++ NDIS_VLAN_PPI_SIZE + ++ NDIS_CSUM_PPI_SIZE + ++ NDIS_LSO_PPI_SIZE, GFP_ATOMIC); + if (!packet) { + /* out of memory, drop packet */ + netdev_err(net, "unable to allocate hv_netvsc_packet\n"); +-- +2.4.3 + diff --git a/src/patches/linux/0013-Drivers-net-hyperv-Negotiate-suitable-ndis-version-f.patch b/src/patches/linux/0013-Drivers-net-hyperv-Negotiate-suitable-ndis-version-f.patch new file mode 100644 index 0000000000..7044c524d9 --- /dev/null +++ b/src/patches/linux/0013-Drivers-net-hyperv-Negotiate-suitable-ndis-version-f.patch @@ -0,0 +1,32 @@ +From 8a7882bada78e7d7355aafc0ca3c5696d25eb443 Mon Sep 17 00:00:00 2001 +From: KY Srinivasan +Date: Wed, 9 Apr 2014 15:00:46 -0700 +Subject: [PATCH 13/25] Drivers: net: hyperv: Negotiate suitable ndis version + for offload support + +Ws2008R2 supports ndis_version 6.1 and 6.1 is the minimal version required +for various offloads. Negotiate ndis_version 6.1 when on ws2008r2. + +Signed-off-by: K. Y. Srinivasan +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. 
Miller +--- + drivers/net/hyperv/netvsc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index daddea2654ce..f7629ecefa84 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -344,7 +344,7 @@ static int netvsc_connect_vsp(struct hv_device *device) + memset(init_packet, 0, sizeof(struct nvsp_message)); + + if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4) +- ndis_version = 0x00050001; ++ ndis_version = 0x00060001; + else + ndis_version = 0x0006001e; + +-- +2.4.3 + diff --git a/src/patches/linux/0014-Drivers-net-hyperv-Address-UDP-checksum-issues.patch b/src/patches/linux/0014-Drivers-net-hyperv-Address-UDP-checksum-issues.patch new file mode 100644 index 0000000000..eaf343e8ca --- /dev/null +++ b/src/patches/linux/0014-Drivers-net-hyperv-Address-UDP-checksum-issues.patch @@ -0,0 +1,111 @@ +From b822ee7a17efd1bc8c7584da5d0a2c042e9ca5b6 Mon Sep 17 00:00:00 2001 +From: KY Srinivasan +Date: Wed, 9 Apr 2014 15:00:47 -0700 +Subject: [PATCH 14/25] Drivers: net: hyperv: Address UDP checksum issues + +ws2008r2 does not support UDP checksum offload. Thus, we cannnot turn on +UDP offload in the host. Also, on ws2012 and ws2012 r2, there appear to be +an issue with UDP checksum offload. +Fix this issue by computing the UDP checksum in the Hyper-V driver. + +Based on Dave Miller's comments, in this version, I have COWed the skb +before modifying the UDP header (the checksum field). + +Signed-off-by: K. Y. Srinivasan +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. 
Miller +--- + drivers/net/hyperv/hyperv_net.h | 1 + + drivers/net/hyperv/netvsc_drv.c | 26 +++++++++++++++++++++++++- + drivers/net/hyperv/rndis_filter.c | 12 +++++++++++- + 3 files changed, 37 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index 13010b4dae5b..d18f711d0b0c 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -747,6 +747,7 @@ struct ndis_oject_header { + #define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4 0 + #define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6 1 + ++#define VERSION_4_OFFLOAD_SIZE 22 + /* + * New offload OIDs for NDIS 6 + */ +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index c76c85176644..0d898876689e 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -399,7 +399,30 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + csum_info->transmit.tcp_checksum = 1; + csum_info->transmit.tcp_header_offset = hdr_offset; + } else if (net_trans_info & INFO_UDP) { +- csum_info->transmit.udp_checksum = 1; ++ /* UDP checksum offload is not supported on ws2008r2. ++ * Furthermore, on ws2012 and ws2012r2, there are some ++ * issues with udp checksum offload from Linux guests. ++ * (these are host issues). ++ * For now compute the checksum here. 
++ */ ++ struct udphdr *uh; ++ u16 udp_len; ++ ++ ret = skb_cow_head(skb, 0); ++ if (ret) ++ goto drop; ++ ++ uh = udp_hdr(skb); ++ udp_len = ntohs(uh->len); ++ uh->check = 0; ++ uh->check = csum_tcpudp_magic(ip_hdr(skb)->saddr, ++ ip_hdr(skb)->daddr, ++ udp_len, IPPROTO_UDP, ++ csum_partial(uh, udp_len, 0)); ++ if (uh->check == 0) ++ uh->check = CSUM_MANGLED_0; ++ ++ csum_info->transmit.udp_checksum = 0; + } + goto do_send; + +@@ -439,6 +462,7 @@ do_send: + + ret = netvsc_send(net_device_ctx->device_ctx, packet); + ++drop: + if (ret == 0) { + net->stats.tx_bytes += skb_length; + net->stats.tx_packets++; +diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c +index 4a37e3db9e32..143a98caf618 100644 +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -641,6 +641,16 @@ int rndis_filter_set_offload_params(struct hv_device *hdev, + struct rndis_set_complete *set_complete; + u32 extlen = sizeof(struct ndis_offload_params); + int ret, t; ++ u32 vsp_version = nvdev->nvsp_version; ++ ++ if (vsp_version <= NVSP_PROTOCOL_VERSION_4) { ++ extlen = VERSION_4_OFFLOAD_SIZE; ++ /* On NVSP_PROTOCOL_VERSION_4 and below, we do not support ++ * UDP checksum offload. 
++ */ ++ req_offloads->udp_ip_v4_csum = 0; ++ req_offloads->udp_ip_v6_csum = 0; ++ } + + request = get_rndis_request(rdev, RNDIS_MSG_SET, + RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen); +@@ -674,7 +684,7 @@ int rndis_filter_set_offload_params(struct hv_device *hdev, + } else { + set_complete = &request->response_msg.msg.set_complete; + if (set_complete->status != RNDIS_STATUS_SUCCESS) { +- netdev_err(ndev, "Fail to set MAC on host side:0x%x\n", ++ netdev_err(ndev, "Fail to set offload on host side:0x%x\n", + set_complete->status); + ret = -EINVAL; + } +-- +2.4.3 + diff --git a/src/patches/linux/0015-hyperv-Properly-handle-checksum-offload.patch b/src/patches/linux/0015-hyperv-Properly-handle-checksum-offload.patch new file mode 100644 index 0000000000..07608334ec --- /dev/null +++ b/src/patches/linux/0015-hyperv-Properly-handle-checksum-offload.patch @@ -0,0 +1,42 @@ +From 540360773bad2b81f14c38ca92b62797fa32cbc6 Mon Sep 17 00:00:00 2001 +From: KY Srinivasan +Date: Wed, 30 Apr 2014 11:58:25 -0700 +Subject: [PATCH 15/25] hyperv: Properly handle checksum offload + +Do checksum offload only if the client of the driver wants checksum to be +offloaded. + +In V1 version of this patch, I addressed comments from +Stephen Hemminger and +Eric Dumazet . + +In this version of the patch I have addressed comments from +David Miller. + +This patch fixes a bug that is exposed in gateway scenarios. + +Signed-off-by: K. Y. Srinivasan +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. 
Miller +--- + drivers/net/hyperv/netvsc_drv.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 0d898876689e..ce6d870dd7ae 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -383,6 +383,10 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + if (skb_is_gso(skb)) + goto do_lso; + ++ if ((skb->ip_summed == CHECKSUM_NONE) || ++ (skb->ip_summed == CHECKSUM_UNNECESSARY)) ++ goto do_send; ++ + rndis_msg_size += NDIS_CSUM_PPI_SIZE; + ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE, + TCPIP_CHKSUM_PKTINFO); +-- +2.4.3 + diff --git a/src/patches/linux/0016-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch b/src/patches/linux/0016-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch new file mode 100644 index 0000000000..d01a428e10 --- /dev/null +++ b/src/patches/linux/0016-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch @@ -0,0 +1,917 @@ +From 44559a96c6864eb6e95db0ae896c621b82e605f3 Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang +Date: Mon, 21 Apr 2014 10:20:28 -0700 +Subject: [PATCH 16/25] hyperv: Add support for virtual Receive Side Scaling + (vRSS) + +This feature allows multiple channels to be used by each virtual NIC. +It is available on Hyper-V host 2012 R2. + +Signed-off-by: Haiyang Zhang +Reviewed-by: K. Y. Srinivasan +Signed-off-by: David S. 
Miller +--- + drivers/net/hyperv/hyperv_net.h | 110 +++++++++++++++++++++- + drivers/net/hyperv/netvsc.c | 136 +++++++++++++++++++++------ + drivers/net/hyperv/netvsc_drv.c | 103 ++++++++++++++++++++- + drivers/net/hyperv/rndis_filter.c | 189 +++++++++++++++++++++++++++++++++++++- + 4 files changed, 504 insertions(+), 34 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index d18f711d0b0c..57eb3f906d64 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -28,6 +28,96 @@ + #include + #include + ++/* RSS related */ ++#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */ ++#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204 /* query and set */ ++ ++#define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88 ++#define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89 ++ ++#define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2 ++#define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2 ++ ++struct ndis_obj_header { ++ u8 type; ++ u8 rev; ++ u16 size; ++} __packed; ++ ++/* ndis_recv_scale_cap/cap_flag */ ++#define NDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000 ++#define NDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000 ++#define NDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000 ++#define NDIS_RSS_CAPS_USING_MSI_X 0x08000000 ++#define NDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000 ++#define NDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000 ++#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100 ++#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200 ++#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400 ++ ++struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */ ++ struct ndis_obj_header hdr; ++ u32 cap_flag; ++ u32 num_int_msg; ++ u32 num_recv_que; ++ u16 num_indirect_tabent; ++} __packed; ++ ++ ++/* ndis_recv_scale_param flags */ ++#define NDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001 ++#define NDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002 ++#define NDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004 ++#define 
NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008 ++#define NDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010 ++ ++/* Hash info bits */ ++#define NDIS_HASH_FUNC_TOEPLITZ 0x00000001 ++#define NDIS_HASH_IPV4 0x00000100 ++#define NDIS_HASH_TCP_IPV4 0x00000200 ++#define NDIS_HASH_IPV6 0x00000400 ++#define NDIS_HASH_IPV6_EX 0x00000800 ++#define NDIS_HASH_TCP_IPV6 0x00001000 ++#define NDIS_HASH_TCP_IPV6_EX 0x00002000 ++ ++#define NDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4) ++#define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40 ++ ++#define ITAB_NUM 128 ++#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 ++extern u8 netvsc_hash_key[]; ++ ++struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */ ++ struct ndis_obj_header hdr; ++ ++ /* Qualifies the rest of the information */ ++ u16 flag; ++ ++ /* The base CPU number to do receive processing. not used */ ++ u16 base_cpu_number; ++ ++ /* This describes the hash function and type being enabled */ ++ u32 hashinfo; ++ ++ /* The size of indirection table array */ ++ u16 indirect_tabsize; ++ ++ /* The offset of the indirection table from the beginning of this ++ * structure ++ */ ++ u32 indirect_taboffset; ++ ++ /* The size of the hash secret key */ ++ u16 hashkey_size; ++ ++ /* The offset of the secret key from the beginning of this structure */ ++ u32 kashkey_offset; ++ ++ u32 processor_masks_offset; ++ u32 num_processor_masks; ++ u32 processor_masks_entry_size; ++}; ++ + /* Fwd declaration */ + struct hv_netvsc_packet; + struct ndis_tcp_ip_checksum_info; +@@ -39,6 +129,8 @@ struct xferpage_packet { + + /* # of netvsc packets this xfer packet contains */ + u32 count; ++ ++ struct vmbus_channel *channel; + }; + + /* +@@ -54,6 +146,9 @@ struct hv_netvsc_packet { + bool is_data_pkt; + u16 vlan_tci; + ++ u16 q_idx; ++ struct vmbus_channel *channel; ++ + /* + * Valid only for receives when we break a xfer page packet + * into multiple netvsc packets +@@ -120,6 +215,7 @@ void 
netvsc_linkstatus_callback(struct hv_device *device_obj, + int netvsc_recv_callback(struct hv_device *device_obj, + struct hv_netvsc_packet *packet, + struct ndis_tcp_ip_checksum_info *csum_info); ++void netvsc_channel_cb(void *context); + int rndis_filter_open(struct hv_device *dev); + int rndis_filter_close(struct hv_device *dev); + int rndis_filter_device_add(struct hv_device *dev, +@@ -522,6 +618,8 @@ struct nvsp_message { + + #define NETVSC_PACKET_SIZE 2048 + ++#define VRSS_SEND_TAB_SIZE 16 ++ + /* Per netvsc channel-specific */ + struct netvsc_device { + struct hv_device *dev; +@@ -555,10 +653,20 @@ struct netvsc_device { + + struct net_device *ndev; + ++ struct vmbus_channel *chn_table[NR_CPUS]; ++ u32 send_table[VRSS_SEND_TAB_SIZE]; ++ u32 num_chn; ++ atomic_t queue_sends[NR_CPUS]; ++ + /* Holds rndis device info */ + void *extension; +- /* The recive buffer for this device */ ++ ++ int ring_size; ++ ++ /* The primary channel callback buffer */ + unsigned char cb_buffer[NETVSC_PACKET_SIZE]; ++ /* The sub channel callback buffer */ ++ unsigned char *sub_cb_buf; + }; + + /* NdisInitialize message */ +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index f7629ecefa84..e7e77f12bc38 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -422,6 +422,9 @@ int netvsc_device_remove(struct hv_device *device) + kfree(netvsc_packet); + } + ++ if (net_device->sub_cb_buf) ++ vfree(net_device->sub_cb_buf); ++ + kfree(net_device); + return 0; + } +@@ -461,7 +464,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device, + (nvsp_packet->hdr.msg_type == + NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) || + (nvsp_packet->hdr.msg_type == +- NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE)) { ++ NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) || ++ (nvsp_packet->hdr.msg_type == ++ NVSP_MSG5_TYPE_SUBCHANNEL)) { + /* Copy the response back */ + memcpy(&net_device->channel_init_pkt, nvsp_packet, + sizeof(struct nvsp_message)); +@@ -469,28 
+474,37 @@ static void netvsc_send_completion(struct netvsc_device *net_device, + } else if (nvsp_packet->hdr.msg_type == + NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) { + int num_outstanding_sends; ++ u16 q_idx = 0; ++ struct vmbus_channel *channel = device->channel; ++ int queue_sends; + + /* Get the send context */ + nvsc_packet = (struct hv_netvsc_packet *)(unsigned long) + packet->trans_id; + + /* Notify the layer above us */ +- if (nvsc_packet) ++ if (nvsc_packet) { ++ q_idx = nvsc_packet->q_idx; ++ channel = nvsc_packet->channel; + nvsc_packet->completion.send.send_completion( + nvsc_packet->completion.send. + send_completion_ctx); ++ } + + num_outstanding_sends = + atomic_dec_return(&net_device->num_outstanding_sends); ++ queue_sends = atomic_dec_return(&net_device-> ++ queue_sends[q_idx]); + + if (net_device->destroy && num_outstanding_sends == 0) + wake_up(&net_device->wait_drain); + +- if (netif_queue_stopped(ndev) && !net_device->start_remove && +- (hv_ringbuf_avail_percent(&device->channel->outbound) +- > RING_AVAIL_PERCENT_HIWATER || +- num_outstanding_sends < 1)) +- netif_wake_queue(ndev); ++ if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && ++ !net_device->start_remove && ++ (hv_ringbuf_avail_percent(&channel->outbound) > ++ RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) ++ netif_tx_wake_queue(netdev_get_tx_queue( ++ ndev, q_idx)); + } else { + netdev_err(ndev, "Unknown send completion packet type- " + "%d received!!\n", nvsp_packet->hdr.msg_type); +@@ -505,6 +519,7 @@ int netvsc_send(struct hv_device *device, + int ret = 0; + struct nvsp_message sendMessage; + struct net_device *ndev; ++ struct vmbus_channel *out_channel = NULL; + u64 req_id; + + net_device = get_outbound_net_device(device); +@@ -531,15 +546,20 @@ int netvsc_send(struct hv_device *device, + else + req_id = 0; + ++ out_channel = net_device->chn_table[packet->q_idx]; ++ if (out_channel == NULL) ++ out_channel = device->channel; ++ packet->channel = out_channel; ++ + if 
(packet->page_buf_cnt) { +- ret = vmbus_sendpacket_pagebuffer(device->channel, ++ ret = vmbus_sendpacket_pagebuffer(out_channel, + packet->page_buf, + packet->page_buf_cnt, + &sendMessage, + sizeof(struct nvsp_message), + req_id); + } else { +- ret = vmbus_sendpacket(device->channel, &sendMessage, ++ ret = vmbus_sendpacket(out_channel, &sendMessage, + sizeof(struct nvsp_message), + req_id, + VM_PKT_DATA_INBAND, +@@ -548,17 +568,24 @@ int netvsc_send(struct hv_device *device, + + if (ret == 0) { + atomic_inc(&net_device->num_outstanding_sends); +- if (hv_ringbuf_avail_percent(&device->channel->outbound) < ++ atomic_inc(&net_device->queue_sends[packet->q_idx]); ++ ++ if (hv_ringbuf_avail_percent(&out_channel->outbound) < + RING_AVAIL_PERCENT_LOWATER) { +- netif_stop_queue(ndev); ++ netif_tx_stop_queue(netdev_get_tx_queue( ++ ndev, packet->q_idx)); ++ + if (atomic_read(&net_device-> +- num_outstanding_sends) < 1) +- netif_wake_queue(ndev); ++ queue_sends[packet->q_idx]) < 1) ++ netif_tx_wake_queue(netdev_get_tx_queue( ++ ndev, packet->q_idx)); + } + } else if (ret == -EAGAIN) { +- netif_stop_queue(ndev); +- if (atomic_read(&net_device->num_outstanding_sends) < 1) { +- netif_wake_queue(ndev); ++ netif_tx_stop_queue(netdev_get_tx_queue( ++ ndev, packet->q_idx)); ++ if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) { ++ netif_tx_wake_queue(netdev_get_tx_queue( ++ ndev, packet->q_idx)); + ret = -ENOSPC; + } + } else { +@@ -570,6 +597,7 @@ int netvsc_send(struct hv_device *device, + } + + static void netvsc_send_recv_completion(struct hv_device *device, ++ struct vmbus_channel *channel, + struct netvsc_device *net_device, + u64 transaction_id, u32 status) + { +@@ -587,7 +615,7 @@ static void netvsc_send_recv_completion(struct hv_device *device, + + retry_send_cmplt: + /* Send the completion */ +- ret = vmbus_sendpacket(device->channel, &recvcompMessage, ++ ret = vmbus_sendpacket(channel, &recvcompMessage, + sizeof(struct nvsp_message), transaction_id, + 
VM_PKT_COMP, 0); + if (ret == 0) { +@@ -618,6 +646,7 @@ static void netvsc_receive_completion(void *context) + { + struct hv_netvsc_packet *packet = context; + struct hv_device *device = packet->device; ++ struct vmbus_channel *channel; + struct netvsc_device *net_device; + u64 transaction_id = 0; + bool fsend_receive_comp = false; +@@ -649,6 +678,7 @@ static void netvsc_receive_completion(void *context) + */ + if (packet->xfer_page_pkt->count == 0) { + fsend_receive_comp = true; ++ channel = packet->xfer_page_pkt->channel; + transaction_id = packet->completion.recv.recv_completion_tid; + status = packet->xfer_page_pkt->status; + list_add_tail(&packet->xfer_page_pkt->list_ent, +@@ -662,12 +692,13 @@ static void netvsc_receive_completion(void *context) + + /* Send a receive completion for the xfer page packet */ + if (fsend_receive_comp) +- netvsc_send_recv_completion(device, net_device, transaction_id, +- status); ++ netvsc_send_recv_completion(device, channel, net_device, ++ transaction_id, status); + + } + + static void netvsc_receive(struct netvsc_device *net_device, ++ struct vmbus_channel *channel, + struct hv_device *device, + struct vmpacket_descriptor *packet) + { +@@ -748,7 +779,7 @@ static void netvsc_receive(struct netvsc_device *net_device, + spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, + flags); + +- netvsc_send_recv_completion(device, net_device, ++ netvsc_send_recv_completion(device, channel, net_device, + vmxferpage_packet->d.trans_id, + NVSP_STAT_FAIL); + +@@ -759,6 +790,7 @@ static void netvsc_receive(struct netvsc_device *net_device, + xferpage_packet = (struct xferpage_packet *)listHead.next; + list_del(&xferpage_packet->list_ent); + xferpage_packet->status = NVSP_STAT_SUCCESS; ++ xferpage_packet->channel = channel; + + /* This is how much we can satisfy */ + xferpage_packet->count = count - 1; +@@ -800,10 +832,45 @@ static void netvsc_receive(struct netvsc_device *net_device, + + } + +-static void netvsc_channel_cb(void *context) ++ 
++static void netvsc_send_table(struct hv_device *hdev, ++ struct vmpacket_descriptor *vmpkt) ++{ ++ struct netvsc_device *nvscdev; ++ struct net_device *ndev; ++ struct nvsp_message *nvmsg; ++ int i; ++ u32 count, *tab; ++ ++ nvscdev = get_outbound_net_device(hdev); ++ if (!nvscdev) ++ return; ++ ndev = nvscdev->ndev; ++ ++ nvmsg = (struct nvsp_message *)((unsigned long)vmpkt + ++ (vmpkt->offset8 << 3)); ++ ++ if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE) ++ return; ++ ++ count = nvmsg->msg.v5_msg.send_table.count; ++ if (count != VRSS_SEND_TAB_SIZE) { ++ netdev_err(ndev, "Received wrong send-table size:%u\n", count); ++ return; ++ } ++ ++ tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table + ++ nvmsg->msg.v5_msg.send_table.offset); ++ ++ for (i = 0; i < count; i++) ++ nvscdev->send_table[i] = tab[i]; ++} ++ ++void netvsc_channel_cb(void *context) + { + int ret; +- struct hv_device *device = context; ++ struct vmbus_channel *channel = (struct vmbus_channel *)context; ++ struct hv_device *device; + struct netvsc_device *net_device; + u32 bytes_recvd; + u64 request_id; +@@ -812,14 +879,19 @@ static void netvsc_channel_cb(void *context) + int bufferlen = NETVSC_PACKET_SIZE; + struct net_device *ndev; + ++ if (channel->primary_channel != NULL) ++ device = channel->primary_channel->device_obj; ++ else ++ device = channel->device_obj; ++ + net_device = get_inbound_net_device(device); + if (!net_device) + return; + ndev = net_device->ndev; +- buffer = net_device->cb_buffer; ++ buffer = get_per_channel_state(channel); + + do { +- ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen, ++ ret = vmbus_recvpacket_raw(channel, buffer, bufferlen, + &bytes_recvd, &request_id); + if (ret == 0) { + if (bytes_recvd > 0) { +@@ -831,8 +903,12 @@ static void netvsc_channel_cb(void *context) + break; + + case VM_PKT_DATA_USING_XFER_PAGES: +- netvsc_receive(net_device, +- device, desc); ++ netvsc_receive(net_device, channel, ++ device, desc); ++ break; 
++ ++ case VM_PKT_DATA_INBAND: ++ netvsc_send_table(device, desc); + break; + + default: +@@ -893,6 +969,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) + goto cleanup; + } + ++ net_device->ring_size = ring_size; ++ + /* + * Coming into this function, struct net_device * is + * registered as the driver private data. +@@ -917,10 +995,12 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) + } + init_completion(&net_device->channel_init_wait); + ++ set_per_channel_state(device->channel, net_device->cb_buffer); ++ + /* Open the channel */ + ret = vmbus_open(device->channel, ring_size * PAGE_SIZE, + ring_size * PAGE_SIZE, NULL, 0, +- netvsc_channel_cb, device); ++ netvsc_channel_cb, device->channel); + + if (ret != 0) { + netdev_err(ndev, "unable to open channel: %d\n", ret); +@@ -930,6 +1010,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) + /* Channel is opened */ + pr_info("hv_netvsc channel opened successfully\n"); + ++ net_device->chn_table[0] = device->channel; ++ + /* Connect with the NetVsp */ + ret = netvsc_connect_vsp(device); + if (ret != 0) { +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index ce6d870dd7ae..e486dbd33f61 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -101,7 +101,7 @@ static int netvsc_open(struct net_device *net) + return ret; + } + +- netif_start_queue(net); ++ netif_tx_start_all_queues(net); + + nvdev = hv_get_drvdata(device_obj); + rdev = nvdev->extension; +@@ -149,6 +149,88 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, + return ppi; + } + ++union sub_key { ++ u64 k; ++ struct { ++ u8 pad[3]; ++ u8 kb; ++ u32 ka; ++ }; ++}; ++ ++/* Toeplitz hash function ++ * data: network byte order ++ * return: host byte order ++ */ ++static u32 comp_hash(u8 *key, int klen, u8 *data, int dlen) ++{ ++ union sub_key subk; ++ int k_next = 4; ++ u8 dt; ++ int i, j; ++ u32 ret = 0; 
++ ++ subk.k = 0; ++ subk.ka = ntohl(*(u32 *)key); ++ ++ for (i = 0; i < dlen; i++) { ++ subk.kb = key[k_next]; ++ k_next = (k_next + 1) % klen; ++ dt = data[i]; ++ for (j = 0; j < 8; j++) { ++ if (dt & 0x80) ++ ret ^= subk.ka; ++ dt <<= 1; ++ subk.k <<= 1; ++ } ++ } ++ ++ return ret; ++} ++ ++static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb) ++{ ++ struct iphdr *iphdr; ++ int data_len; ++ bool ret = false; ++ ++ if (eth_hdr(skb)->h_proto != htons(ETH_P_IP)) ++ return false; ++ ++ iphdr = ip_hdr(skb); ++ ++ if (iphdr->version == 4) { ++ if (iphdr->protocol == IPPROTO_TCP) ++ data_len = 12; ++ else ++ data_len = 8; ++ *hash = comp_hash(netvsc_hash_key, HASH_KEYLEN, ++ (u8 *)&iphdr->saddr, data_len); ++ ret = true; ++ } ++ ++ return ret; ++} ++ ++static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, ++ void *accel_priv, select_queue_fallback_t fallback) ++{ ++ struct net_device_context *net_device_ctx = netdev_priv(ndev); ++ struct hv_device *hdev = net_device_ctx->device_ctx; ++ struct netvsc_device *nvsc_dev = hv_get_drvdata(hdev); ++ u32 hash; ++ u16 q_idx = 0; ++ ++ if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1) ++ return 0; ++ ++ if (netvsc_set_hash(&hash, skb)) ++ q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] % ++ ndev->real_num_tx_queues; ++ ++ return q_idx; ++} ++ + static void netvsc_xmit_completion(void *context) + { + struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; +@@ -334,6 +416,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + + packet->vlan_tci = skb->vlan_tci; + ++ packet->q_idx = skb_get_queue_mapping(skb); ++ + packet->is_data_pkt = true; + packet->total_data_buflen = skb->len; + +@@ -559,6 +643,10 @@ int netvsc_recv_callback(struct hv_device *device_obj, + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + packet->vlan_tci); + ++ skb_record_rx_queue(skb, packet->xfer_page_pkt->channel-> ++ offermsg.offer.sub_channel_index % ++ 
net->real_num_rx_queues); ++ + net->stats.rx_packets++; + net->stats.rx_bytes += packet->total_data_buflen; + +@@ -607,7 +695,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) + hv_set_drvdata(hdev, ndev); + device_info.ring_size = ring_size; + rndis_filter_device_add(hdev, &device_info); +- netif_wake_queue(ndev); ++ netif_tx_wake_all_queues(ndev); + + return 0; + } +@@ -653,6 +741,7 @@ static const struct net_device_ops device_ops = { + .ndo_change_mtu = netvsc_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = netvsc_set_mac_addr, ++ .ndo_select_queue = netvsc_select_queue, + }; + + /* +@@ -699,9 +788,11 @@ static int netvsc_probe(struct hv_device *dev, + struct net_device *net = NULL; + struct net_device_context *net_device_ctx; + struct netvsc_device_info device_info; ++ struct netvsc_device *nvdev; + int ret; + +- net = alloc_etherdev(sizeof(struct net_device_context)); ++ net = alloc_etherdev_mq(sizeof(struct net_device_context), ++ num_online_cpus()); + if (!net) + return -ENOMEM; + +@@ -734,6 +825,12 @@ static int netvsc_probe(struct hv_device *dev, + } + memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + ++ nvdev = hv_get_drvdata(dev); ++ netif_set_real_num_tx_queues(net, nvdev->num_chn); ++ netif_set_real_num_rx_queues(net, nvdev->num_chn); ++ dev_info(&dev->device, "real num tx,rx queues:%u, %u\n", ++ net->real_num_tx_queues, net->real_num_rx_queues); ++ + ret = register_netdev(net); + if (ret != 0) { + pr_err("Unable to register netdev.\n"); +diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c +index 143a98caf618..d92cfbe43410 100644 +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -31,7 +31,7 @@ + #include "hyperv_net.h" + + +-#define RNDIS_EXT_LEN 100 ++#define RNDIS_EXT_LEN PAGE_SIZE + struct rndis_request { + struct list_head list_ent; + struct completion wait_event; +@@ -94,6 +94,8 @@ static struct rndis_request 
*get_rndis_request(struct rndis_device *dev, + rndis_msg->ndis_msg_type = msg_type; + rndis_msg->msg_len = msg_len; + ++ request->pkt.q_idx = 0; ++ + /* + * Set the request id. This field is always after the rndis header for + * request/response packet types so we just used the SetRequest as a +@@ -509,6 +511,19 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid, + query->info_buflen = 0; + query->dev_vc_handle = 0; + ++ if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) { ++ struct ndis_recv_scale_cap *cap; ++ ++ request->request_msg.msg_len += ++ sizeof(struct ndis_recv_scale_cap); ++ query->info_buflen = sizeof(struct ndis_recv_scale_cap); ++ cap = (struct ndis_recv_scale_cap *)((unsigned long)query + ++ query->info_buf_offset); ++ cap->hdr.type = NDIS_OBJECT_TYPE_RSS_CAPABILITIES; ++ cap->hdr.rev = NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2; ++ cap->hdr.size = sizeof(struct ndis_recv_scale_cap); ++ } ++ + ret = rndis_filter_send_request(dev, request); + if (ret != 0) + goto cleanup; +@@ -695,6 +710,89 @@ cleanup: + return ret; + } + ++u8 netvsc_hash_key[HASH_KEYLEN] = { ++ 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, ++ 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, ++ 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, ++ 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, ++ 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa ++}; ++ ++int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue) ++{ ++ struct net_device *ndev = rdev->net_dev->ndev; ++ struct rndis_request *request; ++ struct rndis_set_request *set; ++ struct rndis_set_complete *set_complete; ++ u32 extlen = sizeof(struct ndis_recv_scale_param) + ++ 4*ITAB_NUM + HASH_KEYLEN; ++ struct ndis_recv_scale_param *rssp; ++ u32 *itab; ++ u8 *keyp; ++ int i, t, ret; ++ ++ request = get_rndis_request( ++ rdev, RNDIS_MSG_SET, ++ RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen); ++ if (!request) ++ return -ENOMEM; ++ ++ set = &request->request_msg.msg.set_req; ++ set->oid = 
OID_GEN_RECEIVE_SCALE_PARAMETERS; ++ set->info_buflen = extlen; ++ set->info_buf_offset = sizeof(struct rndis_set_request); ++ set->dev_vc_handle = 0; ++ ++ rssp = (struct ndis_recv_scale_param *)(set + 1); ++ rssp->hdr.type = NDIS_OBJECT_TYPE_RSS_PARAMETERS; ++ rssp->hdr.rev = NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2; ++ rssp->hdr.size = sizeof(struct ndis_recv_scale_param); ++ rssp->flag = 0; ++ rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 | ++ NDIS_HASH_TCP_IPV4; ++ rssp->indirect_tabsize = 4*ITAB_NUM; ++ rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param); ++ rssp->hashkey_size = HASH_KEYLEN; ++ rssp->kashkey_offset = rssp->indirect_taboffset + ++ rssp->indirect_tabsize; ++ ++ /* Set indirection table entries */ ++ itab = (u32 *)(rssp + 1); ++ for (i = 0; i < ITAB_NUM; i++) ++ itab[i] = i % num_queue; ++ ++ /* Set hask key values */ ++ keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset); ++ for (i = 0; i < HASH_KEYLEN; i++) ++ keyp[i] = netvsc_hash_key[i]; ++ ++ ++ ret = rndis_filter_send_request(rdev, request); ++ if (ret != 0) ++ goto cleanup; ++ ++ t = wait_for_completion_timeout(&request->wait_event, 5*HZ); ++ if (t == 0) { ++ netdev_err(ndev, "timeout before we got a set response...\n"); ++ /* can't put_rndis_request, since we may still receive a ++ * send-completion. 
++ */ ++ return -ETIMEDOUT; ++ } else { ++ set_complete = &request->response_msg.msg.set_complete; ++ if (set_complete->status != RNDIS_STATUS_SUCCESS) { ++ netdev_err(ndev, "Fail to set RSS parameters:0x%x\n", ++ set_complete->status); ++ ret = -EINVAL; ++ } ++ } ++ ++cleanup: ++ put_rndis_request(rdev, request); ++ return ret; ++} ++ ++ + static int rndis_filter_query_device_link_status(struct rndis_device *dev) + { + u32 size = sizeof(u32); +@@ -886,6 +984,28 @@ static int rndis_filter_close_device(struct rndis_device *dev) + return ret; + } + ++static void netvsc_sc_open(struct vmbus_channel *new_sc) ++{ ++ struct netvsc_device *nvscdev; ++ u16 chn_index = new_sc->offermsg.offer.sub_channel_index; ++ int ret; ++ ++ nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj); ++ ++ if (chn_index >= nvscdev->num_chn) ++ return; ++ ++ set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) * ++ NETVSC_PACKET_SIZE); ++ ++ ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE, ++ nvscdev->ring_size * PAGE_SIZE, NULL, 0, ++ netvsc_channel_cb, new_sc); ++ ++ if (ret == 0) ++ nvscdev->chn_table[chn_index] = new_sc; ++} ++ + int rndis_filter_device_add(struct hv_device *dev, + void *additional_info) + { +@@ -894,6 +1014,10 @@ int rndis_filter_device_add(struct hv_device *dev, + struct rndis_device *rndis_device; + struct netvsc_device_info *device_info = additional_info; + struct ndis_offload_params offloads; ++ struct nvsp_message *init_packet; ++ int t; ++ struct ndis_recv_scale_cap rsscap; ++ u32 rsscap_size = sizeof(struct ndis_recv_scale_cap); + + rndis_device = get_rndis_device(); + if (!rndis_device) +@@ -913,6 +1037,7 @@ int rndis_filter_device_add(struct hv_device *dev, + + /* Initialize the rndis device */ + net_device = hv_get_drvdata(dev); ++ net_device->num_chn = 1; + + net_device->extension = rndis_device; + rndis_device->net_dev = net_device; +@@ -952,7 +1077,6 @@ int rndis_filter_device_add(struct hv_device *dev, + if (ret) + goto 
err_dev_remv; + +- + rndis_filter_query_device_link_status(rndis_device); + + device_info->link_state = rndis_device->link_state; +@@ -961,7 +1085,66 @@ int rndis_filter_device_add(struct hv_device *dev, + rndis_device->hw_mac_adr, + device_info->link_state ? "down" : "up"); + +- return ret; ++ if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5) ++ return 0; ++ ++ /* vRSS setup */ ++ memset(&rsscap, 0, rsscap_size); ++ ret = rndis_filter_query_device(rndis_device, ++ OID_GEN_RECEIVE_SCALE_CAPABILITIES, ++ &rsscap, &rsscap_size); ++ if (ret || rsscap.num_recv_que < 2) ++ goto out; ++ ++ net_device->num_chn = (num_online_cpus() < rsscap.num_recv_que) ? ++ num_online_cpus() : rsscap.num_recv_que; ++ if (net_device->num_chn == 1) ++ goto out; ++ ++ net_device->sub_cb_buf = vzalloc((net_device->num_chn - 1) * ++ NETVSC_PACKET_SIZE); ++ if (!net_device->sub_cb_buf) { ++ net_device->num_chn = 1; ++ dev_info(&dev->device, "No memory for subchannels.\n"); ++ goto out; ++ } ++ ++ vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); ++ ++ init_packet = &net_device->channel_init_pkt; ++ memset(init_packet, 0, sizeof(struct nvsp_message)); ++ init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL; ++ init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE; ++ init_packet->msg.v5_msg.subchn_req.num_subchannels = ++ net_device->num_chn - 1; ++ ret = vmbus_sendpacket(dev->channel, init_packet, ++ sizeof(struct nvsp_message), ++ (unsigned long)init_packet, ++ VM_PKT_DATA_INBAND, ++ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); ++ if (ret) ++ goto out; ++ t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ); ++ if (t == 0) { ++ ret = -ETIMEDOUT; ++ goto out; ++ } ++ if (init_packet->msg.v5_msg.subchn_comp.status != ++ NVSP_STAT_SUCCESS) { ++ ret = -ENODEV; ++ goto out; ++ } ++ net_device->num_chn = 1 + ++ init_packet->msg.v5_msg.subchn_comp.num_subchannels; ++ ++ vmbus_are_subchannels_present(dev->channel); ++ ++ ret = 
rndis_filter_set_rss_param(rndis_device, net_device->num_chn); ++ ++out: ++ if (ret) ++ net_device->num_chn = 1; ++ return 0; /* return 0 because primary channel can be used alone */ + + err_dev_remv: + rndis_filter_device_remove(dev); +-- +2.4.3 + diff --git a/src/patches/linux/0017-hyperv-Remove-recv_pkt_list-and-lock.patch b/src/patches/linux/0017-hyperv-Remove-recv_pkt_list-and-lock.patch new file mode 100644 index 0000000000..31465a1ba4 --- /dev/null +++ b/src/patches/linux/0017-hyperv-Remove-recv_pkt_list-and-lock.patch @@ -0,0 +1,384 @@ +From d6bf5567c1438b4f3b1bcff1a1525ddb1754df19 Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang +Date: Mon, 21 Apr 2014 14:54:43 -0700 +Subject: [PATCH 17/25] hyperv: Remove recv_pkt_list and lock + +Removed recv_pkt_list and lock, and updated related code, so that +the locking overhead is reduced especially when multiple channels +are in use. + +The recv_pkt_list isn't actually necessary because the packets are +processed sequentially in each channel. It has been replaced by a +local variable, and the related lock for this list is also removed. +The is_data_pkt field is not used in receive path, so its assignment +is cleaned up. + +Signed-off-by: Haiyang Zhang +Reviewed-by: K. Y. Srinivasan +Signed-off-by: David S. 
Miller +--- + drivers/net/hyperv/hyperv_net.h | 33 -------- + drivers/net/hyperv/netvsc.c | 174 +++----------------------------------- + drivers/net/hyperv/netvsc_drv.c | 2 +- + drivers/net/hyperv/rndis_filter.c | 2 - + 4 files changed, 13 insertions(+), 198 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index 57eb3f906d64..a1af0f7711e2 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -119,27 +119,14 @@ struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */ + }; + + /* Fwd declaration */ +-struct hv_netvsc_packet; + struct ndis_tcp_ip_checksum_info; + +-/* Represent the xfer page packet which contains 1 or more netvsc packet */ +-struct xferpage_packet { +- struct list_head list_ent; +- u32 status; +- +- /* # of netvsc packets this xfer packet contains */ +- u32 count; +- +- struct vmbus_channel *channel; +-}; +- + /* + * Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame + * within the RNDIS + */ + struct hv_netvsc_packet { + /* Bookkeeping stuff */ +- struct list_head list_ent; + u32 status; + + struct hv_device *device; +@@ -149,19 +136,8 @@ struct hv_netvsc_packet { + u16 q_idx; + struct vmbus_channel *channel; + +- /* +- * Valid only for receives when we break a xfer page packet +- * into multiple netvsc packets +- */ +- struct xferpage_packet *xfer_page_pkt; +- + union { + struct { +- u64 recv_completion_tid; +- void *recv_completion_ctx; +- void (*recv_completion)(void *context); +- } recv; +- struct { + u64 send_completion_tid; + void *send_completion_ctx; + void (*send_completion)(void *context); +@@ -613,9 +589,6 @@ struct nvsp_message { + + #define NETVSC_RECEIVE_BUFFER_ID 0xcafe + +-/* Preallocated receive packets */ +-#define NETVSC_RECEIVE_PACKETLIST_COUNT 256 +- + #define NETVSC_PACKET_SIZE 2048 + + #define VRSS_SEND_TAB_SIZE 16 +@@ -630,12 +603,6 @@ struct netvsc_device { + wait_queue_head_t wait_drain; + bool start_remove; + bool 
destroy; +- /* +- * List of free preallocated hv_netvsc_packet to represent receive +- * packet +- */ +- struct list_head recv_pkt_list; +- spinlock_t recv_pkt_list_lock; + + /* Receive buffer allocated by us but manages by NetVSP */ + void *recv_buf; +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index e7e77f12bc38..b10334773b32 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -387,7 +387,6 @@ static void netvsc_disconnect_vsp(struct netvsc_device *net_device) + int netvsc_device_remove(struct hv_device *device) + { + struct netvsc_device *net_device; +- struct hv_netvsc_packet *netvsc_packet, *pos; + unsigned long flags; + + net_device = hv_get_drvdata(device); +@@ -416,12 +415,6 @@ int netvsc_device_remove(struct hv_device *device) + vmbus_close(device->channel); + + /* Release all resources */ +- list_for_each_entry_safe(netvsc_packet, pos, +- &net_device->recv_pkt_list, list_ent) { +- list_del(&netvsc_packet->list_ent); +- kfree(netvsc_packet); +- } +- + if (net_device->sub_cb_buf) + vfree(net_device->sub_cb_buf); + +@@ -641,62 +634,6 @@ retry_send_cmplt: + } + } + +-/* Send a receive completion packet to RNDIS device (ie NetVsp) */ +-static void netvsc_receive_completion(void *context) +-{ +- struct hv_netvsc_packet *packet = context; +- struct hv_device *device = packet->device; +- struct vmbus_channel *channel; +- struct netvsc_device *net_device; +- u64 transaction_id = 0; +- bool fsend_receive_comp = false; +- unsigned long flags; +- struct net_device *ndev; +- u32 status = NVSP_STAT_NONE; +- +- /* +- * Even though it seems logical to do a GetOutboundNetDevice() here to +- * send out receive completion, we are using GetInboundNetDevice() +- * since we may have disable outbound traffic already. +- */ +- net_device = get_inbound_net_device(device); +- if (!net_device) +- return; +- ndev = net_device->ndev; +- +- /* Overloading use of the lock. 
*/ +- spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags); +- +- if (packet->status != NVSP_STAT_SUCCESS) +- packet->xfer_page_pkt->status = NVSP_STAT_FAIL; +- +- packet->xfer_page_pkt->count--; +- +- /* +- * Last one in the line that represent 1 xfer page packet. +- * Return the xfer page packet itself to the freelist +- */ +- if (packet->xfer_page_pkt->count == 0) { +- fsend_receive_comp = true; +- channel = packet->xfer_page_pkt->channel; +- transaction_id = packet->completion.recv.recv_completion_tid; +- status = packet->xfer_page_pkt->status; +- list_add_tail(&packet->xfer_page_pkt->list_ent, +- &net_device->recv_pkt_list); +- +- } +- +- /* Put the packet back */ +- list_add_tail(&packet->list_ent, &net_device->recv_pkt_list); +- spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags); +- +- /* Send a receive completion for the xfer page packet */ +- if (fsend_receive_comp) +- netvsc_send_recv_completion(device, channel, net_device, +- transaction_id, status); +- +-} +- + static void netvsc_receive(struct netvsc_device *net_device, + struct vmbus_channel *channel, + struct hv_device *device, +@@ -704,16 +641,13 @@ static void netvsc_receive(struct netvsc_device *net_device, + { + struct vmtransfer_page_packet_header *vmxferpage_packet; + struct nvsp_message *nvsp_packet; +- struct hv_netvsc_packet *netvsc_packet = NULL; +- /* struct netvsc_driver *netvscDriver; */ +- struct xferpage_packet *xferpage_packet = NULL; ++ struct hv_netvsc_packet nv_pkt; ++ struct hv_netvsc_packet *netvsc_packet = &nv_pkt; ++ u32 status = NVSP_STAT_SUCCESS; + int i; + int count = 0; +- unsigned long flags; + struct net_device *ndev; + +- LIST_HEAD(listHead); +- + ndev = net_device->ndev; + + /* +@@ -746,78 +680,14 @@ static void netvsc_receive(struct netvsc_device *net_device, + return; + } + +- /* +- * Grab free packets (range count + 1) to represent this xfer +- * page packet. +1 to represent the xfer page packet itself. 
+- * We grab it here so that we know exactly how many we can +- * fulfil +- */ +- spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags); +- while (!list_empty(&net_device->recv_pkt_list)) { +- list_move_tail(net_device->recv_pkt_list.next, &listHead); +- if (++count == vmxferpage_packet->range_cnt + 1) +- break; +- } +- spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags); +- +- /* +- * We need at least 2 netvsc pkts (1 to represent the xfer +- * page and at least 1 for the range) i.e. we can handled +- * some of the xfer page packet ranges... +- */ +- if (count < 2) { +- netdev_err(ndev, "Got only %d netvsc pkt...needed " +- "%d pkts. Dropping this xfer page packet completely!\n", +- count, vmxferpage_packet->range_cnt + 1); +- +- /* Return it to the freelist */ +- spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags); +- for (i = count; i != 0; i--) { +- list_move_tail(listHead.next, +- &net_device->recv_pkt_list); +- } +- spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, +- flags); +- +- netvsc_send_recv_completion(device, channel, net_device, +- vmxferpage_packet->d.trans_id, +- NVSP_STAT_FAIL); +- +- return; +- } +- +- /* Remove the 1st packet to represent the xfer page packet itself */ +- xferpage_packet = (struct xferpage_packet *)listHead.next; +- list_del(&xferpage_packet->list_ent); +- xferpage_packet->status = NVSP_STAT_SUCCESS; +- xferpage_packet->channel = channel; +- +- /* This is how much we can satisfy */ +- xferpage_packet->count = count - 1; +- +- if (xferpage_packet->count != vmxferpage_packet->range_cnt) { +- netdev_err(ndev, "Needed %d netvsc pkts to satisfy " +- "this xfer page...got %d\n", +- vmxferpage_packet->range_cnt, xferpage_packet->count); +- } ++ count = vmxferpage_packet->range_cnt; ++ netvsc_packet->device = device; ++ netvsc_packet->channel = channel; + + /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */ +- for (i = 0; i < (count - 1); i++) { +- netvsc_packet = (struct hv_netvsc_packet 
*)listHead.next; +- list_del(&netvsc_packet->list_ent); +- ++ for (i = 0; i < count; i++) { + /* Initialize the netvsc packet */ + netvsc_packet->status = NVSP_STAT_SUCCESS; +- netvsc_packet->xfer_page_pkt = xferpage_packet; +- netvsc_packet->completion.recv.recv_completion = +- netvsc_receive_completion; +- netvsc_packet->completion.recv.recv_completion_ctx = +- netvsc_packet; +- netvsc_packet->device = device; +- /* Save this so that we can send it back */ +- netvsc_packet->completion.recv.recv_completion_tid = +- vmxferpage_packet->d.trans_id; +- + netvsc_packet->data = (void *)((unsigned long)net_device-> + recv_buf + vmxferpage_packet->ranges[i].byte_offset); + netvsc_packet->total_data_buflen = +@@ -826,10 +696,12 @@ static void netvsc_receive(struct netvsc_device *net_device, + /* Pass it to the upper layer */ + rndis_filter_receive(device, netvsc_packet); + +- netvsc_receive_completion(netvsc_packet-> +- completion.recv.recv_completion_ctx); ++ if (netvsc_packet->status != NVSP_STAT_SUCCESS) ++ status = NVSP_STAT_FAIL; + } + ++ netvsc_send_recv_completion(device, channel, net_device, ++ vmxferpage_packet->d.trans_id, status); + } + + +@@ -956,11 +828,9 @@ void netvsc_channel_cb(void *context) + int netvsc_device_add(struct hv_device *device, void *additional_info) + { + int ret = 0; +- int i; + int ring_size = + ((struct netvsc_device_info *)additional_info)->ring_size; + struct netvsc_device *net_device; +- struct hv_netvsc_packet *packet, *pos; + struct net_device *ndev; + + net_device = alloc_net_device(device); +@@ -981,18 +851,6 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) + ndev = net_device->ndev; + + /* Initialize the NetVSC channel extension */ +- spin_lock_init(&net_device->recv_pkt_list_lock); +- +- INIT_LIST_HEAD(&net_device->recv_pkt_list); +- +- for (i = 0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) { +- packet = kzalloc(sizeof(struct hv_netvsc_packet), GFP_KERNEL); +- if (!packet) +- break; +- +- 
list_add_tail(&packet->list_ent, +- &net_device->recv_pkt_list); +- } + init_completion(&net_device->channel_init_wait); + + set_per_channel_state(device->channel, net_device->cb_buffer); +@@ -1028,16 +886,8 @@ close: + + cleanup: + +- if (net_device) { +- list_for_each_entry_safe(packet, pos, +- &net_device->recv_pkt_list, +- list_ent) { +- list_del(&packet->list_ent); +- kfree(packet); +- } +- ++ if (net_device) + kfree(net_device); +- } + + return ret; + } +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index e486dbd33f61..6cc4db064fec 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -643,7 +643,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + packet->vlan_tci); + +- skb_record_rx_queue(skb, packet->xfer_page_pkt->channel-> ++ skb_record_rx_queue(skb, packet->channel-> + offermsg.offer.sub_channel_index % + net->real_num_rx_queues); + +diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c +index d92cfbe43410..48f5a0fbd674 100644 +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -401,8 +401,6 @@ static void rndis_filter_receive_data(struct rndis_device *dev, + pkt->total_data_buflen = rndis_pkt->data_len; + pkt->data = (void *)((unsigned long)pkt->data + data_offset); + +- pkt->is_data_pkt = true; +- + vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO); + if (vlan) { + pkt->vlan_tci = VLAN_TAG_PRESENT | vlan->vlanid | +-- +2.4.3 + diff --git a/src/patches/linux/0018-hyperv-Simplify-the-send_completion-variables.patch b/src/patches/linux/0018-hyperv-Simplify-the-send_completion-variables.patch new file mode 100644 index 0000000000..82b742baa1 --- /dev/null +++ b/src/patches/linux/0018-hyperv-Simplify-the-send_completion-variables.patch @@ -0,0 +1,105 @@ +From d6eeeb452c1e6e7cf14f4a581a8f2ea2c50ec17a Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang +Date: Mon, 21 Apr 2014 
14:54:44 -0700 +Subject: [PATCH 18/25] hyperv: Simplify the send_completion variables + +The union contains only one member now, so we use the variables in it directly. + +Signed-off-by: Haiyang Zhang +Reviewed-by: K. Y. Srinivasan +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/hyperv_net.h | 10 +++------- + drivers/net/hyperv/netvsc.c | 7 +++---- + drivers/net/hyperv/netvsc_drv.c | 8 ++++---- + drivers/net/hyperv/rndis_filter.c | 2 +- + 4 files changed, 11 insertions(+), 16 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index a1af0f7711e2..d1f7826aa75f 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -136,13 +136,9 @@ struct hv_netvsc_packet { + u16 q_idx; + struct vmbus_channel *channel; + +- union { +- struct { +- u64 send_completion_tid; +- void *send_completion_ctx; +- void (*send_completion)(void *context); +- } send; +- } completion; ++ u64 send_completion_tid; ++ void *send_completion_ctx; ++ void (*send_completion)(void *context); + + /* This points to the memory after page_buf */ + struct rndis_message *rndis_msg; +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index b10334773b32..bbee44635035 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -479,9 +479,8 @@ static void netvsc_send_completion(struct netvsc_device *net_device, + if (nvsc_packet) { + q_idx = nvsc_packet->q_idx; + channel = nvsc_packet->channel; +- nvsc_packet->completion.send.send_completion( +- nvsc_packet->completion.send. 
+- send_completion_ctx); ++ nvsc_packet->send_completion(nvsc_packet-> ++ send_completion_ctx); + } + + num_outstanding_sends = +@@ -534,7 +533,7 @@ int netvsc_send(struct hv_device *device, + 0xFFFFFFFF; + sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0; + +- if (packet->completion.send.send_completion) ++ if (packet->send_completion) + req_id = (ulong)packet; + else + req_id = 0; +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 6cc4db064fec..f3d3525ed42d 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -235,7 +235,7 @@ static void netvsc_xmit_completion(void *context) + { + struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; + struct sk_buff *skb = (struct sk_buff *) +- (unsigned long)packet->completion.send.send_completion_tid; ++ (unsigned long)packet->send_completion_tid; + + kfree(packet); + +@@ -426,9 +426,9 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + (num_data_pgs * sizeof(struct hv_page_buffer))); + + /* Set the completion routine */ +- packet->completion.send.send_completion = netvsc_xmit_completion; +- packet->completion.send.send_completion_ctx = packet; +- packet->completion.send.send_completion_tid = (unsigned long)skb; ++ packet->send_completion = netvsc_xmit_completion; ++ packet->send_completion_ctx = packet; ++ packet->send_completion_tid = (unsigned long)skb; + + isvlan = packet->vlan_tci & VLAN_TAG_PRESENT; + +diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c +index 48f5a0fbd674..99c527adae5b 100644 +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -236,7 +236,7 @@ static int rndis_filter_send_request(struct rndis_device *dev, + packet->page_buf[0].len; + } + +- packet->completion.send.send_completion = NULL; ++ packet->send_completion = NULL; + + ret = netvsc_send(dev->net_dev->dev, packet); + return ret; +-- +2.4.3 + diff --git 
a/src/patches/linux/0019-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch b/src/patches/linux/0019-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch new file mode 100644 index 0000000000..3940865f92 --- /dev/null +++ b/src/patches/linux/0019-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch @@ -0,0 +1,407 @@ +From 4685e50349d5dc5fe485c898ca3ce539e93a0118 Mon Sep 17 00:00:00 2001 +From: KY Srinivasan +Date: Wed, 30 Apr 2014 10:14:31 -0700 +Subject: [PATCH 19/25] hyperv: Enable sendbuf mechanism on the send path + +We send packets using a copy-free mechanism (this is the Guest to Host transport +via VMBUS). While this is obviously optimal for large packets, +it may not be optimal for small packets. Hyper-V host supports +a second mechanism for sending packets that is "copy based". We implement that +mechanism in this patch. + +In this version of the patch I have addressed a comment from David Miller. + +With this patch (and all of the other offload and VRSS patches), we are now able +to almost saturate a 10G interface between Linux VMs on Hyper-V +on different hosts - close to 9 Gbps as measured via iperf. + +Signed-off-by: K. Y. Srinivasan +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. 
Miller +--- + drivers/net/hyperv/hyperv_net.h | 14 +++ + drivers/net/hyperv/netvsc.c | 226 ++++++++++++++++++++++++++++++++++++++-- + drivers/net/hyperv/netvsc_drv.c | 3 +- + 3 files changed, 234 insertions(+), 9 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index d1f7826aa75f..4b7df5a5c966 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -140,6 +140,8 @@ struct hv_netvsc_packet { + void *send_completion_ctx; + void (*send_completion)(void *context); + ++ u32 send_buf_index; ++ + /* This points to the memory after page_buf */ + struct rndis_message *rndis_msg; + +@@ -582,6 +584,9 @@ struct nvsp_message { + + #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ + #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ ++#define NETVSC_SEND_BUFFER_SIZE (1024 * 1024) /* 1MB */ ++#define NETVSC_INVALID_INDEX -1 ++ + + #define NETVSC_RECEIVE_BUFFER_ID 0xcafe + +@@ -607,6 +612,15 @@ struct netvsc_device { + u32 recv_section_cnt; + struct nvsp_1_receive_buffer_section *recv_section; + ++ /* Send buffer allocated by us */ ++ void *send_buf; ++ u32 send_buf_size; ++ u32 send_buf_gpadl_handle; ++ u32 send_section_cnt; ++ u32 send_section_size; ++ unsigned long *send_section_map; ++ int map_words; ++ + /* Used for NetVSP initialization protocol */ + struct completion channel_init_wait; + struct nvsp_message channel_init_pkt; +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index bbee44635035..c041f63a6d30 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + #include "hyperv_net.h" + +@@ -80,7 +81,7 @@ get_in_err: + } + + +-static int netvsc_destroy_recv_buf(struct netvsc_device *net_device) ++static int netvsc_destroy_buf(struct netvsc_device *net_device) + { + struct nvsp_message *revoke_packet; + int ret = 0; +@@ -146,10 +147,62 @@ static int 
netvsc_destroy_recv_buf(struct netvsc_device *net_device) + net_device->recv_section = NULL; + } + ++ /* Deal with the send buffer we may have setup. ++ * If we got a send section size, it means we received a ++ * SendsendBufferComplete msg (ie sent ++ * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need ++ * to send a revoke msg here ++ */ ++ if (net_device->send_section_size) { ++ /* Send the revoke receive buffer */ ++ revoke_packet = &net_device->revoke_packet; ++ memset(revoke_packet, 0, sizeof(struct nvsp_message)); ++ ++ revoke_packet->hdr.msg_type = ++ NVSP_MSG1_TYPE_REVOKE_SEND_BUF; ++ revoke_packet->msg.v1_msg.revoke_recv_buf.id = 0; ++ ++ ret = vmbus_sendpacket(net_device->dev->channel, ++ revoke_packet, ++ sizeof(struct nvsp_message), ++ (unsigned long)revoke_packet, ++ VM_PKT_DATA_INBAND, 0); ++ /* If we failed here, we might as well return and ++ * have a leak rather than continue and a bugchk ++ */ ++ if (ret != 0) { ++ netdev_err(ndev, "unable to send " ++ "revoke send buffer to netvsp\n"); ++ return ret; ++ } ++ } ++ /* Teardown the gpadl on the vsp end */ ++ if (net_device->send_buf_gpadl_handle) { ++ ret = vmbus_teardown_gpadl(net_device->dev->channel, ++ net_device->send_buf_gpadl_handle); ++ ++ /* If we failed here, we might as well return and have a leak ++ * rather than continue and a bugchk ++ */ ++ if (ret != 0) { ++ netdev_err(ndev, ++ "unable to teardown send buffer's gpadl\n"); ++ return ret; ++ } ++ net_device->recv_buf_gpadl_handle = 0; ++ } ++ if (net_device->send_buf) { ++ /* Free up the receive buffer */ ++ free_pages((unsigned long)net_device->send_buf, ++ get_order(net_device->send_buf_size)); ++ net_device->send_buf = NULL; ++ } ++ kfree(net_device->send_section_map); ++ + return ret; + } + +-static int netvsc_init_recv_buf(struct hv_device *device) ++static int netvsc_init_buf(struct hv_device *device) + { + int ret = 0; + int t; +@@ -248,10 +301,90 @@ static int netvsc_init_recv_buf(struct hv_device *device) + goto 
cleanup; + } + ++ /* Now setup the send buffer. ++ */ ++ net_device->send_buf = ++ (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, ++ get_order(net_device->send_buf_size)); ++ if (!net_device->send_buf) { ++ netdev_err(ndev, "unable to allocate send " ++ "buffer of size %d\n", net_device->send_buf_size); ++ ret = -ENOMEM; ++ goto cleanup; ++ } ++ ++ /* Establish the gpadl handle for this buffer on this ++ * channel. Note: This call uses the vmbus connection rather ++ * than the channel to establish the gpadl handle. ++ */ ++ ret = vmbus_establish_gpadl(device->channel, net_device->send_buf, ++ net_device->send_buf_size, ++ &net_device->send_buf_gpadl_handle); ++ if (ret != 0) { ++ netdev_err(ndev, ++ "unable to establish send buffer's gpadl\n"); ++ goto cleanup; ++ } ++ ++ /* Notify the NetVsp of the gpadl handle */ ++ init_packet = &net_device->channel_init_pkt; ++ memset(init_packet, 0, sizeof(struct nvsp_message)); ++ init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF; ++ init_packet->msg.v1_msg.send_recv_buf.gpadl_handle = ++ net_device->send_buf_gpadl_handle; ++ init_packet->msg.v1_msg.send_recv_buf.id = 0; ++ ++ /* Send the gpadl notification request */ ++ ret = vmbus_sendpacket(device->channel, init_packet, ++ sizeof(struct nvsp_message), ++ (unsigned long)init_packet, ++ VM_PKT_DATA_INBAND, ++ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); ++ if (ret != 0) { ++ netdev_err(ndev, ++ "unable to send send buffer's gpadl to netvsp\n"); ++ goto cleanup; ++ } ++ ++ t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ); ++ BUG_ON(t == 0); ++ ++ /* Check the response */ ++ if (init_packet->msg.v1_msg. ++ send_send_buf_complete.status != NVSP_STAT_SUCCESS) { ++ netdev_err(ndev, "Unable to complete send buffer " ++ "initialization with NetVsp - status %d\n", ++ init_packet->msg.v1_msg. ++ send_recv_buf_complete.status); ++ ret = -EINVAL; ++ goto cleanup; ++ } ++ ++ /* Parse the response */ ++ net_device->send_section_size = init_packet->msg. 
++ v1_msg.send_send_buf_complete.section_size; ++ ++ /* Section count is simply the size divided by the section size. ++ */ ++ net_device->send_section_cnt = ++ net_device->send_buf_size/net_device->send_section_size; ++ ++ dev_info(&device->device, "Send section size: %d, Section count:%d\n", ++ net_device->send_section_size, net_device->send_section_cnt); ++ ++ /* Setup state for managing the send buffer. */ ++ net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt, ++ BITS_PER_LONG); ++ ++ net_device->send_section_map = ++ kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL); ++ if (net_device->send_section_map == NULL) ++ goto cleanup; ++ + goto exit; + + cleanup: +- netvsc_destroy_recv_buf(net_device); ++ netvsc_destroy_buf(net_device); + + exit: + return ret; +@@ -369,8 +502,9 @@ static int netvsc_connect_vsp(struct hv_device *device) + net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; + else + net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; ++ net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE; + +- ret = netvsc_init_recv_buf(device); ++ ret = netvsc_init_buf(device); + + cleanup: + return ret; +@@ -378,7 +512,7 @@ cleanup: + + static void netvsc_disconnect_vsp(struct netvsc_device *net_device) + { +- netvsc_destroy_recv_buf(net_device); ++ netvsc_destroy_buf(net_device); + } + + /* +@@ -440,6 +574,12 @@ static inline u32 hv_ringbuf_avail_percent( + return avail_write * 100 / ring_info->ring_datasize; + } + ++static inline void netvsc_free_send_slot(struct netvsc_device *net_device, ++ u32 index) ++{ ++ sync_change_bit(index, net_device->send_section_map); ++} ++ + static void netvsc_send_completion(struct netvsc_device *net_device, + struct hv_device *device, + struct vmpacket_descriptor *packet) +@@ -447,6 +587,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device, + struct nvsp_message *nvsp_packet; + struct hv_netvsc_packet *nvsc_packet; + struct net_device *ndev; ++ u32 send_index; + + ndev = 
net_device->ndev; + +@@ -477,6 +618,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device, + + /* Notify the layer above us */ + if (nvsc_packet) { ++ send_index = nvsc_packet->send_buf_index; ++ if (send_index != NETVSC_INVALID_INDEX) ++ netvsc_free_send_slot(net_device, send_index); + q_idx = nvsc_packet->q_idx; + channel = nvsc_packet->channel; + nvsc_packet->send_completion(nvsc_packet-> +@@ -504,6 +648,52 @@ static void netvsc_send_completion(struct netvsc_device *net_device, + + } + ++static u32 netvsc_get_next_send_section(struct netvsc_device *net_device) ++{ ++ unsigned long index; ++ u32 max_words = net_device->map_words; ++ unsigned long *map_addr = (unsigned long *)net_device->send_section_map; ++ u32 section_cnt = net_device->send_section_cnt; ++ int ret_val = NETVSC_INVALID_INDEX; ++ int i; ++ int prev_val; ++ ++ for (i = 0; i < max_words; i++) { ++ if (!~(map_addr[i])) ++ continue; ++ index = ffz(map_addr[i]); ++ prev_val = sync_test_and_set_bit(index, &map_addr[i]); ++ if (prev_val) ++ continue; ++ if ((index + (i * BITS_PER_LONG)) >= section_cnt) ++ break; ++ ret_val = (index + (i * BITS_PER_LONG)); ++ break; ++ } ++ return ret_val; ++} ++ ++u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, ++ unsigned int section_index, ++ struct hv_netvsc_packet *packet) ++{ ++ char *start = net_device->send_buf; ++ char *dest = (start + (section_index * net_device->send_section_size)); ++ int i; ++ u32 msg_size = 0; ++ ++ for (i = 0; i < packet->page_buf_cnt; i++) { ++ char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT); ++ u32 offset = packet->page_buf[i].offset; ++ u32 len = packet->page_buf[i].len; ++ ++ memcpy(dest, (src + offset), len); ++ msg_size += len; ++ dest += len; ++ } ++ return msg_size; ++} ++ + int netvsc_send(struct hv_device *device, + struct hv_netvsc_packet *packet) + { +@@ -513,6 +703,10 @@ int netvsc_send(struct hv_device *device, + struct net_device *ndev; + struct vmbus_channel *out_channel = 
NULL; + u64 req_id; ++ unsigned int section_index = NETVSC_INVALID_INDEX; ++ u32 msg_size = 0; ++ struct sk_buff *skb; ++ + + net_device = get_outbound_net_device(device); + if (!net_device) +@@ -528,10 +722,26 @@ int netvsc_send(struct hv_device *device, + sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1; + } + +- /* Not using send buffer section */ ++ /* Attempt to send via sendbuf */ ++ if (packet->total_data_buflen < net_device->send_section_size) { ++ section_index = netvsc_get_next_send_section(net_device); ++ if (section_index != NETVSC_INVALID_INDEX) { ++ msg_size = netvsc_copy_to_send_buf(net_device, ++ section_index, ++ packet); ++ skb = (struct sk_buff *) ++ (unsigned long)packet->send_completion_tid; ++ if (skb) ++ dev_kfree_skb_any(skb); ++ packet->page_buf_cnt = 0; ++ } ++ } ++ packet->send_buf_index = section_index; ++ ++ + sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index = +- 0xFFFFFFFF; +- sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0; ++ section_index; ++ sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size; + + if (packet->send_completion) + req_id = (ulong)packet; +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index f3d3525ed42d..9a19aa5672e6 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -236,10 +236,11 @@ static void netvsc_xmit_completion(void *context) + struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; + struct sk_buff *skb = (struct sk_buff *) + (unsigned long)packet->send_completion_tid; ++ u32 index = packet->send_buf_index; + + kfree(packet); + +- if (skb) ++ if (skb && (index == NETVSC_INVALID_INDEX)) + dev_kfree_skb_any(skb); + } + +-- +2.4.3 + diff --git a/src/patches/linux/0020-Add-support-for-netvsc-build-without-CONFIG_SYSFS-fl.patch b/src/patches/linux/0020-Add-support-for-netvsc-build-without-CONFIG_SYSFS-fl.patch new file mode 100644 index 0000000000..b610b5475c --- /dev/null +++ 
b/src/patches/linux/0020-Add-support-for-netvsc-build-without-CONFIG_SYSFS-fl.patch @@ -0,0 +1,42 @@ +From 3f0b77385356301cf4718a94f76a4068588ecb8e Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang +Date: Thu, 8 May 2014 15:14:10 -0700 +Subject: [PATCH 20/25] Add support for netvsc build without CONFIG_SYSFS flag + +This change ensures the driver can be built successfully without the +CONFIG_SYSFS flag. +MS-TFS: 182270 + +Signed-off-by: Haiyang Zhang +Reviewed-by: K. Y. Srinivasan +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/netvsc_drv.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 9a19aa5672e6..346f1aeb9c24 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -645,8 +645,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, + packet->vlan_tci); + + skb_record_rx_queue(skb, packet->channel-> +- offermsg.offer.sub_channel_index % +- net->real_num_rx_queues); ++ offermsg.offer.sub_channel_index); + + net->stats.rx_packets++; + net->stats.rx_bytes += packet->total_data_buflen; +@@ -829,8 +828,6 @@ static int netvsc_probe(struct hv_device *dev, + nvdev = hv_get_drvdata(dev); + netif_set_real_num_tx_queues(net, nvdev->num_chn); + netif_set_real_num_rx_queues(net, nvdev->num_chn); +- dev_info(&dev->device, "real num tx,rx queues:%u, %u\n", +- net->real_num_tx_queues, net->real_num_rx_queues); + + ret = register_netdev(net); + if (ret != 0) { +-- +2.4.3 + diff --git a/src/patches/linux/0021-hyperv-Add-hash-value-into-RNDIS-Per-packet-info.patch b/src/patches/linux/0021-hyperv-Add-hash-value-into-RNDIS-Per-packet-info.patch new file mode 100644 index 0000000000..c522d28019 --- /dev/null +++ b/src/patches/linux/0021-hyperv-Add-hash-value-into-RNDIS-Per-packet-info.patch @@ -0,0 +1,93 @@ +From 1267d9b235e7612f8cdfa842a0433bb82e75f7fb Mon Sep 17 00:00:00 2001 +From: Haiyang Zhang +Date: Wed, 21 May 2014 12:55:39 -0700 
+Subject: [PATCH 21/25] hyperv: Add hash value into RNDIS Per-packet info + +It passes the hash value as the RNDIS Per-packet info to the Hyper-V host, +so that the send completion notices can be spread across multiple channels. +MS-TFS: 140273 + +Signed-off-by: Haiyang Zhang +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/hyperv_net.h | 4 ++++ + drivers/net/hyperv/netvsc_drv.c | 18 ++++++++++++++---- + 2 files changed, 18 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h +index 4b7df5a5c966..6cc37c15e0bf 100644 +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -791,6 +791,7 @@ enum ndis_per_pkt_info_type { + IEEE_8021Q_INFO, + ORIGINAL_PKTINFO, + PACKET_CANCEL_ID, ++ NBL_HASH_VALUE = PACKET_CANCEL_ID, + ORIGINAL_NET_BUFLIST, + CACHED_NET_BUFLIST, + SHORT_PKT_PADINFO, +@@ -937,6 +938,9 @@ struct ndis_tcp_lso_info { + #define NDIS_LSO_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ + sizeof(struct ndis_tcp_lso_info)) + ++#define NDIS_HASH_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \ ++ sizeof(u32)) ++ + /* Format of Information buffer passed in a SetRequest for the OID */ + /* OID_GEN_RNDIS_CONFIG_PARAMETER. 
*/ + struct rndis_config_parameter_info { +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 346f1aeb9c24..bd3b3acd04dc 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -224,9 +224,11 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, + if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1) + return 0; + +- if (netvsc_set_hash(&hash, skb)) ++ if (netvsc_set_hash(&hash, skb)) { + q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] % + ndev->real_num_tx_queues; ++ skb_set_hash(skb, hash, PKT_HASH_TYPE_L3); ++ } + + return q_idx; + } +@@ -385,6 +387,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + struct ndis_tcp_lso_info *lso_info; + int hdr_offset; + u32 net_trans_info; ++ u32 hash; + + + /* We will atmost need two pages to describe the rndis +@@ -403,9 +406,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + packet = kzalloc(sizeof(struct hv_netvsc_packet) + + (num_data_pgs * sizeof(struct hv_page_buffer)) + + sizeof(struct rndis_message) + +- NDIS_VLAN_PPI_SIZE + +- NDIS_CSUM_PPI_SIZE + +- NDIS_LSO_PPI_SIZE, GFP_ATOMIC); ++ NDIS_VLAN_PPI_SIZE + NDIS_CSUM_PPI_SIZE + ++ NDIS_LSO_PPI_SIZE + NDIS_HASH_PPI_SIZE, GFP_ATOMIC); + if (!packet) { + /* out of memory, drop packet */ + netdev_err(net, "unable to allocate hv_netvsc_packet\n"); +@@ -444,6 +446,14 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) + + rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet); + ++ hash = skb_get_hash_raw(skb); ++ if (hash != 0 && net->real_num_tx_queues > 1) { ++ rndis_msg_size += NDIS_HASH_PPI_SIZE; ++ ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE, ++ NBL_HASH_VALUE); ++ *(u32 *)((void *)ppi + ppi->ppi_offset) = hash; ++ } ++ + if (isvlan) { + struct ndis_pkt_8021q_info *vlan; + +-- +2.4.3 + diff --git 
a/src/patches/linux/0022-hyperv-fix-apparent-cut-n-paste-error-in-send-path-t.patch b/src/patches/linux/0022-hyperv-fix-apparent-cut-n-paste-error-in-send-path-t.patch new file mode 100644 index 0000000000..d849ebbfbb --- /dev/null +++ b/src/patches/linux/0022-hyperv-fix-apparent-cut-n-paste-error-in-send-path-t.patch @@ -0,0 +1,32 @@ +From ee99150c7f34737e4382e7199d2ffe3dfbb54a5c Mon Sep 17 00:00:00 2001 +From: Dave Jones +Date: Mon, 16 Jun 2014 16:59:02 -0400 +Subject: [PATCH 22/25] hyperv: fix apparent cut-n-paste error in send path + teardown + +c25aaf814a63: "hyperv: Enable sendbuf mechanism on the send path" added +some teardown code that looks like it was copied from the receive path +above, but missed a variable name replacement. + +Signed-off-by: Dave Jones +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/netvsc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index c041f63a6d30..4ed38eaecea8 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -189,7 +189,7 @@ static int netvsc_destroy_buf(struct netvsc_device *net_device) + "unable to teardown send buffer's gpadl\n"); + return ret; + } +- net_device->recv_buf_gpadl_handle = 0; ++ net_device->send_buf_gpadl_handle = 0; + } + if (net_device->send_buf) { + /* Free up the receive buffer */ +-- +2.4.3 + diff --git a/src/patches/linux/0023-hyperv-Fix-error-return-code-in-netvsc_init_buf.patch b/src/patches/linux/0023-hyperv-Fix-error-return-code-in-netvsc_init_buf.patch new file mode 100644 index 0000000000..ab0960db46 --- /dev/null +++ b/src/patches/linux/0023-hyperv-Fix-error-return-code-in-netvsc_init_buf.patch @@ -0,0 +1,34 @@ +From f1009dfec7c439a958bd9ed8893dd6aa692c61f5 Mon Sep 17 00:00:00 2001 +From: Wei Yongjun +Date: Wed, 23 Jul 2014 09:00:35 +0800 +Subject: [PATCH 23/25] hyperv: Fix error return code in netvsc_init_buf() + +Fix to return -ENOMEM from the kzalloc error handling +case
instead of 0. + +Signed-off-by: Wei Yongjun +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. Miller +--- + drivers/net/hyperv/netvsc.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index 4ed38eaecea8..d97d5f39a04e 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -378,8 +378,10 @@ static int netvsc_init_buf(struct hv_device *device) + + net_device->send_section_map = + kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL); +- if (net_device->send_section_map == NULL) ++ if (net_device->send_section_map == NULL) { ++ ret = -ENOMEM; + goto cleanup; ++ } + + goto exit; + +-- +2.4.3 + diff --git a/src/patches/linux/0024-hyperv-Fix-a-bug-in-netvsc_send.patch b/src/patches/linux/0024-hyperv-Fix-a-bug-in-netvsc_send.patch new file mode 100644 index 0000000000..8495fa24a4 --- /dev/null +++ b/src/patches/linux/0024-hyperv-Fix-a-bug-in-netvsc_send.patch @@ -0,0 +1,68 @@ +From 38bca2d5bd6fdaa0b8e1e415f79d89322c6825a8 Mon Sep 17 00:00:00 2001 +From: KY Srinivasan +Date: Sun, 5 Oct 2014 10:42:51 -0700 +Subject: [PATCH 24/25] hyperv: Fix a bug in netvsc_send() + +[ Upstream commit 3a67c9ccad926a168d8b7891537a452018368a5b ] + +After the packet is successfully sent, we should not touch the packet +as it may have been freed. This patch is based on the work done by +Long Li . + +David, please queue this up for stable. + +Signed-off-by: K. Y. Srinivasan +Reported-by: Sitsofe Wheeler +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index d97d5f39a04e..7edf976ecfa0 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -708,6 +708,7 @@ int netvsc_send(struct hv_device *device, + unsigned int section_index = NETVSC_INVALID_INDEX; + u32 msg_size = 0; + struct sk_buff *skb; ++ u16 q_idx = packet->q_idx; + + + net_device = get_outbound_net_device(device); +@@ -772,24 +773,24 @@ int netvsc_send(struct hv_device *device, + + if (ret == 0) { + atomic_inc(&net_device->num_outstanding_sends); +- atomic_inc(&net_device->queue_sends[packet->q_idx]); ++ atomic_inc(&net_device->queue_sends[q_idx]); + + if (hv_ringbuf_avail_percent(&out_channel->outbound) < + RING_AVAIL_PERCENT_LOWATER) { + netif_tx_stop_queue(netdev_get_tx_queue( +- ndev, packet->q_idx)); ++ ndev, q_idx)); + + if (atomic_read(&net_device-> +- queue_sends[packet->q_idx]) < 1) ++ queue_sends[q_idx]) < 1) + netif_tx_wake_queue(netdev_get_tx_queue( +- ndev, packet->q_idx)); ++ ndev, q_idx)); + } + } else if (ret == -EAGAIN) { + netif_tx_stop_queue(netdev_get_tx_queue( +- ndev, packet->q_idx)); +- if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) { ++ ndev, q_idx)); ++ if (atomic_read(&net_device->queue_sends[q_idx]) < 1) { + netif_tx_wake_queue(netdev_get_tx_queue( +- ndev, packet->q_idx)); ++ ndev, q_idx)); + ret = -ENOSPC; + } + } else { +-- +2.4.3 + diff --git a/src/patches/linux/0025-Drivers-hv-vmbus-Support-per-channel-driver-state.patch b/src/patches/linux/0025-Drivers-hv-vmbus-Support-per-channel-driver-state.patch new file mode 100644 index 0000000000..47037be838 --- /dev/null +++ b/src/patches/linux/0025-Drivers-hv-vmbus-Support-per-channel-driver-state.patch @@ -0,0 +1,51 @@ +From 4b71288758aa1d510402b84ca93b1ef566575d22 Mon Sep 17 00:00:00 2001 +From: "K. Y. 
Srinivasan" +Date: Mon, 3 Feb 2014 12:42:45 -0800 +Subject: [PATCH 25/25] Drivers: hv: vmbus: Support per-channel driver state + +As we implement Virtual Receive Side Scaling on the networking side +(the VRSS patches are currently under review), it will be useful to have +per-channel state that vmbus drivers can manage. Add support for +managing per-channel state. + +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/hyperv.h | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index 6088058a3e00..732dc7e37e96 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1045,6 +1045,10 @@ struct vmbus_channel { + * This will be NULL for the primary channel. + */ + struct vmbus_channel *primary_channel; ++ /* ++ * Support per-channel state for use by vmbus drivers. ++ */ ++ void *per_channel_state; + }; + + static inline void set_channel_read_state(struct vmbus_channel *c, bool state) +@@ -1052,6 +1056,16 @@ static inline void set_channel_read_state(struct vmbus_channel *c, bool state) + c->batched_reading = state; + } + ++static inline void set_per_channel_state(struct vmbus_channel *c, void *s) ++{ ++ c->per_channel_state = s; ++} ++ ++static inline void *get_per_channel_state(struct vmbus_channel *c) ++{ ++ return c->per_channel_state; ++} ++ + void vmbus_onmessage(void *context); + + int vmbus_request_offers(void); +-- +2.4.3 + -- 2.39.2