]> git.ipfire.org Git - ipfire-2.x.git/commitdiff
linux: Backport Hyper-V network driver
author: Michael Tremer <michael.tremer@ipfire.org>
Tue, 15 Dec 2015 18:30:56 +0000 (18:30 +0000)
committer: Michael Tremer <michael.tremer@ipfire.org>
Tue, 22 Dec 2015 14:47:43 +0000 (14:47 +0000)
Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
37 files changed:
lfs/linux
src/patches/linux/0001-Drivers-net-hyperv-Get-rid-of-the-rndis_filter_packe.patch [new file with mode: 0644]
src/patches/linux/0001-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch [new file with mode: 0644]
src/patches/linux/0002-Drivers-net-hyperv-Cleanup-the-receive-path.patch [new file with mode: 0644]
src/patches/linux/0002-hyperv-Remove-recv_pkt_list-and-lock.patch [new file with mode: 0644]
src/patches/linux/0003-Drivers-net-hyperv-Cleanup-the-netvsc-receive-callba.patch [new file with mode: 0644]
src/patches/linux/0003-hyperv-Simplify-the-send_completion-variables.patch [new file with mode: 0644]
src/patches/linux/0004-hyperv-Add-latest-NetVSP-versions-to-auto-negotiatio.patch [new file with mode: 0644]
src/patches/linux/0004-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch [new file with mode: 0644]
src/patches/linux/0005-Add-support-for-netvsc-build-without-CONFIG_SYSFS-fl.patch [new file with mode: 0644]
src/patches/linux/0005-Drivers-net-hyperv-Enable-scatter-gather-I-O.patch [new file with mode: 0644]
src/patches/linux/0006-Drivers-net-hyperv-Cleanup-the-send-path.patch [new file with mode: 0644]
src/patches/linux/0006-net-get-rid-of-SET_ETHTOOL_OPS.patch [new file with mode: 0644]
src/patches/linux/0007-Drivers-net-hyperv-Enable-offloads-on-the-host.patch [new file with mode: 0644]
src/patches/linux/0007-hyperv-Add-hash-value-into-RNDIS-Per-packet-info.patch [new file with mode: 0644]
src/patches/linux/0008-Drivers-net-hyperv-Enable-receive-side-IP-checksum-o.patch [new file with mode: 0644]
src/patches/linux/0008-hyperv-fix-apparent-cut-n-paste-error-in-send-path-t.patch [new file with mode: 0644]
src/patches/linux/0009-Drivers-net-hyperv-Enable-send-side-checksum-offload.patch [new file with mode: 0644]
src/patches/linux/0009-hyperv-Fix-error-return-code-in-netvsc_init_buf.patch [new file with mode: 0644]
src/patches/linux/0010-Drivers-net-hyperv-Enable-large-send-offload.patch [new file with mode: 0644]
src/patches/linux/0010-hyperv-Fix-a-bug-in-netvsc_start_xmit.patch [new file with mode: 0644]
src/patches/linux/0011-hyperv-Change-the-receive-buffer-size-for-legacy-hos.patch [moved from src/patches/linux-3.14.x-hyperv-2008-fix.patch with 81% similarity]
src/patches/linux/0011-hyperv-Fix-a-bug-in-netvsc_send.patch [new file with mode: 0644]
src/patches/linux/0012-Drivers-net-hyperv-Allocate-memory-for-all-possible-.patch [new file with mode: 0644]
src/patches/linux/0013-Drivers-net-hyperv-Negotiate-suitable-ndis-version-f.patch [new file with mode: 0644]
src/patches/linux/0014-Drivers-net-hyperv-Address-UDP-checksum-issues.patch [new file with mode: 0644]
src/patches/linux/0015-hyperv-Properly-handle-checksum-offload.patch [new file with mode: 0644]
src/patches/linux/0016-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch [new file with mode: 0644]
src/patches/linux/0017-hyperv-Remove-recv_pkt_list-and-lock.patch [new file with mode: 0644]
src/patches/linux/0018-hyperv-Simplify-the-send_completion-variables.patch [new file with mode: 0644]
src/patches/linux/0019-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch [new file with mode: 0644]
src/patches/linux/0020-Add-support-for-netvsc-build-without-CONFIG_SYSFS-fl.patch [new file with mode: 0644]
src/patches/linux/0021-hyperv-Add-hash-value-into-RNDIS-Per-packet-info.patch [new file with mode: 0644]
src/patches/linux/0022-hyperv-fix-apparent-cut-n-paste-error-in-send-path-t.patch [new file with mode: 0644]
src/patches/linux/0023-hyperv-Fix-error-return-code-in-netvsc_init_buf.patch [new file with mode: 0644]
src/patches/linux/0024-hyperv-Fix-a-bug-in-netvsc_send.patch [new file with mode: 0644]
src/patches/linux/0025-Drivers-hv-vmbus-Support-per-channel-driver-state.patch [new file with mode: 0644]

index e1d6a4ecb9a3dfd89339b4f7af787e1c162f5365..7f9c36cebfa3a1bee499cd6bf035a0c04ee2517c 100644 (file)
--- a/lfs/linux
+++ b/lfs/linux
@@ -171,8 +171,32 @@ endif
        # update the queued trim blacklist from kernel 4.2rc1
        cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-3.14.43_new_qtrim_blacklist.patch
 
-       # HyperV 2008 patch
-       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-3.14.x-hyperv-2008-fix.patch
+       # Hyper-V patches
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0001-Drivers-net-hyperv-Get-rid-of-the-rndis_filter_packe.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0002-Drivers-net-hyperv-Cleanup-the-receive-path.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0003-Drivers-net-hyperv-Cleanup-the-netvsc-receive-callba.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0004-hyperv-Add-latest-NetVSP-versions-to-auto-negotiatio.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0005-Drivers-net-hyperv-Enable-scatter-gather-I-O.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0006-Drivers-net-hyperv-Cleanup-the-send-path.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0007-Drivers-net-hyperv-Enable-offloads-on-the-host.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0008-Drivers-net-hyperv-Enable-receive-side-IP-checksum-o.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0009-Drivers-net-hyperv-Enable-send-side-checksum-offload.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0010-Drivers-net-hyperv-Enable-large-send-offload.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0011-hyperv-Change-the-receive-buffer-size-for-legacy-hos.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0012-Drivers-net-hyperv-Allocate-memory-for-all-possible-.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0013-Drivers-net-hyperv-Negotiate-suitable-ndis-version-f.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0014-Drivers-net-hyperv-Address-UDP-checksum-issues.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0015-hyperv-Properly-handle-checksum-offload.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0016-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0017-hyperv-Remove-recv_pkt_list-and-lock.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0018-hyperv-Simplify-the-send_completion-variables.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0019-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0020-Add-support-for-netvsc-build-without-CONFIG_SYSFS-fl.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0021-hyperv-Add-hash-value-into-RNDIS-Per-packet-info.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0022-hyperv-fix-apparent-cut-n-paste-error-in-send-path-t.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0023-hyperv-Fix-error-return-code-in-netvsc_init_buf.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0024-hyperv-Fix-a-bug-in-netvsc_send.patch
+       cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0025-Drivers-hv-vmbus-Support-per-channel-driver-state.patch
 
        # fix empty symbol crc's
        cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-genksyms_fix_typeof_handling.patch
diff --git a/src/patches/linux/0001-Drivers-net-hyperv-Get-rid-of-the-rndis_filter_packe.patch b/src/patches/linux/0001-Drivers-net-hyperv-Get-rid-of-the-rndis_filter_packe.patch
new file mode 100644 (file)
index 0000000..2e00392
--- /dev/null
@@ -0,0 +1,129 @@
+From f3f885fa684ff18fa4d223dc22b782f5e5d32560 Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Sun, 16 Feb 2014 16:38:43 -0800
+Subject: [PATCH 01/25] Drivers: net: hyperv: Get rid of the
+ rndis_filter_packet structure
+
+This structure is redundant; get rid of it to make the code a little more
+efficient by removing the unnecessary indirection.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h   |  6 ------
+ drivers/net/hyperv/netvsc_drv.c   |  2 +-
+ drivers/net/hyperv/rndis_filter.c | 41 +++------------------------------------
+ 3 files changed, 4 insertions(+), 45 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index 7b594ce3f21d..7645ba38bde8 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -846,12 +846,6 @@ struct rndis_message {
+ };
+-struct rndis_filter_packet {
+-      void *completion_ctx;
+-      void (*completion)(void *context);
+-      struct rndis_message msg;
+-};
+-
+ /* Handy macros */
+ /* get the size of an RNDIS message. Pass in the message type, */
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 3c1c33ceffba..28020f83ba6f 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -156,7 +156,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       /* Allocate a netvsc packet based on # of frags. */
+       packet = kzalloc(sizeof(struct hv_netvsc_packet) +
+                        (num_pages * sizeof(struct hv_page_buffer)) +
+-                       sizeof(struct rndis_filter_packet) +
++                       sizeof(struct rndis_message) +
+                        NDIS_VLAN_PPI_SIZE, GFP_ATOMIC);
+       if (!packet) {
+               /* out of memory, drop packet */
+diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
+index b54fd257652b..6a9f6021f09c 100644
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -58,9 +58,6 @@ struct rndis_request {
+       u8 request_ext[RNDIS_EXT_LEN];
+ };
+-static void rndis_filter_send_completion(void *ctx);
+-
+-
+ static struct rndis_device *get_rndis_device(void)
+ {
+       struct rndis_device *device;
+@@ -297,7 +294,7 @@ static void rndis_filter_receive_response(struct rndis_device *dev,
+                               "rndis response buffer overflow "
+                               "detected (size %u max %zu)\n",
+                               resp->msg_len,
+-                              sizeof(struct rndis_filter_packet));
++                              sizeof(struct rndis_message));
+                       if (resp->ndis_msg_type ==
+                           RNDIS_MSG_RESET_C) {
+@@ -917,17 +914,14 @@ int rndis_filter_close(struct hv_device *dev)
+ int rndis_filter_send(struct hv_device *dev,
+                            struct hv_netvsc_packet *pkt)
+ {
+-      int ret;
+-      struct rndis_filter_packet *filter_pkt;
+       struct rndis_message *rndis_msg;
+       struct rndis_packet *rndis_pkt;
+       u32 rndis_msg_size;
+       bool isvlan = pkt->vlan_tci & VLAN_TAG_PRESENT;
+       /* Add the rndis header */
+-      filter_pkt = (struct rndis_filter_packet *)pkt->extension;
++      rndis_msg = (struct rndis_message *)pkt->extension;
+-      rndis_msg = &filter_pkt->msg;
+       rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
+       if (isvlan)
+               rndis_msg_size += NDIS_VLAN_PPI_SIZE;
+@@ -980,34 +974,5 @@ int rndis_filter_send(struct hv_device *dev,
+               pkt->page_buf[1].len = rndis_msg_size - pkt->page_buf[0].len;
+       }
+-      /* Save the packet send completion and context */
+-      filter_pkt->completion = pkt->completion.send.send_completion;
+-      filter_pkt->completion_ctx =
+-                              pkt->completion.send.send_completion_ctx;
+-
+-      /* Use ours */
+-      pkt->completion.send.send_completion = rndis_filter_send_completion;
+-      pkt->completion.send.send_completion_ctx = filter_pkt;
+-
+-      ret = netvsc_send(dev, pkt);
+-      if (ret != 0) {
+-              /*
+-               * Reset the completion to originals to allow retries from
+-               * above
+-               */
+-              pkt->completion.send.send_completion =
+-                              filter_pkt->completion;
+-              pkt->completion.send.send_completion_ctx =
+-                              filter_pkt->completion_ctx;
+-      }
+-
+-      return ret;
+-}
+-
+-static void rndis_filter_send_completion(void *ctx)
+-{
+-      struct rndis_filter_packet *filter_pkt = ctx;
+-
+-      /* Pass it back to the original handler */
+-      filter_pkt->completion(filter_pkt->completion_ctx);
++      return netvsc_send(dev, pkt);
+ }
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0001-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch b/src/patches/linux/0001-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch
new file mode 100644 (file)
index 0000000..52c4da6
--- /dev/null
@@ -0,0 +1,917 @@
+From 5b54dac856cb5bd6f33f4159012773e4a33704f7 Mon Sep 17 00:00:00 2001
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 21 Apr 2014 10:20:28 -0700
+Subject: [PATCH 01/11] hyperv: Add support for virtual Receive Side Scaling
+ (vRSS)
+
+This feature allows multiple channels to be used by each virtual NIC.
+It is available on Hyper-V host 2012 R2.
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h   | 110 +++++++++++++++++++++-
+ drivers/net/hyperv/netvsc.c       | 136 +++++++++++++++++++++------
+ drivers/net/hyperv/netvsc_drv.c   | 103 ++++++++++++++++++++-
+ drivers/net/hyperv/rndis_filter.c | 189 +++++++++++++++++++++++++++++++++++++-
+ 4 files changed, 504 insertions(+), 34 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index d18f711d0b0c..57eb3f906d64 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -28,6 +28,96 @@
+ #include <linux/hyperv.h>
+ #include <linux/rndis.h>
++/* RSS related */
++#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203  /* query only */
++#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204  /* query and set */
++
++#define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88
++#define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89
++
++#define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2
++#define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2
++
++struct ndis_obj_header {
++      u8 type;
++      u8 rev;
++      u16 size;
++} __packed;
++
++/* ndis_recv_scale_cap/cap_flag */
++#define NDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000
++#define NDIS_RSS_CAPS_CLASSIFICATION_AT_ISR       0x02000000
++#define NDIS_RSS_CAPS_CLASSIFICATION_AT_DPC       0x04000000
++#define NDIS_RSS_CAPS_USING_MSI_X                 0x08000000
++#define NDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS      0x10000000
++#define NDIS_RSS_CAPS_SUPPORTS_MSI_X              0x20000000
++#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4          0x00000100
++#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6          0x00000200
++#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX       0x00000400
++
++struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */
++      struct ndis_obj_header hdr;
++      u32 cap_flag;
++      u32 num_int_msg;
++      u32 num_recv_que;
++      u16 num_indirect_tabent;
++} __packed;
++
++
++/* ndis_recv_scale_param flags */
++#define NDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED     0x0001
++#define NDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED    0x0002
++#define NDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED       0x0004
++#define NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED     0x0008
++#define NDIS_RSS_PARAM_FLAG_DISABLE_RSS            0x0010
++
++/* Hash info bits */
++#define NDIS_HASH_FUNC_TOEPLITZ 0x00000001
++#define NDIS_HASH_IPV4          0x00000100
++#define NDIS_HASH_TCP_IPV4      0x00000200
++#define NDIS_HASH_IPV6          0x00000400
++#define NDIS_HASH_IPV6_EX       0x00000800
++#define NDIS_HASH_TCP_IPV6      0x00001000
++#define NDIS_HASH_TCP_IPV6_EX   0x00002000
++
++#define NDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4)
++#define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2   40
++
++#define ITAB_NUM 128
++#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2
++extern u8 netvsc_hash_key[];
++
++struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
++      struct ndis_obj_header hdr;
++
++      /* Qualifies the rest of the information */
++      u16 flag;
++
++      /* The base CPU number to do receive processing. not used */
++      u16 base_cpu_number;
++
++      /* This describes the hash function and type being enabled */
++      u32 hashinfo;
++
++      /* The size of indirection table array */
++      u16 indirect_tabsize;
++
++      /* The offset of the indirection table from the beginning of this
++       * structure
++       */
++      u32 indirect_taboffset;
++
++      /* The size of the hash secret key */
++      u16 hashkey_size;
++
++      /* The offset of the secret key from the beginning of this structure */
++      u32 kashkey_offset;
++
++      u32 processor_masks_offset;
++      u32 num_processor_masks;
++      u32 processor_masks_entry_size;
++};
++
+ /* Fwd declaration */
+ struct hv_netvsc_packet;
+ struct ndis_tcp_ip_checksum_info;
+@@ -39,6 +129,8 @@ struct xferpage_packet {
+       /* # of netvsc packets this xfer packet contains */
+       u32 count;
++
++      struct vmbus_channel *channel;
+ };
+ /*
+@@ -54,6 +146,9 @@ struct hv_netvsc_packet {
+       bool is_data_pkt;
+       u16 vlan_tci;
++      u16 q_idx;
++      struct vmbus_channel *channel;
++
+       /*
+        * Valid only for receives when we break a xfer page packet
+        * into multiple netvsc packets
+@@ -120,6 +215,7 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
+ int netvsc_recv_callback(struct hv_device *device_obj,
+                       struct hv_netvsc_packet *packet,
+                       struct ndis_tcp_ip_checksum_info *csum_info);
++void netvsc_channel_cb(void *context);
+ int rndis_filter_open(struct hv_device *dev);
+ int rndis_filter_close(struct hv_device *dev);
+ int rndis_filter_device_add(struct hv_device *dev,
+@@ -522,6 +618,8 @@ struct nvsp_message {
+ #define NETVSC_PACKET_SIZE                      2048
++#define VRSS_SEND_TAB_SIZE 16
++
+ /* Per netvsc channel-specific */
+ struct netvsc_device {
+       struct hv_device *dev;
+@@ -555,10 +653,20 @@ struct netvsc_device {
+       struct net_device *ndev;
++      struct vmbus_channel *chn_table[NR_CPUS];
++      u32 send_table[VRSS_SEND_TAB_SIZE];
++      u32 num_chn;
++      atomic_t queue_sends[NR_CPUS];
++
+       /* Holds rndis device info */
+       void *extension;
+-      /* The recive buffer for this device */
++
++      int ring_size;
++
++      /* The primary channel callback buffer */
+       unsigned char cb_buffer[NETVSC_PACKET_SIZE];
++      /* The sub channel callback buffer */
++      unsigned char *sub_cb_buf;
+ };
+ /* NdisInitialize message */
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index f7629ecefa84..e7e77f12bc38 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -422,6 +422,9 @@ int netvsc_device_remove(struct hv_device *device)
+               kfree(netvsc_packet);
+       }
++      if (net_device->sub_cb_buf)
++              vfree(net_device->sub_cb_buf);
++
+       kfree(net_device);
+       return 0;
+ }
+@@ -461,7 +464,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
+           (nvsp_packet->hdr.msg_type ==
+            NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
+           (nvsp_packet->hdr.msg_type ==
+-           NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE)) {
++           NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
++          (nvsp_packet->hdr.msg_type ==
++           NVSP_MSG5_TYPE_SUBCHANNEL)) {
+               /* Copy the response back */
+               memcpy(&net_device->channel_init_pkt, nvsp_packet,
+                      sizeof(struct nvsp_message));
+@@ -469,28 +474,37 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
+       } else if (nvsp_packet->hdr.msg_type ==
+                  NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
+               int num_outstanding_sends;
++              u16 q_idx = 0;
++              struct vmbus_channel *channel = device->channel;
++              int queue_sends;
+               /* Get the send context */
+               nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
+                       packet->trans_id;
+               /* Notify the layer above us */
+-              if (nvsc_packet)
++              if (nvsc_packet) {
++                      q_idx = nvsc_packet->q_idx;
++                      channel = nvsc_packet->channel;
+                       nvsc_packet->completion.send.send_completion(
+                               nvsc_packet->completion.send.
+                               send_completion_ctx);
++              }
+               num_outstanding_sends =
+                       atomic_dec_return(&net_device->num_outstanding_sends);
++              queue_sends = atomic_dec_return(&net_device->
++                                              queue_sends[q_idx]);
+               if (net_device->destroy && num_outstanding_sends == 0)
+                       wake_up(&net_device->wait_drain);
+-              if (netif_queue_stopped(ndev) && !net_device->start_remove &&
+-                      (hv_ringbuf_avail_percent(&device->channel->outbound)
+-                      > RING_AVAIL_PERCENT_HIWATER ||
+-                      num_outstanding_sends < 1))
+-                              netif_wake_queue(ndev);
++              if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
++                  !net_device->start_remove &&
++                  (hv_ringbuf_avail_percent(&channel->outbound) >
++                   RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
++                              netif_tx_wake_queue(netdev_get_tx_queue(
++                                                  ndev, q_idx));
+       } else {
+               netdev_err(ndev, "Unknown send completion packet type- "
+                          "%d received!!\n", nvsp_packet->hdr.msg_type);
+@@ -505,6 +519,7 @@ int netvsc_send(struct hv_device *device,
+       int ret = 0;
+       struct nvsp_message sendMessage;
+       struct net_device *ndev;
++      struct vmbus_channel *out_channel = NULL;
+       u64 req_id;
+       net_device = get_outbound_net_device(device);
+@@ -531,15 +546,20 @@ int netvsc_send(struct hv_device *device,
+       else
+               req_id = 0;
++      out_channel = net_device->chn_table[packet->q_idx];
++      if (out_channel == NULL)
++              out_channel = device->channel;
++      packet->channel = out_channel;
++
+       if (packet->page_buf_cnt) {
+-              ret = vmbus_sendpacket_pagebuffer(device->channel,
++              ret = vmbus_sendpacket_pagebuffer(out_channel,
+                                                 packet->page_buf,
+                                                 packet->page_buf_cnt,
+                                                 &sendMessage,
+                                                 sizeof(struct nvsp_message),
+                                                 req_id);
+       } else {
+-              ret = vmbus_sendpacket(device->channel, &sendMessage,
++              ret = vmbus_sendpacket(out_channel, &sendMessage,
+                               sizeof(struct nvsp_message),
+                               req_id,
+                               VM_PKT_DATA_INBAND,
+@@ -548,17 +568,24 @@ int netvsc_send(struct hv_device *device,
+       if (ret == 0) {
+               atomic_inc(&net_device->num_outstanding_sends);
+-              if (hv_ringbuf_avail_percent(&device->channel->outbound) <
++              atomic_inc(&net_device->queue_sends[packet->q_idx]);
++
++              if (hv_ringbuf_avail_percent(&out_channel->outbound) <
+                       RING_AVAIL_PERCENT_LOWATER) {
+-                      netif_stop_queue(ndev);
++                      netif_tx_stop_queue(netdev_get_tx_queue(
++                                          ndev, packet->q_idx));
++
+                       if (atomic_read(&net_device->
+-                              num_outstanding_sends) < 1)
+-                              netif_wake_queue(ndev);
++                              queue_sends[packet->q_idx]) < 1)
++                              netif_tx_wake_queue(netdev_get_tx_queue(
++                                                  ndev, packet->q_idx));
+               }
+       } else if (ret == -EAGAIN) {
+-              netif_stop_queue(ndev);
+-              if (atomic_read(&net_device->num_outstanding_sends) < 1) {
+-                      netif_wake_queue(ndev);
++              netif_tx_stop_queue(netdev_get_tx_queue(
++                                  ndev, packet->q_idx));
++              if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) {
++                      netif_tx_wake_queue(netdev_get_tx_queue(
++                                          ndev, packet->q_idx));
+                       ret = -ENOSPC;
+               }
+       } else {
+@@ -570,6 +597,7 @@ int netvsc_send(struct hv_device *device,
+ }
+ static void netvsc_send_recv_completion(struct hv_device *device,
++                                      struct vmbus_channel *channel,
+                                       struct netvsc_device *net_device,
+                                       u64 transaction_id, u32 status)
+ {
+@@ -587,7 +615,7 @@ static void netvsc_send_recv_completion(struct hv_device *device,
+ retry_send_cmplt:
+       /* Send the completion */
+-      ret = vmbus_sendpacket(device->channel, &recvcompMessage,
++      ret = vmbus_sendpacket(channel, &recvcompMessage,
+                              sizeof(struct nvsp_message), transaction_id,
+                              VM_PKT_COMP, 0);
+       if (ret == 0) {
+@@ -618,6 +646,7 @@ static void netvsc_receive_completion(void *context)
+ {
+       struct hv_netvsc_packet *packet = context;
+       struct hv_device *device = packet->device;
++      struct vmbus_channel *channel;
+       struct netvsc_device *net_device;
+       u64 transaction_id = 0;
+       bool fsend_receive_comp = false;
+@@ -649,6 +678,7 @@ static void netvsc_receive_completion(void *context)
+        */
+       if (packet->xfer_page_pkt->count == 0) {
+               fsend_receive_comp = true;
++              channel = packet->xfer_page_pkt->channel;
+               transaction_id = packet->completion.recv.recv_completion_tid;
+               status = packet->xfer_page_pkt->status;
+               list_add_tail(&packet->xfer_page_pkt->list_ent,
+@@ -662,12 +692,13 @@ static void netvsc_receive_completion(void *context)
+       /* Send a receive completion for the xfer page packet */
+       if (fsend_receive_comp)
+-              netvsc_send_recv_completion(device, net_device, transaction_id,
+-                                      status);
++              netvsc_send_recv_completion(device, channel, net_device,
++                                          transaction_id, status);
+ }
+ static void netvsc_receive(struct netvsc_device *net_device,
++                      struct vmbus_channel *channel,
+                       struct hv_device *device,
+                       struct vmpacket_descriptor *packet)
+ {
+@@ -748,7 +779,7 @@ static void netvsc_receive(struct netvsc_device *net_device,
+               spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
+                                      flags);
+-              netvsc_send_recv_completion(device, net_device,
++              netvsc_send_recv_completion(device, channel, net_device,
+                                           vmxferpage_packet->d.trans_id,
+                                           NVSP_STAT_FAIL);
+@@ -759,6 +790,7 @@ static void netvsc_receive(struct netvsc_device *net_device,
+       xferpage_packet = (struct xferpage_packet *)listHead.next;
+       list_del(&xferpage_packet->list_ent);
+       xferpage_packet->status = NVSP_STAT_SUCCESS;
++      xferpage_packet->channel = channel;
+       /* This is how much we can satisfy */
+       xferpage_packet->count = count - 1;
+@@ -800,10 +832,45 @@ static void netvsc_receive(struct netvsc_device *net_device,
+ }
+-static void netvsc_channel_cb(void *context)
++
++static void netvsc_send_table(struct hv_device *hdev,
++                            struct vmpacket_descriptor *vmpkt)
++{
++      struct netvsc_device *nvscdev;
++      struct net_device *ndev;
++      struct nvsp_message *nvmsg;
++      int i;
++      u32 count, *tab;
++
++      nvscdev = get_outbound_net_device(hdev);
++      if (!nvscdev)
++              return;
++      ndev = nvscdev->ndev;
++
++      nvmsg = (struct nvsp_message *)((unsigned long)vmpkt +
++                                      (vmpkt->offset8 << 3));
++
++      if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE)
++              return;
++
++      count = nvmsg->msg.v5_msg.send_table.count;
++      if (count != VRSS_SEND_TAB_SIZE) {
++              netdev_err(ndev, "Received wrong send-table size:%u\n", count);
++              return;
++      }
++
++      tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
++                    nvmsg->msg.v5_msg.send_table.offset);
++
++      for (i = 0; i < count; i++)
++              nvscdev->send_table[i] = tab[i];
++}
++
++void netvsc_channel_cb(void *context)
+ {
+       int ret;
+-      struct hv_device *device = context;
++      struct vmbus_channel *channel = (struct vmbus_channel *)context;
++      struct hv_device *device;
+       struct netvsc_device *net_device;
+       u32 bytes_recvd;
+       u64 request_id;
+@@ -812,14 +879,19 @@ static void netvsc_channel_cb(void *context)
+       int bufferlen = NETVSC_PACKET_SIZE;
+       struct net_device *ndev;
++      if (channel->primary_channel != NULL)
++              device = channel->primary_channel->device_obj;
++      else
++              device = channel->device_obj;
++
+       net_device = get_inbound_net_device(device);
+       if (!net_device)
+               return;
+       ndev = net_device->ndev;
+-      buffer = net_device->cb_buffer;
++      buffer = get_per_channel_state(channel);
+       do {
+-              ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen,
++              ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
+                                          &bytes_recvd, &request_id);
+               if (ret == 0) {
+                       if (bytes_recvd > 0) {
+@@ -831,8 +903,12 @@ static void netvsc_channel_cb(void *context)
+                                       break;
+                               case VM_PKT_DATA_USING_XFER_PAGES:
+-                                      netvsc_receive(net_device,
+-                                                      device, desc);
++                                      netvsc_receive(net_device, channel,
++                                                     device, desc);
++                                      break;
++
++                              case VM_PKT_DATA_INBAND:
++                                      netvsc_send_table(device, desc);
+                                       break;
+                               default:
+@@ -893,6 +969,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
+               goto cleanup;
+       }
++      net_device->ring_size = ring_size;
++
+       /*
+        * Coming into this function, struct net_device * is
+        * registered as the driver private data.
+@@ -917,10 +995,12 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
+       }
+       init_completion(&net_device->channel_init_wait);
++      set_per_channel_state(device->channel, net_device->cb_buffer);
++
+       /* Open the channel */
+       ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
+                        ring_size * PAGE_SIZE, NULL, 0,
+-                       netvsc_channel_cb, device);
++                       netvsc_channel_cb, device->channel);
+       if (ret != 0) {
+               netdev_err(ndev, "unable to open channel: %d\n", ret);
+@@ -930,6 +1010,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
+       /* Channel is opened */
+       pr_info("hv_netvsc channel opened successfully\n");
++      net_device->chn_table[0] = device->channel;
++
+       /* Connect with the NetVsp */
+       ret = netvsc_connect_vsp(device);
+       if (ret != 0) {
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 31e55fba7cad..093cf3fc46b8 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -101,7 +101,7 @@ static int netvsc_open(struct net_device *net)
+               return ret;
+       }
+-      netif_start_queue(net);
++      netif_tx_start_all_queues(net);
+       nvdev = hv_get_drvdata(device_obj);
+       rdev = nvdev->extension;
+@@ -149,6 +149,88 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
+       return ppi;
+ }
++union sub_key {
++      u64 k;
++      struct {
++              u8 pad[3];
++              u8 kb;
++              u32 ka;
++      };
++};
++
++/* Toeplitz hash function
++ * data: network byte order
++ * return: host byte order
++ */
++static u32 comp_hash(u8 *key, int klen, u8 *data, int dlen)
++{
++      union sub_key subk;
++      int k_next = 4;
++      u8 dt;
++      int i, j;
++      u32 ret = 0;
++
++      subk.k = 0;
++      subk.ka = ntohl(*(u32 *)key);
++
++      for (i = 0; i < dlen; i++) {
++              subk.kb = key[k_next];
++              k_next = (k_next + 1) % klen;
++              dt = data[i];
++              for (j = 0; j < 8; j++) {
++                      if (dt & 0x80)
++                              ret ^= subk.ka;
++                      dt <<= 1;
++                      subk.k <<= 1;
++              }
++      }
++
++      return ret;
++}
++
++static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
++{
++      struct iphdr *iphdr;
++      int data_len;
++      bool ret = false;
++
++      if (eth_hdr(skb)->h_proto != htons(ETH_P_IP))
++              return false;
++
++      iphdr = ip_hdr(skb);
++
++      if (iphdr->version == 4) {
++              if (iphdr->protocol == IPPROTO_TCP)
++                      data_len = 12;
++              else
++                      data_len = 8;
++              *hash = comp_hash(netvsc_hash_key, HASH_KEYLEN,
++                                (u8 *)&iphdr->saddr, data_len);
++              ret = true;
++      }
++
++      return ret;
++}
++
++static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
++                      void *accel_priv, select_queue_fallback_t fallback)
++{
++      struct net_device_context *net_device_ctx = netdev_priv(ndev);
++      struct hv_device *hdev =  net_device_ctx->device_ctx;
++      struct netvsc_device *nvsc_dev = hv_get_drvdata(hdev);
++      u32 hash;
++      u16 q_idx = 0;
++
++      if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1)
++              return 0;
++
++      if (netvsc_set_hash(&hash, skb))
++              q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
++                      ndev->real_num_tx_queues;
++
++      return q_idx;
++}
++
+ static void netvsc_xmit_completion(void *context)
+ {
+       struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
+@@ -333,6 +415,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       packet->vlan_tci = skb->vlan_tci;
++      packet->q_idx = skb_get_queue_mapping(skb);
++
+       packet->is_data_pkt = true;
+       packet->total_data_buflen = skb->len;
+@@ -554,6 +638,10 @@ int netvsc_recv_callback(struct hv_device *device_obj,
+               __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+                                      packet->vlan_tci);
++      skb_record_rx_queue(skb, packet->xfer_page_pkt->channel->
++                          offermsg.offer.sub_channel_index %
++                          net->real_num_rx_queues);
++
+       net->stats.rx_packets++;
+       net->stats.rx_bytes += packet->total_data_buflen;
+@@ -602,7 +690,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
+       hv_set_drvdata(hdev, ndev);
+       device_info.ring_size = ring_size;
+       rndis_filter_device_add(hdev, &device_info);
+-      netif_wake_queue(ndev);
++      netif_tx_wake_all_queues(ndev);
+       return 0;
+ }
+@@ -648,6 +736,7 @@ static const struct net_device_ops device_ops = {
+       .ndo_change_mtu =               netvsc_change_mtu,
+       .ndo_validate_addr =            eth_validate_addr,
+       .ndo_set_mac_address =          netvsc_set_mac_addr,
++      .ndo_select_queue =             netvsc_select_queue,
+ };
+ /*
+@@ -694,9 +783,11 @@ static int netvsc_probe(struct hv_device *dev,
+       struct net_device *net = NULL;
+       struct net_device_context *net_device_ctx;
+       struct netvsc_device_info device_info;
++      struct netvsc_device *nvdev;
+       int ret;
+-      net = alloc_etherdev(sizeof(struct net_device_context));
++      net = alloc_etherdev_mq(sizeof(struct net_device_context),
++                              num_online_cpus());
+       if (!net)
+               return -ENOMEM;
+@@ -729,6 +820,12 @@ static int netvsc_probe(struct hv_device *dev,
+       }
+       memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
++      nvdev = hv_get_drvdata(dev);
++      netif_set_real_num_tx_queues(net, nvdev->num_chn);
++      netif_set_real_num_rx_queues(net, nvdev->num_chn);
++      dev_info(&dev->device, "real num tx,rx queues:%u, %u\n",
++               net->real_num_tx_queues, net->real_num_rx_queues);
++
+       ret = register_netdev(net);
+       if (ret != 0) {
+               pr_err("Unable to register netdev.\n");
+diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
+index 143a98caf618..d92cfbe43410 100644
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -31,7 +31,7 @@
+ #include "hyperv_net.h"
+-#define RNDIS_EXT_LEN 100
++#define RNDIS_EXT_LEN PAGE_SIZE
+ struct rndis_request {
+       struct list_head list_ent;
+       struct completion  wait_event;
+@@ -94,6 +94,8 @@ static struct rndis_request *get_rndis_request(struct rndis_device *dev,
+       rndis_msg->ndis_msg_type = msg_type;
+       rndis_msg->msg_len = msg_len;
++      request->pkt.q_idx = 0;
++
+       /*
+        * Set the request id. This field is always after the rndis header for
+        * request/response packet types so we just used the SetRequest as a
+@@ -509,6 +511,19 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
+       query->info_buflen = 0;
+       query->dev_vc_handle = 0;
++      if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) {
++              struct ndis_recv_scale_cap *cap;
++
++              request->request_msg.msg_len +=
++                      sizeof(struct ndis_recv_scale_cap);
++              query->info_buflen = sizeof(struct ndis_recv_scale_cap);
++              cap = (struct ndis_recv_scale_cap *)((unsigned long)query +
++                                                   query->info_buf_offset);
++              cap->hdr.type = NDIS_OBJECT_TYPE_RSS_CAPABILITIES;
++              cap->hdr.rev = NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2;
++              cap->hdr.size = sizeof(struct ndis_recv_scale_cap);
++      }
++
+       ret = rndis_filter_send_request(dev, request);
+       if (ret != 0)
+               goto cleanup;
+@@ -695,6 +710,89 @@ cleanup:
+       return ret;
+ }
++u8 netvsc_hash_key[HASH_KEYLEN] = {
++      0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
++      0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
++      0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
++      0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
++      0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
++};
++
++int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue)
++{
++      struct net_device *ndev = rdev->net_dev->ndev;
++      struct rndis_request *request;
++      struct rndis_set_request *set;
++      struct rndis_set_complete *set_complete;
++      u32 extlen = sizeof(struct ndis_recv_scale_param) +
++                   4*ITAB_NUM + HASH_KEYLEN;
++      struct ndis_recv_scale_param *rssp;
++      u32 *itab;
++      u8 *keyp;
++      int i, t, ret;
++
++      request = get_rndis_request(
++                      rdev, RNDIS_MSG_SET,
++                      RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen);
++      if (!request)
++              return -ENOMEM;
++
++      set = &request->request_msg.msg.set_req;
++      set->oid = OID_GEN_RECEIVE_SCALE_PARAMETERS;
++      set->info_buflen = extlen;
++      set->info_buf_offset = sizeof(struct rndis_set_request);
++      set->dev_vc_handle = 0;
++
++      rssp = (struct ndis_recv_scale_param *)(set + 1);
++      rssp->hdr.type = NDIS_OBJECT_TYPE_RSS_PARAMETERS;
++      rssp->hdr.rev = NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2;
++      rssp->hdr.size = sizeof(struct ndis_recv_scale_param);
++      rssp->flag = 0;
++      rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 |
++                       NDIS_HASH_TCP_IPV4;
++      rssp->indirect_tabsize = 4*ITAB_NUM;
++      rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param);
++      rssp->hashkey_size = HASH_KEYLEN;
++      rssp->kashkey_offset = rssp->indirect_taboffset +
++                             rssp->indirect_tabsize;
++
++      /* Set indirection table entries */
++      itab = (u32 *)(rssp + 1);
++      for (i = 0; i < ITAB_NUM; i++)
++              itab[i] = i % num_queue;
++
++      /* Set hask key values */
++      keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset);
++      for (i = 0; i < HASH_KEYLEN; i++)
++              keyp[i] = netvsc_hash_key[i];
++
++
++      ret = rndis_filter_send_request(rdev, request);
++      if (ret != 0)
++              goto cleanup;
++
++      t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
++      if (t == 0) {
++              netdev_err(ndev, "timeout before we got a set response...\n");
++              /* can't put_rndis_request, since we may still receive a
++               * send-completion.
++               */
++              return -ETIMEDOUT;
++      } else {
++              set_complete = &request->response_msg.msg.set_complete;
++              if (set_complete->status != RNDIS_STATUS_SUCCESS) {
++                      netdev_err(ndev, "Fail to set RSS parameters:0x%x\n",
++                                 set_complete->status);
++                      ret = -EINVAL;
++              }
++      }
++
++cleanup:
++      put_rndis_request(rdev, request);
++      return ret;
++}
++
++
+ static int rndis_filter_query_device_link_status(struct rndis_device *dev)
+ {
+       u32 size = sizeof(u32);
+@@ -886,6 +984,28 @@ static int rndis_filter_close_device(struct rndis_device *dev)
+       return ret;
+ }
++static void netvsc_sc_open(struct vmbus_channel *new_sc)
++{
++      struct netvsc_device *nvscdev;
++      u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
++      int ret;
++
++      nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj);
++
++      if (chn_index >= nvscdev->num_chn)
++              return;
++
++      set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) *
++                            NETVSC_PACKET_SIZE);
++
++      ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
++                       nvscdev->ring_size * PAGE_SIZE, NULL, 0,
++                       netvsc_channel_cb, new_sc);
++
++      if (ret == 0)
++              nvscdev->chn_table[chn_index] = new_sc;
++}
++
+ int rndis_filter_device_add(struct hv_device *dev,
+                                 void *additional_info)
+ {
+@@ -894,6 +1014,10 @@ int rndis_filter_device_add(struct hv_device *dev,
+       struct rndis_device *rndis_device;
+       struct netvsc_device_info *device_info = additional_info;
+       struct ndis_offload_params offloads;
++      struct nvsp_message *init_packet;
++      int t;
++      struct ndis_recv_scale_cap rsscap;
++      u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
+       rndis_device = get_rndis_device();
+       if (!rndis_device)
+@@ -913,6 +1037,7 @@ int rndis_filter_device_add(struct hv_device *dev,
+       /* Initialize the rndis device */
+       net_device = hv_get_drvdata(dev);
++      net_device->num_chn = 1;
+       net_device->extension = rndis_device;
+       rndis_device->net_dev = net_device;
+@@ -952,7 +1077,6 @@ int rndis_filter_device_add(struct hv_device *dev,
+       if (ret)
+               goto err_dev_remv;
+-
+       rndis_filter_query_device_link_status(rndis_device);
+       device_info->link_state = rndis_device->link_state;
+@@ -961,7 +1085,66 @@ int rndis_filter_device_add(struct hv_device *dev,
+                rndis_device->hw_mac_adr,
+                device_info->link_state ? "down" : "up");
+-      return ret;
++      if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
++              return 0;
++
++      /* vRSS setup */
++      memset(&rsscap, 0, rsscap_size);
++      ret = rndis_filter_query_device(rndis_device,
++                                      OID_GEN_RECEIVE_SCALE_CAPABILITIES,
++                                      &rsscap, &rsscap_size);
++      if (ret || rsscap.num_recv_que < 2)
++              goto out;
++
++      net_device->num_chn = (num_online_cpus() < rsscap.num_recv_que) ?
++                             num_online_cpus() : rsscap.num_recv_que;
++      if (net_device->num_chn == 1)
++              goto out;
++
++      net_device->sub_cb_buf = vzalloc((net_device->num_chn - 1) *
++                                       NETVSC_PACKET_SIZE);
++      if (!net_device->sub_cb_buf) {
++              net_device->num_chn = 1;
++              dev_info(&dev->device, "No memory for subchannels.\n");
++              goto out;
++      }
++
++      vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
++
++      init_packet = &net_device->channel_init_pkt;
++      memset(init_packet, 0, sizeof(struct nvsp_message));
++      init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL;
++      init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE;
++      init_packet->msg.v5_msg.subchn_req.num_subchannels =
++                                              net_device->num_chn - 1;
++      ret = vmbus_sendpacket(dev->channel, init_packet,
++                             sizeof(struct nvsp_message),
++                             (unsigned long)init_packet,
++                             VM_PKT_DATA_INBAND,
++                             VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
++      if (ret)
++              goto out;
++      t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
++      if (t == 0) {
++              ret = -ETIMEDOUT;
++              goto out;
++      }
++      if (init_packet->msg.v5_msg.subchn_comp.status !=
++          NVSP_STAT_SUCCESS) {
++              ret = -ENODEV;
++              goto out;
++      }
++      net_device->num_chn = 1 +
++              init_packet->msg.v5_msg.subchn_comp.num_subchannels;
++
++      vmbus_are_subchannels_present(dev->channel);
++
++      ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn);
++
++out:
++      if (ret)
++              net_device->num_chn = 1;
++      return 0; /* return 0 because primary channel can be used alone */
+ err_dev_remv:
+       rndis_filter_device_remove(dev);
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0002-Drivers-net-hyperv-Cleanup-the-receive-path.patch b/src/patches/linux/0002-Drivers-net-hyperv-Cleanup-the-receive-path.patch
new file mode 100644 (file)
index 0000000..79485b3
--- /dev/null
@@ -0,0 +1,112 @@
+From 348a5d691d84759dda8cdd3cbf9f071115c1240e Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Sun, 16 Feb 2014 16:38:44 -0800
+Subject: [PATCH 02/25] Drivers: net: hyperv: Cleanup the receive path
+
+Make the receive path a little more efficient by parameterizing the
+required state rather than re-establishing that state.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/netvsc.c | 29 +++++++++++++----------------
+ 1 file changed, 13 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index 03a2c6e17158..7fa2bbade327 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -432,17 +432,14 @@ static inline u32 hv_ringbuf_avail_percent(
+       return avail_write * 100 / ring_info->ring_datasize;
+ }
+-static void netvsc_send_completion(struct hv_device *device,
++static void netvsc_send_completion(struct netvsc_device *net_device,
++                                 struct hv_device *device,
+                                  struct vmpacket_descriptor *packet)
+ {
+-      struct netvsc_device *net_device;
+       struct nvsp_message *nvsp_packet;
+       struct hv_netvsc_packet *nvsc_packet;
+       struct net_device *ndev;
+-      net_device = get_inbound_net_device(device);
+-      if (!net_device)
+-              return;
+       ndev = net_device->ndev;
+       nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
+@@ -561,13 +558,13 @@ int netvsc_send(struct hv_device *device,
+ }
+ static void netvsc_send_recv_completion(struct hv_device *device,
++                                      struct netvsc_device *net_device,
+                                       u64 transaction_id, u32 status)
+ {
+       struct nvsp_message recvcompMessage;
+       int retries = 0;
+       int ret;
+       struct net_device *ndev;
+-      struct netvsc_device *net_device = hv_get_drvdata(device);
+       ndev = net_device->ndev;
+@@ -653,14 +650,15 @@ static void netvsc_receive_completion(void *context)
+       /* Send a receive completion for the xfer page packet */
+       if (fsend_receive_comp)
+-              netvsc_send_recv_completion(device, transaction_id, status);
++              netvsc_send_recv_completion(device, net_device, transaction_id,
++                                      status);
+ }
+-static void netvsc_receive(struct hv_device *device,
+-                          struct vmpacket_descriptor *packet)
++static void netvsc_receive(struct netvsc_device *net_device,
++                      struct hv_device *device,
++                      struct vmpacket_descriptor *packet)
+ {
+-      struct netvsc_device *net_device;
+       struct vmtransfer_page_packet_header *vmxferpage_packet;
+       struct nvsp_message *nvsp_packet;
+       struct hv_netvsc_packet *netvsc_packet = NULL;
+@@ -673,9 +671,6 @@ static void netvsc_receive(struct hv_device *device,
+       LIST_HEAD(listHead);
+-      net_device = get_inbound_net_device(device);
+-      if (!net_device)
+-              return;
+       ndev = net_device->ndev;
+       /*
+@@ -741,7 +736,7 @@ static void netvsc_receive(struct hv_device *device,
+               spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
+                                      flags);
+-              netvsc_send_recv_completion(device,
++              netvsc_send_recv_completion(device, net_device,
+                                           vmxferpage_packet->d.trans_id,
+                                           NVSP_STAT_FAIL);
+@@ -825,11 +820,13 @@ static void netvsc_channel_cb(void *context)
+                               desc = (struct vmpacket_descriptor *)buffer;
+                               switch (desc->type) {
+                               case VM_PKT_COMP:
+-                                      netvsc_send_completion(device, desc);
++                                      netvsc_send_completion(net_device,
++                                                              device, desc);
+                                       break;
+                               case VM_PKT_DATA_USING_XFER_PAGES:
+-                                      netvsc_receive(device, desc);
++                                      netvsc_receive(net_device,
++                                                      device, desc);
+                                       break;
+                               default:
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0002-hyperv-Remove-recv_pkt_list-and-lock.patch b/src/patches/linux/0002-hyperv-Remove-recv_pkt_list-and-lock.patch
new file mode 100644 (file)
index 0000000..bbe0e52
--- /dev/null
@@ -0,0 +1,384 @@
+From 4baab26129e0540746744232022110dbe9e011e7 Mon Sep 17 00:00:00 2001
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 21 Apr 2014 14:54:43 -0700
+Subject: [PATCH 02/11] hyperv: Remove recv_pkt_list and lock
+
+Removed recv_pkt_list and lock, and updated related code, so that
+the locking overhead is reduced especially when multiple channels
+are in use.
+
+The recv_pkt_list isn't actually necessary because the packets are
+processed sequentially in each channel. It has been replaced by a
+local variable, and the related lock for this list is also removed.
+The is_data_pkt field is not used in receive path, so its assignment
+is cleaned up.
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h   |  33 --------
+ drivers/net/hyperv/netvsc.c       | 174 +++-----------------------------------
+ drivers/net/hyperv/netvsc_drv.c   |   2 +-
+ drivers/net/hyperv/rndis_filter.c |   2 -
+ 4 files changed, 13 insertions(+), 198 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index 57eb3f906d64..a1af0f7711e2 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -119,27 +119,14 @@ struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
+ };
+ /* Fwd declaration */
+-struct hv_netvsc_packet;
+ struct ndis_tcp_ip_checksum_info;
+-/* Represent the xfer page packet which contains 1 or more netvsc packet */
+-struct xferpage_packet {
+-      struct list_head list_ent;
+-      u32 status;
+-
+-      /* # of netvsc packets this xfer packet contains */
+-      u32 count;
+-
+-      struct vmbus_channel *channel;
+-};
+-
+ /*
+  * Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame
+  * within the RNDIS
+  */
+ struct hv_netvsc_packet {
+       /* Bookkeeping stuff */
+-      struct list_head list_ent;
+       u32 status;
+       struct hv_device *device;
+@@ -149,19 +136,8 @@ struct hv_netvsc_packet {
+       u16 q_idx;
+       struct vmbus_channel *channel;
+-      /*
+-       * Valid only for receives when we break a xfer page packet
+-       * into multiple netvsc packets
+-       */
+-      struct xferpage_packet *xfer_page_pkt;
+-
+       union {
+               struct {
+-                      u64 recv_completion_tid;
+-                      void *recv_completion_ctx;
+-                      void (*recv_completion)(void *context);
+-              } recv;
+-              struct {
+                       u64 send_completion_tid;
+                       void *send_completion_ctx;
+                       void (*send_completion)(void *context);
+@@ -613,9 +589,6 @@ struct nvsp_message {
+ #define NETVSC_RECEIVE_BUFFER_ID              0xcafe
+-/* Preallocated receive packets */
+-#define NETVSC_RECEIVE_PACKETLIST_COUNT               256
+-
+ #define NETVSC_PACKET_SIZE                      2048
+ #define VRSS_SEND_TAB_SIZE 16
+@@ -630,12 +603,6 @@ struct netvsc_device {
+       wait_queue_head_t wait_drain;
+       bool start_remove;
+       bool destroy;
+-      /*
+-       * List of free preallocated hv_netvsc_packet to represent receive
+-       * packet
+-       */
+-      struct list_head recv_pkt_list;
+-      spinlock_t recv_pkt_list_lock;
+       /* Receive buffer allocated by us but manages by NetVSP */
+       void *recv_buf;
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index e7e77f12bc38..b10334773b32 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -387,7 +387,6 @@ static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
+ int netvsc_device_remove(struct hv_device *device)
+ {
+       struct netvsc_device *net_device;
+-      struct hv_netvsc_packet *netvsc_packet, *pos;
+       unsigned long flags;
+       net_device = hv_get_drvdata(device);
+@@ -416,12 +415,6 @@ int netvsc_device_remove(struct hv_device *device)
+       vmbus_close(device->channel);
+       /* Release all resources */
+-      list_for_each_entry_safe(netvsc_packet, pos,
+-                               &net_device->recv_pkt_list, list_ent) {
+-              list_del(&netvsc_packet->list_ent);
+-              kfree(netvsc_packet);
+-      }
+-
+       if (net_device->sub_cb_buf)
+               vfree(net_device->sub_cb_buf);
+@@ -641,62 +634,6 @@ retry_send_cmplt:
+       }
+ }
+-/* Send a receive completion packet to RNDIS device (ie NetVsp) */
+-static void netvsc_receive_completion(void *context)
+-{
+-      struct hv_netvsc_packet *packet = context;
+-      struct hv_device *device = packet->device;
+-      struct vmbus_channel *channel;
+-      struct netvsc_device *net_device;
+-      u64 transaction_id = 0;
+-      bool fsend_receive_comp = false;
+-      unsigned long flags;
+-      struct net_device *ndev;
+-      u32 status = NVSP_STAT_NONE;
+-
+-      /*
+-       * Even though it seems logical to do a GetOutboundNetDevice() here to
+-       * send out receive completion, we are using GetInboundNetDevice()
+-       * since we may have disable outbound traffic already.
+-       */
+-      net_device = get_inbound_net_device(device);
+-      if (!net_device)
+-              return;
+-      ndev = net_device->ndev;
+-
+-      /* Overloading use of the lock. */
+-      spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
+-
+-      if (packet->status != NVSP_STAT_SUCCESS)
+-              packet->xfer_page_pkt->status = NVSP_STAT_FAIL;
+-
+-      packet->xfer_page_pkt->count--;
+-
+-      /*
+-       * Last one in the line that represent 1 xfer page packet.
+-       * Return the xfer page packet itself to the freelist
+-       */
+-      if (packet->xfer_page_pkt->count == 0) {
+-              fsend_receive_comp = true;
+-              channel = packet->xfer_page_pkt->channel;
+-              transaction_id = packet->completion.recv.recv_completion_tid;
+-              status = packet->xfer_page_pkt->status;
+-              list_add_tail(&packet->xfer_page_pkt->list_ent,
+-                            &net_device->recv_pkt_list);
+-
+-      }
+-
+-      /* Put the packet back */
+-      list_add_tail(&packet->list_ent, &net_device->recv_pkt_list);
+-      spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags);
+-
+-      /* Send a receive completion for the xfer page packet */
+-      if (fsend_receive_comp)
+-              netvsc_send_recv_completion(device, channel, net_device,
+-                                          transaction_id, status);
+-
+-}
+-
+ static void netvsc_receive(struct netvsc_device *net_device,
+                       struct vmbus_channel *channel,
+                       struct hv_device *device,
+@@ -704,16 +641,13 @@ static void netvsc_receive(struct netvsc_device *net_device,
+ {
+       struct vmtransfer_page_packet_header *vmxferpage_packet;
+       struct nvsp_message *nvsp_packet;
+-      struct hv_netvsc_packet *netvsc_packet = NULL;
+-      /* struct netvsc_driver *netvscDriver; */
+-      struct xferpage_packet *xferpage_packet = NULL;
++      struct hv_netvsc_packet nv_pkt;
++      struct hv_netvsc_packet *netvsc_packet = &nv_pkt;
++      u32 status = NVSP_STAT_SUCCESS;
+       int i;
+       int count = 0;
+-      unsigned long flags;
+       struct net_device *ndev;
+-      LIST_HEAD(listHead);
+-
+       ndev = net_device->ndev;
+       /*
+@@ -746,78 +680,14 @@ static void netvsc_receive(struct netvsc_device *net_device,
+               return;
+       }
+-      /*
+-       * Grab free packets (range count + 1) to represent this xfer
+-       * page packet. +1 to represent the xfer page packet itself.
+-       * We grab it here so that we know exactly how many we can
+-       * fulfil
+-       */
+-      spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
+-      while (!list_empty(&net_device->recv_pkt_list)) {
+-              list_move_tail(net_device->recv_pkt_list.next, &listHead);
+-              if (++count == vmxferpage_packet->range_cnt + 1)
+-                      break;
+-      }
+-      spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags);
+-
+-      /*
+-       * We need at least 2 netvsc pkts (1 to represent the xfer
+-       * page and at least 1 for the range) i.e. we can handled
+-       * some of the xfer page packet ranges...
+-       */
+-      if (count < 2) {
+-              netdev_err(ndev, "Got only %d netvsc pkt...needed "
+-                      "%d pkts. Dropping this xfer page packet completely!\n",
+-                      count, vmxferpage_packet->range_cnt + 1);
+-
+-              /* Return it to the freelist */
+-              spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
+-              for (i = count; i != 0; i--) {
+-                      list_move_tail(listHead.next,
+-                                     &net_device->recv_pkt_list);
+-              }
+-              spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
+-                                     flags);
+-
+-              netvsc_send_recv_completion(device, channel, net_device,
+-                                          vmxferpage_packet->d.trans_id,
+-                                          NVSP_STAT_FAIL);
+-
+-              return;
+-      }
+-
+-      /* Remove the 1st packet to represent the xfer page packet itself */
+-      xferpage_packet = (struct xferpage_packet *)listHead.next;
+-      list_del(&xferpage_packet->list_ent);
+-      xferpage_packet->status = NVSP_STAT_SUCCESS;
+-      xferpage_packet->channel = channel;
+-
+-      /* This is how much we can satisfy */
+-      xferpage_packet->count = count - 1;
+-
+-      if (xferpage_packet->count != vmxferpage_packet->range_cnt) {
+-              netdev_err(ndev, "Needed %d netvsc pkts to satisfy "
+-                      "this xfer page...got %d\n",
+-                      vmxferpage_packet->range_cnt, xferpage_packet->count);
+-      }
++      count = vmxferpage_packet->range_cnt;
++      netvsc_packet->device = device;
++      netvsc_packet->channel = channel;
+       /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
+-      for (i = 0; i < (count - 1); i++) {
+-              netvsc_packet = (struct hv_netvsc_packet *)listHead.next;
+-              list_del(&netvsc_packet->list_ent);
+-
++      for (i = 0; i < count; i++) {
+               /* Initialize the netvsc packet */
+               netvsc_packet->status = NVSP_STAT_SUCCESS;
+-              netvsc_packet->xfer_page_pkt = xferpage_packet;
+-              netvsc_packet->completion.recv.recv_completion =
+-                                      netvsc_receive_completion;
+-              netvsc_packet->completion.recv.recv_completion_ctx =
+-                                      netvsc_packet;
+-              netvsc_packet->device = device;
+-              /* Save this so that we can send it back */
+-              netvsc_packet->completion.recv.recv_completion_tid =
+-                                      vmxferpage_packet->d.trans_id;
+-
+               netvsc_packet->data = (void *)((unsigned long)net_device->
+                       recv_buf + vmxferpage_packet->ranges[i].byte_offset);
+               netvsc_packet->total_data_buflen =
+@@ -826,10 +696,12 @@ static void netvsc_receive(struct netvsc_device *net_device,
+               /* Pass it to the upper layer */
+               rndis_filter_receive(device, netvsc_packet);
+-              netvsc_receive_completion(netvsc_packet->
+-                              completion.recv.recv_completion_ctx);
++              if (netvsc_packet->status != NVSP_STAT_SUCCESS)
++                      status = NVSP_STAT_FAIL;
+       }
++      netvsc_send_recv_completion(device, channel, net_device,
++                                  vmxferpage_packet->d.trans_id, status);
+ }
+@@ -956,11 +828,9 @@ void netvsc_channel_cb(void *context)
+ int netvsc_device_add(struct hv_device *device, void *additional_info)
+ {
+       int ret = 0;
+-      int i;
+       int ring_size =
+       ((struct netvsc_device_info *)additional_info)->ring_size;
+       struct netvsc_device *net_device;
+-      struct hv_netvsc_packet *packet, *pos;
+       struct net_device *ndev;
+       net_device = alloc_net_device(device);
+@@ -981,18 +851,6 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
+       ndev = net_device->ndev;
+       /* Initialize the NetVSC channel extension */
+-      spin_lock_init(&net_device->recv_pkt_list_lock);
+-
+-      INIT_LIST_HEAD(&net_device->recv_pkt_list);
+-
+-      for (i = 0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) {
+-              packet = kzalloc(sizeof(struct hv_netvsc_packet), GFP_KERNEL);
+-              if (!packet)
+-                      break;
+-
+-              list_add_tail(&packet->list_ent,
+-                            &net_device->recv_pkt_list);
+-      }
+       init_completion(&net_device->channel_init_wait);
+       set_per_channel_state(device->channel, net_device->cb_buffer);
+@@ -1028,16 +886,8 @@ close:
+ cleanup:
+-      if (net_device) {
+-              list_for_each_entry_safe(packet, pos,
+-                                       &net_device->recv_pkt_list,
+-                                       list_ent) {
+-                      list_del(&packet->list_ent);
+-                      kfree(packet);
+-              }
+-
++      if (net_device)
+               kfree(net_device);
+-      }
+       return ret;
+ }
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 093cf3fc46b8..8f6d53a2ed95 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -638,7 +638,7 @@ int netvsc_recv_callback(struct hv_device *device_obj,
+               __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+                                      packet->vlan_tci);
+-      skb_record_rx_queue(skb, packet->xfer_page_pkt->channel->
++      skb_record_rx_queue(skb, packet->channel->
+                           offermsg.offer.sub_channel_index %
+                           net->real_num_rx_queues);
+diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
+index d92cfbe43410..48f5a0fbd674 100644
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -401,8 +401,6 @@ static void rndis_filter_receive_data(struct rndis_device *dev,
+       pkt->total_data_buflen = rndis_pkt->data_len;
+       pkt->data = (void *)((unsigned long)pkt->data + data_offset);
+-      pkt->is_data_pkt = true;
+-
+       vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO);
+       if (vlan) {
+               pkt->vlan_tci = VLAN_TAG_PRESENT | vlan->vlanid |
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0003-Drivers-net-hyperv-Cleanup-the-netvsc-receive-callba.patch b/src/patches/linux/0003-Drivers-net-hyperv-Cleanup-the-netvsc-receive-callba.patch
new file mode 100644 (file)
index 0000000..e0b82ea
--- /dev/null
@@ -0,0 +1,101 @@
+From c9f2db35ac4f789930522d9d36200cb71b442bed Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Sun, 16 Feb 2014 16:38:45 -0800
+Subject: [PATCH 03/25] Drivers: net: hyperv: Cleanup the netvsc receive
+ callback function
+
+Get rid of the buffer allocation in the receive path for normal packets.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h |  2 ++
+ drivers/net/hyperv/netvsc.c     | 33 ++++++++++-----------------------
+ 2 files changed, 12 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index 7645ba38bde8..01a16ea77a5a 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -506,6 +506,8 @@ struct netvsc_device {
+       /* Holds rndis device info */
+       void *extension;
++      /* The recive buffer for this device */
++      unsigned char cb_buffer[NETVSC_PACKET_SIZE];
+ };
+ /* NdisInitialize message */
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index 7fa2bbade327..9a0e9c6f1414 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -795,22 +795,16 @@ static void netvsc_channel_cb(void *context)
+       struct netvsc_device *net_device;
+       u32 bytes_recvd;
+       u64 request_id;
+-      unsigned char *packet;
+       struct vmpacket_descriptor *desc;
+       unsigned char *buffer;
+       int bufferlen = NETVSC_PACKET_SIZE;
+       struct net_device *ndev;
+-      packet = kzalloc(NETVSC_PACKET_SIZE * sizeof(unsigned char),
+-                       GFP_ATOMIC);
+-      if (!packet)
+-              return;
+-      buffer = packet;
+-
+       net_device = get_inbound_net_device(device);
+       if (!net_device)
+-              goto out;
++              return;
+       ndev = net_device->ndev;
++      buffer = net_device->cb_buffer;
+       do {
+               ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen,
+@@ -838,23 +832,16 @@ static void netvsc_channel_cb(void *context)
+                                       break;
+                               }
+-                              /* reset */
+-                              if (bufferlen > NETVSC_PACKET_SIZE) {
+-                                      kfree(buffer);
+-                                      buffer = packet;
+-                                      bufferlen = NETVSC_PACKET_SIZE;
+-                              }
+                       } else {
+-                              /* reset */
+-                              if (bufferlen > NETVSC_PACKET_SIZE) {
+-                                      kfree(buffer);
+-                                      buffer = packet;
+-                                      bufferlen = NETVSC_PACKET_SIZE;
+-                              }
+-
++                              /*
++                               * We are done for this pass.
++                               */
+                               break;
+                       }
++
+               } else if (ret == -ENOBUFS) {
++                      if (bufferlen > NETVSC_PACKET_SIZE)
++                              kfree(buffer);
+                       /* Handle large packet */
+                       buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
+                       if (buffer == NULL) {
+@@ -869,8 +856,8 @@ static void netvsc_channel_cb(void *context)
+               }
+       } while (1);
+-out:
+-      kfree(buffer);
++      if (bufferlen > NETVSC_PACKET_SIZE)
++              kfree(buffer);
+       return;
+ }
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0003-hyperv-Simplify-the-send_completion-variables.patch b/src/patches/linux/0003-hyperv-Simplify-the-send_completion-variables.patch
new file mode 100644 (file)
index 0000000..dcab56f
--- /dev/null
@@ -0,0 +1,105 @@
+From 893f66277799cd46bdf97429cc5d16a815a51273 Mon Sep 17 00:00:00 2001
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 21 Apr 2014 14:54:44 -0700
+Subject: [PATCH 03/11] hyperv: Simplify the send_completion variables
+
+The union contains only one member now, so we use the variables in it directly.
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h   | 10 +++-------
+ drivers/net/hyperv/netvsc.c       |  7 +++----
+ drivers/net/hyperv/netvsc_drv.c   |  8 ++++----
+ drivers/net/hyperv/rndis_filter.c |  2 +-
+ 4 files changed, 11 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index a1af0f7711e2..d1f7826aa75f 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -136,13 +136,9 @@ struct hv_netvsc_packet {
+       u16 q_idx;
+       struct vmbus_channel *channel;
+-      union {
+-              struct {
+-                      u64 send_completion_tid;
+-                      void *send_completion_ctx;
+-                      void (*send_completion)(void *context);
+-              } send;
+-      } completion;
++      u64 send_completion_tid;
++      void *send_completion_ctx;
++      void (*send_completion)(void *context);
+       /* This points to the memory after page_buf */
+       struct rndis_message *rndis_msg;
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index b10334773b32..bbee44635035 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -479,9 +479,8 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
+               if (nvsc_packet) {
+                       q_idx = nvsc_packet->q_idx;
+                       channel = nvsc_packet->channel;
+-                      nvsc_packet->completion.send.send_completion(
+-                              nvsc_packet->completion.send.
+-                              send_completion_ctx);
++                      nvsc_packet->send_completion(nvsc_packet->
++                                                   send_completion_ctx);
+               }
+               num_outstanding_sends =
+@@ -534,7 +533,7 @@ int netvsc_send(struct hv_device *device,
+               0xFFFFFFFF;
+       sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
+-      if (packet->completion.send.send_completion)
++      if (packet->send_completion)
+               req_id = (ulong)packet;
+       else
+               req_id = 0;
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 8f6d53a2ed95..c76b66515e92 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -235,7 +235,7 @@ static void netvsc_xmit_completion(void *context)
+ {
+       struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
+       struct sk_buff *skb = (struct sk_buff *)
+-              (unsigned long)packet->completion.send.send_completion_tid;
++              (unsigned long)packet->send_completion_tid;
+       kfree(packet);
+@@ -425,9 +425,9 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+                               (num_data_pgs * sizeof(struct hv_page_buffer)));
+       /* Set the completion routine */
+-      packet->completion.send.send_completion = netvsc_xmit_completion;
+-      packet->completion.send.send_completion_ctx = packet;
+-      packet->completion.send.send_completion_tid = (unsigned long)skb;
++      packet->send_completion = netvsc_xmit_completion;
++      packet->send_completion_ctx = packet;
++      packet->send_completion_tid = (unsigned long)skb;
+       isvlan = packet->vlan_tci & VLAN_TAG_PRESENT;
+diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
+index 48f5a0fbd674..99c527adae5b 100644
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -236,7 +236,7 @@ static int rndis_filter_send_request(struct rndis_device *dev,
+                       packet->page_buf[0].len;
+       }
+-      packet->completion.send.send_completion = NULL;
++      packet->send_completion = NULL;
+       ret = netvsc_send(dev->net_dev->dev, packet);
+       return ret;
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0004-hyperv-Add-latest-NetVSP-versions-to-auto-negotiatio.patch b/src/patches/linux/0004-hyperv-Add-latest-NetVSP-versions-to-auto-negotiatio.patch
new file mode 100644 (file)
index 0000000..f9cd827
--- /dev/null
@@ -0,0 +1,171 @@
+From 3c2a271d9681cc017947c5e027acc64707c30dee Mon Sep 17 00:00:00 2001
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Wed, 19 Feb 2014 15:49:45 -0800
+Subject: [PATCH 04/25] hyperv: Add latest NetVSP versions to auto negotiation
+
+It auto negotiates the highest NetVSP version supported by both guest and host.
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h | 53 +++++++++++++++++++++++++++++++++++++++++
+ drivers/net/hyperv/netvsc.c     | 25 ++++++++++++-------
+ drivers/net/hyperv/netvsc_drv.c |  2 +-
+ 3 files changed, 70 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index 01a16ea77a5a..39fc230f5c20 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -139,6 +139,8 @@ int rndis_filter_set_device_mac(struct hv_device *hdev, char *mac);
+ #define NVSP_PROTOCOL_VERSION_1               2
+ #define NVSP_PROTOCOL_VERSION_2               0x30002
++#define NVSP_PROTOCOL_VERSION_4               0x40000
++#define NVSP_PROTOCOL_VERSION_5               0x50000
+ enum {
+       NVSP_MSG_TYPE_NONE = 0,
+@@ -193,6 +195,23 @@ enum {
+       NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE,
+       NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE_COMP,
++
++      NVSP_MSG2_MAX = NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE_COMP,
++
++      /* Version 4 messages */
++      NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION,
++      NVSP_MSG4_TYPE_SWITCH_DATA_PATH,
++      NVSP_MSG4_TYPE_UPLINK_CONNECT_STATE_DEPRECATED,
++
++      NVSP_MSG4_MAX = NVSP_MSG4_TYPE_UPLINK_CONNECT_STATE_DEPRECATED,
++
++      /* Version 5 messages */
++      NVSP_MSG5_TYPE_OID_QUERY_EX,
++      NVSP_MSG5_TYPE_OID_QUERY_EX_COMP,
++      NVSP_MSG5_TYPE_SUBCHANNEL,
++      NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE,
++
++      NVSP_MSG5_MAX = NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE,
+ };
+ enum {
+@@ -447,10 +466,44 @@ union nvsp_2_message_uber {
+       struct nvsp_2_free_rxbuf free_rxbuf;
+ } __packed;
++enum nvsp_subchannel_operation {
++      NVSP_SUBCHANNEL_NONE = 0,
++      NVSP_SUBCHANNEL_ALLOCATE,
++      NVSP_SUBCHANNEL_MAX
++};
++
++struct nvsp_5_subchannel_request {
++      u32 op;
++      u32 num_subchannels;
++} __packed;
++
++struct nvsp_5_subchannel_complete {
++      u32 status;
++      u32 num_subchannels; /* Actual number of subchannels allocated */
++} __packed;
++
++struct nvsp_5_send_indirect_table {
++      /* The number of entries in the send indirection table */
++      u32 count;
++
++      /* The offset of the send indireciton table from top of this struct.
++       * The send indirection table tells which channel to put the send
++       * traffic on. Each entry is a channel number.
++       */
++      u32 offset;
++} __packed;
++
++union nvsp_5_message_uber {
++      struct nvsp_5_subchannel_request subchn_req;
++      struct nvsp_5_subchannel_complete subchn_comp;
++      struct nvsp_5_send_indirect_table send_table;
++} __packed;
++
+ union nvsp_all_messages {
+       union nvsp_message_init_uber init_msg;
+       union nvsp_1_message_uber v1_msg;
+       union nvsp_2_message_uber v2_msg;
++      union nvsp_5_message_uber v5_msg;
+ } __packed;
+ /* ALL Messages */
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index 9a0e9c6f1414..1a0280dcba7e 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -290,7 +290,7 @@ static int negotiate_nvsp_ver(struct hv_device *device,
+           NVSP_STAT_SUCCESS)
+               return -EINVAL;
+-      if (nvsp_ver != NVSP_PROTOCOL_VERSION_2)
++      if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
+               return 0;
+       /* NVSPv2 only: Send NDIS config */
+@@ -314,6 +314,9 @@ static int netvsc_connect_vsp(struct hv_device *device)
+       struct nvsp_message *init_packet;
+       int ndis_version;
+       struct net_device *ndev;
++      u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
++              NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 };
++      int i, num_ver = 4; /* number of different NVSP versions */
+       net_device = get_outbound_net_device(device);
+       if (!net_device)
+@@ -323,13 +326,14 @@ static int netvsc_connect_vsp(struct hv_device *device)
+       init_packet = &net_device->channel_init_pkt;
+       /* Negotiate the latest NVSP protocol supported */
+-      if (negotiate_nvsp_ver(device, net_device, init_packet,
+-                             NVSP_PROTOCOL_VERSION_2) == 0) {
+-              net_device->nvsp_version = NVSP_PROTOCOL_VERSION_2;
+-      } else if (negotiate_nvsp_ver(device, net_device, init_packet,
+-                                  NVSP_PROTOCOL_VERSION_1) == 0) {
+-              net_device->nvsp_version = NVSP_PROTOCOL_VERSION_1;
+-      } else {
++      for (i = num_ver - 1; i >= 0; i--)
++              if (negotiate_nvsp_ver(device, net_device, init_packet,
++                                     ver_list[i])  == 0) {
++                      net_device->nvsp_version = ver_list[i];
++                      break;
++              }
++
++      if (i < 0) {
+               ret = -EPROTO;
+               goto cleanup;
+       }
+@@ -339,7 +343,10 @@ static int netvsc_connect_vsp(struct hv_device *device)
+       /* Send the ndis version */
+       memset(init_packet, 0, sizeof(struct nvsp_message));
+-      ndis_version = 0x00050001;
++      if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
++              ndis_version = 0x00050001;
++      else
++              ndis_version = 0x0006001e;
+       init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
+       init_packet->msg.v1_msg.
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 28020f83ba6f..8e3a0b00099b 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -328,7 +328,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
+       if (nvdev == NULL || nvdev->destroy)
+               return -ENODEV;
+-      if (nvdev->nvsp_version == NVSP_PROTOCOL_VERSION_2)
++      if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
+               limit = NETVSC_MTU;
+       if (mtu < 68 || mtu > limit)
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0004-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch b/src/patches/linux/0004-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch
new file mode 100644 (file)
index 0000000..bc09122
--- /dev/null
@@ -0,0 +1,407 @@
+From c25aaf814a63f9d9c4e45416f13d70ef0aa0be2e Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Wed, 30 Apr 2014 10:14:31 -0700
+Subject: [PATCH 04/11] hyperv: Enable sendbuf mechanism on the send path
+
+We send packets using a copy-free mechanism (this is the Guest to Host transport
+via VMBUS). While this is obviously optimal for large packets,
+it may not be optimal for small packets. Hyper-V host supports
+a second mechanism for sending packets that is "copy based". We implement that
+mechanism in this patch.
+
+In this version of the patch I have addressed a comment from David Miller.
+
+With this patch (and all of the other offload and VRSS patches), we are now able
+to almost saturate a 10G interface between Linux VMs on Hyper-V
+on different hosts - close to 9 Gbps as measured via iperf.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h |  14 +++
+ drivers/net/hyperv/netvsc.c     | 226 ++++++++++++++++++++++++++++++++++++++--
+ drivers/net/hyperv/netvsc_drv.c |   3 +-
+ 3 files changed, 234 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index d1f7826aa75f..4b7df5a5c966 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -140,6 +140,8 @@ struct hv_netvsc_packet {
+       void *send_completion_ctx;
+       void (*send_completion)(void *context);
++      u32 send_buf_index;
++
+       /* This points to the memory after page_buf */
+       struct rndis_message *rndis_msg;
+@@ -582,6 +584,9 @@ struct nvsp_message {
+ #define NETVSC_RECEIVE_BUFFER_SIZE            (1024*1024*16)  /* 16MB */
+ #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY     (1024*1024*15)  /* 15MB */
++#define NETVSC_SEND_BUFFER_SIZE                       (1024 * 1024)   /* 1MB */
++#define NETVSC_INVALID_INDEX                  -1
++
+ #define NETVSC_RECEIVE_BUFFER_ID              0xcafe
+@@ -607,6 +612,15 @@ struct netvsc_device {
+       u32 recv_section_cnt;
+       struct nvsp_1_receive_buffer_section *recv_section;
++      /* Send buffer allocated by us */
++      void *send_buf;
++      u32 send_buf_size;
++      u32 send_buf_gpadl_handle;
++      u32 send_section_cnt;
++      u32 send_section_size;
++      unsigned long *send_section_map;
++      int map_words;
++
+       /* Used for NetVSP initialization protocol */
+       struct completion channel_init_wait;
+       struct nvsp_message channel_init_pkt;
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index bbee44635035..c041f63a6d30 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -28,6 +28,7 @@
+ #include <linux/slab.h>
+ #include <linux/netdevice.h>
+ #include <linux/if_ether.h>
++#include <asm/sync_bitops.h>
+ #include "hyperv_net.h"
+@@ -80,7 +81,7 @@ get_in_err:
+ }
+-static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
++static int netvsc_destroy_buf(struct netvsc_device *net_device)
+ {
+       struct nvsp_message *revoke_packet;
+       int ret = 0;
+@@ -146,10 +147,62 @@ static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
+               net_device->recv_section = NULL;
+       }
++      /* Deal with the send buffer we may have setup.
++       * If we got a  send section size, it means we received a
++       * SendsendBufferComplete msg (ie sent
++       * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
++       * to send a revoke msg here
++       */
++      if (net_device->send_section_size) {
++              /* Send the revoke receive buffer */
++              revoke_packet = &net_device->revoke_packet;
++              memset(revoke_packet, 0, sizeof(struct nvsp_message));
++
++              revoke_packet->hdr.msg_type =
++                      NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
++              revoke_packet->msg.v1_msg.revoke_recv_buf.id = 0;
++
++              ret = vmbus_sendpacket(net_device->dev->channel,
++                                     revoke_packet,
++                                     sizeof(struct nvsp_message),
++                                     (unsigned long)revoke_packet,
++                                     VM_PKT_DATA_INBAND, 0);
++              /* If we failed here, we might as well return and
++               * have a leak rather than continue and a bugchk
++               */
++              if (ret != 0) {
++                      netdev_err(ndev, "unable to send "
++                                 "revoke send buffer to netvsp\n");
++                      return ret;
++              }
++      }
++      /* Teardown the gpadl on the vsp end */
++      if (net_device->send_buf_gpadl_handle) {
++              ret = vmbus_teardown_gpadl(net_device->dev->channel,
++                                         net_device->send_buf_gpadl_handle);
++
++              /* If we failed here, we might as well return and have a leak
++               * rather than continue and a bugchk
++               */
++              if (ret != 0) {
++                      netdev_err(ndev,
++                                 "unable to teardown send buffer's gpadl\n");
++                      return ret;
++              }
++              net_device->recv_buf_gpadl_handle = 0;
++      }
++      if (net_device->send_buf) {
++              /* Free up the receive buffer */
++              free_pages((unsigned long)net_device->send_buf,
++                         get_order(net_device->send_buf_size));
++              net_device->send_buf = NULL;
++      }
++      kfree(net_device->send_section_map);
++
+       return ret;
+ }
+-static int netvsc_init_recv_buf(struct hv_device *device)
++static int netvsc_init_buf(struct hv_device *device)
+ {
+       int ret = 0;
+       int t;
+@@ -248,10 +301,90 @@ static int netvsc_init_recv_buf(struct hv_device *device)
+               goto cleanup;
+       }
++      /* Now setup the send buffer.
++       */
++      net_device->send_buf =
++              (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
++                                       get_order(net_device->send_buf_size));
++      if (!net_device->send_buf) {
++              netdev_err(ndev, "unable to allocate send "
++                         "buffer of size %d\n", net_device->send_buf_size);
++              ret = -ENOMEM;
++              goto cleanup;
++      }
++
++      /* Establish the gpadl handle for this buffer on this
++       * channel.  Note: This call uses the vmbus connection rather
++       * than the channel to establish the gpadl handle.
++       */
++      ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
++                                  net_device->send_buf_size,
++                                  &net_device->send_buf_gpadl_handle);
++      if (ret != 0) {
++              netdev_err(ndev,
++                         "unable to establish send buffer's gpadl\n");
++              goto cleanup;
++      }
++
++      /* Notify the NetVsp of the gpadl handle */
++      init_packet = &net_device->channel_init_pkt;
++      memset(init_packet, 0, sizeof(struct nvsp_message));
++      init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
++      init_packet->msg.v1_msg.send_recv_buf.gpadl_handle =
++              net_device->send_buf_gpadl_handle;
++      init_packet->msg.v1_msg.send_recv_buf.id = 0;
++
++      /* Send the gpadl notification request */
++      ret = vmbus_sendpacket(device->channel, init_packet,
++                             sizeof(struct nvsp_message),
++                             (unsigned long)init_packet,
++                             VM_PKT_DATA_INBAND,
++                             VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
++      if (ret != 0) {
++              netdev_err(ndev,
++                         "unable to send send buffer's gpadl to netvsp\n");
++              goto cleanup;
++      }
++
++      t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
++      BUG_ON(t == 0);
++
++      /* Check the response */
++      if (init_packet->msg.v1_msg.
++          send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
++              netdev_err(ndev, "Unable to complete send buffer "
++                         "initialization with NetVsp - status %d\n",
++                         init_packet->msg.v1_msg.
++                         send_recv_buf_complete.status);
++              ret = -EINVAL;
++              goto cleanup;
++      }
++
++      /* Parse the response */
++      net_device->send_section_size = init_packet->msg.
++                              v1_msg.send_send_buf_complete.section_size;
++
++      /* Section count is simply the size divided by the section size.
++       */
++      net_device->send_section_cnt =
++              net_device->send_buf_size/net_device->send_section_size;
++
++      dev_info(&device->device, "Send section size: %d, Section count:%d\n",
++               net_device->send_section_size, net_device->send_section_cnt);
++
++      /* Setup state for managing the send buffer. */
++      net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
++                                           BITS_PER_LONG);
++
++      net_device->send_section_map =
++              kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
++      if (net_device->send_section_map == NULL)
++              goto cleanup;
++
+       goto exit;
+ cleanup:
+-      netvsc_destroy_recv_buf(net_device);
++      netvsc_destroy_buf(net_device);
+ exit:
+       return ret;
+@@ -369,8 +502,9 @@ static int netvsc_connect_vsp(struct hv_device *device)
+               net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
+       else
+               net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
++      net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
+-      ret = netvsc_init_recv_buf(device);
++      ret = netvsc_init_buf(device);
+ cleanup:
+       return ret;
+@@ -378,7 +512,7 @@ cleanup:
+ static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
+ {
+-      netvsc_destroy_recv_buf(net_device);
++      netvsc_destroy_buf(net_device);
+ }
+ /*
+@@ -440,6 +574,12 @@ static inline u32 hv_ringbuf_avail_percent(
+       return avail_write * 100 / ring_info->ring_datasize;
+ }
++static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
++                                       u32 index)
++{
++      sync_change_bit(index, net_device->send_section_map);
++}
++
+ static void netvsc_send_completion(struct netvsc_device *net_device,
+                                  struct hv_device *device,
+                                  struct vmpacket_descriptor *packet)
+@@ -447,6 +587,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
+       struct nvsp_message *nvsp_packet;
+       struct hv_netvsc_packet *nvsc_packet;
+       struct net_device *ndev;
++      u32 send_index;
+       ndev = net_device->ndev;
+@@ -477,6 +618,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
+               /* Notify the layer above us */
+               if (nvsc_packet) {
++                      send_index = nvsc_packet->send_buf_index;
++                      if (send_index != NETVSC_INVALID_INDEX)
++                              netvsc_free_send_slot(net_device, send_index);
+                       q_idx = nvsc_packet->q_idx;
+                       channel = nvsc_packet->channel;
+                       nvsc_packet->send_completion(nvsc_packet->
+@@ -504,6 +648,52 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
+ }
++static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
++{
++      unsigned long index;
++      u32 max_words = net_device->map_words;
++      unsigned long *map_addr = (unsigned long *)net_device->send_section_map;
++      u32 section_cnt = net_device->send_section_cnt;
++      int ret_val = NETVSC_INVALID_INDEX;
++      int i;
++      int prev_val;
++
++      for (i = 0; i < max_words; i++) {
++              if (!~(map_addr[i]))
++                      continue;
++              index = ffz(map_addr[i]);
++              prev_val = sync_test_and_set_bit(index, &map_addr[i]);
++              if (prev_val)
++                      continue;
++              if ((index + (i * BITS_PER_LONG)) >= section_cnt)
++                      break;
++              ret_val = (index + (i * BITS_PER_LONG));
++              break;
++      }
++      return ret_val;
++}
++
++u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
++                          unsigned int section_index,
++                          struct hv_netvsc_packet *packet)
++{
++      char *start = net_device->send_buf;
++      char *dest = (start + (section_index * net_device->send_section_size));
++      int i;
++      u32 msg_size = 0;
++
++      for (i = 0; i < packet->page_buf_cnt; i++) {
++              char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT);
++              u32 offset = packet->page_buf[i].offset;
++              u32 len = packet->page_buf[i].len;
++
++              memcpy(dest, (src + offset), len);
++              msg_size += len;
++              dest += len;
++      }
++      return msg_size;
++}
++
+ int netvsc_send(struct hv_device *device,
+                       struct hv_netvsc_packet *packet)
+ {
+@@ -513,6 +703,10 @@ int netvsc_send(struct hv_device *device,
+       struct net_device *ndev;
+       struct vmbus_channel *out_channel = NULL;
+       u64 req_id;
++      unsigned int section_index = NETVSC_INVALID_INDEX;
++      u32 msg_size = 0;
++      struct sk_buff *skb;
++
+       net_device = get_outbound_net_device(device);
+       if (!net_device)
+@@ -528,10 +722,26 @@ int netvsc_send(struct hv_device *device,
+               sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
+       }
+-      /* Not using send buffer section */
++      /* Attempt to send via sendbuf */
++      if (packet->total_data_buflen < net_device->send_section_size) {
++              section_index = netvsc_get_next_send_section(net_device);
++              if (section_index != NETVSC_INVALID_INDEX) {
++                      msg_size = netvsc_copy_to_send_buf(net_device,
++                                                         section_index,
++                                                         packet);
++                      skb = (struct sk_buff *)
++                            (unsigned long)packet->send_completion_tid;
++                      if (skb)
++                              dev_kfree_skb_any(skb);
++                      packet->page_buf_cnt = 0;
++              }
++      }
++      packet->send_buf_index = section_index;
++
++
+       sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
+-              0xFFFFFFFF;
+-      sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
++              section_index;
++      sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size;
+       if (packet->send_completion)
+               req_id = (ulong)packet;
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index c76b66515e92..939e3af60ec4 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -236,10 +236,11 @@ static void netvsc_xmit_completion(void *context)
+       struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
+       struct sk_buff *skb = (struct sk_buff *)
+               (unsigned long)packet->send_completion_tid;
++      u32 index = packet->send_buf_index;
+       kfree(packet);
+-      if (skb)
++      if (skb && (index == NETVSC_INVALID_INDEX))
+               dev_kfree_skb_any(skb);
+ }
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0005-Add-support-for-netvsc-build-without-CONFIG_SYSFS-fl.patch b/src/patches/linux/0005-Add-support-for-netvsc-build-without-CONFIG_SYSFS-fl.patch
new file mode 100644 (file)
index 0000000..4d00ad2
--- /dev/null
@@ -0,0 +1,42 @@
+From e565e803d437b36c4fb4ced5e346827981183284 Mon Sep 17 00:00:00 2001
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Thu, 8 May 2014 15:14:10 -0700
+Subject: [PATCH 05/11] Add support for netvsc build without CONFIG_SYSFS flag
+
+This change ensures the driver can be built successfully without the
+CONFIG_SYSFS flag.
+MS-TFS: 182270
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/netvsc_drv.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 939e3af60ec4..083d084396d3 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -640,8 +640,7 @@ int netvsc_recv_callback(struct hv_device *device_obj,
+                                      packet->vlan_tci);
+       skb_record_rx_queue(skb, packet->channel->
+-                          offermsg.offer.sub_channel_index %
+-                          net->real_num_rx_queues);
++                          offermsg.offer.sub_channel_index);
+       net->stats.rx_packets++;
+       net->stats.rx_bytes += packet->total_data_buflen;
+@@ -824,8 +823,6 @@ static int netvsc_probe(struct hv_device *dev,
+       nvdev = hv_get_drvdata(dev);
+       netif_set_real_num_tx_queues(net, nvdev->num_chn);
+       netif_set_real_num_rx_queues(net, nvdev->num_chn);
+-      dev_info(&dev->device, "real num tx,rx queues:%u, %u\n",
+-               net->real_num_tx_queues, net->real_num_rx_queues);
+       ret = register_netdev(net);
+       if (ret != 0) {
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0005-Drivers-net-hyperv-Enable-scatter-gather-I-O.patch b/src/patches/linux/0005-Drivers-net-hyperv-Enable-scatter-gather-I-O.patch
new file mode 100644 (file)
index 0000000..27fb2d9
--- /dev/null
@@ -0,0 +1,212 @@
+From 4c06034001e20ff9f6e2a1a3dfa155bf3f31440c Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Sat, 8 Mar 2014 19:23:13 -0800
+Subject: [PATCH 05/25] Drivers: net: hyperv: Enable scatter gather I/O
+
+Cleanup the code and enable scatter gather I/O.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/netvsc_drv.c | 153 ++++++++++++++++++++++++++++++----------
+ 1 file changed, 114 insertions(+), 39 deletions(-)
+
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 8e3a0b00099b..72961741be54 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -140,22 +140,124 @@ static void netvsc_xmit_completion(void *context)
+               dev_kfree_skb_any(skb);
+ }
++static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
++                      struct hv_page_buffer *pb)
++{
++      int j = 0;
++
++      /* Deal with compund pages by ignoring unused part
++       * of the page.
++       */
++      page += (offset >> PAGE_SHIFT);
++      offset &= ~PAGE_MASK;
++
++      while (len > 0) {
++              unsigned long bytes;
++
++              bytes = PAGE_SIZE - offset;
++              if (bytes > len)
++                      bytes = len;
++              pb[j].pfn = page_to_pfn(page);
++              pb[j].offset = offset;
++              pb[j].len = bytes;
++
++              offset += bytes;
++              len -= bytes;
++
++              if (offset == PAGE_SIZE && len) {
++                      page++;
++                      offset = 0;
++                      j++;
++              }
++      }
++
++      return j + 1;
++}
++
++static void init_page_array(void *hdr, u32 len, struct sk_buff *skb,
++                          struct hv_page_buffer *pb)
++{
++      u32 slots_used = 0;
++      char *data = skb->data;
++      int frags = skb_shinfo(skb)->nr_frags;
++      int i;
++
++      /* The packet is laid out thus:
++       * 1. hdr
++       * 2. skb linear data
++       * 3. skb fragment data
++       */
++      if (hdr != NULL)
++              slots_used += fill_pg_buf(virt_to_page(hdr),
++                                      offset_in_page(hdr),
++                                      len, &pb[slots_used]);
++
++      slots_used += fill_pg_buf(virt_to_page(data),
++                              offset_in_page(data),
++                              skb_headlen(skb), &pb[slots_used]);
++
++      for (i = 0; i < frags; i++) {
++              skb_frag_t *frag = skb_shinfo(skb)->frags + i;
++
++              slots_used += fill_pg_buf(skb_frag_page(frag),
++                                      frag->page_offset,
++                                      skb_frag_size(frag), &pb[slots_used]);
++      }
++}
++
++static int count_skb_frag_slots(struct sk_buff *skb)
++{
++      int i, frags = skb_shinfo(skb)->nr_frags;
++      int pages = 0;
++
++      for (i = 0; i < frags; i++) {
++              skb_frag_t *frag = skb_shinfo(skb)->frags + i;
++              unsigned long size = skb_frag_size(frag);
++              unsigned long offset = frag->page_offset;
++
++              /* Skip unused frames from start of page */
++              offset &= ~PAGE_MASK;
++              pages += PFN_UP(offset + size);
++      }
++      return pages;
++}
++
++static int netvsc_get_slots(struct sk_buff *skb)
++{
++      char *data = skb->data;
++      unsigned int offset = offset_in_page(data);
++      unsigned int len = skb_headlen(skb);
++      int slots;
++      int frag_slots;
++
++      slots = DIV_ROUND_UP(offset + len, PAGE_SIZE);
++      frag_slots = count_skb_frag_slots(skb);
++      return slots + frag_slots;
++}
++
+ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+ {
+       struct net_device_context *net_device_ctx = netdev_priv(net);
+       struct hv_netvsc_packet *packet;
+       int ret;
+-      unsigned int i, num_pages, npg_data;
++      unsigned int num_data_pages;
+       u32 skb_length = skb->len;
+-      /* Add multipages for skb->data and additional 2 for RNDIS */
+-      npg_data = (((unsigned long)skb->data + skb_headlen(skb) - 1)
+-              >> PAGE_SHIFT) - ((unsigned long)skb->data >> PAGE_SHIFT) + 1;
+-      num_pages = skb_shinfo(skb)->nr_frags + npg_data + 2;
++      /* We will atmost need two pages to describe the rndis
++       * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
++       * of pages in a single packet.
++       */
++      num_data_pages = netvsc_get_slots(skb) + 2;
++      if (num_data_pages > MAX_PAGE_BUFFER_COUNT) {
++              netdev_err(net, "Packet too big: %u\n", skb->len);
++              dev_kfree_skb(skb);
++              net->stats.tx_dropped++;
++              return NETDEV_TX_OK;
++      }
+       /* Allocate a netvsc packet based on # of frags. */
+       packet = kzalloc(sizeof(struct hv_netvsc_packet) +
+-                       (num_pages * sizeof(struct hv_page_buffer)) +
++                       (num_data_pages * sizeof(struct hv_page_buffer)) +
+                        sizeof(struct rndis_message) +
+                        NDIS_VLAN_PPI_SIZE, GFP_ATOMIC);
+       if (!packet) {
+@@ -170,44 +272,17 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       packet->vlan_tci = skb->vlan_tci;
+       packet->extension = (void *)(unsigned long)packet +
+-                              sizeof(struct hv_netvsc_packet) +
+-                                  (num_pages * sizeof(struct hv_page_buffer));
++                      sizeof(struct hv_netvsc_packet) +
++                      (num_data_pages * sizeof(struct hv_page_buffer));
+       /* If the rndis msg goes beyond 1 page, we will add 1 later */
+-      packet->page_buf_cnt = num_pages - 1;
++      packet->page_buf_cnt = num_data_pages - 1;
+       /* Initialize it from the skb */
+       packet->total_data_buflen = skb->len;
+       /* Start filling in the page buffers starting after RNDIS buffer. */
+-      packet->page_buf[1].pfn = virt_to_phys(skb->data) >> PAGE_SHIFT;
+-      packet->page_buf[1].offset
+-              = (unsigned long)skb->data & (PAGE_SIZE - 1);
+-      if (npg_data == 1)
+-              packet->page_buf[1].len = skb_headlen(skb);
+-      else
+-              packet->page_buf[1].len = PAGE_SIZE
+-                      - packet->page_buf[1].offset;
+-
+-      for (i = 2; i <= npg_data; i++) {
+-              packet->page_buf[i].pfn = virt_to_phys(skb->data
+-                      + PAGE_SIZE * (i-1)) >> PAGE_SHIFT;
+-              packet->page_buf[i].offset = 0;
+-              packet->page_buf[i].len = PAGE_SIZE;
+-      }
+-      if (npg_data > 1)
+-              packet->page_buf[npg_data].len = (((unsigned long)skb->data
+-                      + skb_headlen(skb) - 1) & (PAGE_SIZE - 1)) + 1;
+-
+-      /* Additional fragments are after SKB data */
+-      for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+-              const skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+-
+-              packet->page_buf[i+npg_data+1].pfn =
+-                      page_to_pfn(skb_frag_page(f));
+-              packet->page_buf[i+npg_data+1].offset = f->page_offset;
+-              packet->page_buf[i+npg_data+1].len = skb_frag_size(f);
+-      }
++      init_page_array(NULL, 0, skb, &packet->page_buf[1]);
+       /* Set the completion routine */
+       packet->completion.send.send_completion = netvsc_xmit_completion;
+@@ -454,8 +529,8 @@ static int netvsc_probe(struct hv_device *dev,
+       net->netdev_ops = &device_ops;
+       /* TODO: Add GSO and Checksum offload */
+-      net->hw_features = 0;
+-      net->features = NETIF_F_HW_VLAN_CTAG_TX;
++      net->hw_features = NETIF_F_SG;
++      net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG;
+       SET_ETHTOOL_OPS(net, &ethtool_ops);
+       SET_NETDEV_DEV(net, &dev->device);
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0006-Drivers-net-hyperv-Cleanup-the-send-path.patch b/src/patches/linux/0006-Drivers-net-hyperv-Cleanup-the-send-path.patch
new file mode 100644 (file)
index 0000000..20fcb3f
--- /dev/null
@@ -0,0 +1,266 @@
+From d972eb71fb95660fe74616901b55b0d7a336daed Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Sat, 8 Mar 2014 19:23:14 -0800
+Subject: [PATCH 06/25] Drivers: net: hyperv: Cleanup the send path
+
+In preparation for enabling offloads, cleanup the send path.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h   |  7 +---
+ drivers/net/hyperv/netvsc_drv.c   | 88 +++++++++++++++++++++++++++++++--------
+ drivers/net/hyperv/rndis_filter.c | 66 -----------------------------
+ 3 files changed, 71 insertions(+), 90 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index 39fc230f5c20..694bf7cada90 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -73,7 +73,7 @@ struct hv_netvsc_packet {
+       } completion;
+       /* This points to the memory after page_buf */
+-      void *extension;
++      struct rndis_message *rndis_msg;
+       u32 total_data_buflen;
+       /* Points to the send/receive buffer where the ethernet frame is */
+@@ -126,11 +126,6 @@ void rndis_filter_device_remove(struct hv_device *dev);
+ int rndis_filter_receive(struct hv_device *dev,
+                       struct hv_netvsc_packet *pkt);
+-
+-
+-int rndis_filter_send(struct hv_device *dev,
+-                      struct hv_netvsc_packet *pkt);
+-
+ int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter);
+ int rndis_filter_set_device_mac(struct hv_device *hdev, char *mac);
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 72961741be54..87293a15e470 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -128,6 +128,27 @@ static int netvsc_close(struct net_device *net)
+       return ret;
+ }
++static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
++                              int pkt_type)
++{
++      struct rndis_packet *rndis_pkt;
++      struct rndis_per_packet_info *ppi;
++
++      rndis_pkt = &msg->msg.pkt;
++      rndis_pkt->data_offset += ppi_size;
++
++      ppi = (struct rndis_per_packet_info *)((void *)rndis_pkt +
++              rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_len);
++
++      ppi->size = ppi_size;
++      ppi->type = pkt_type;
++      ppi->ppi_offset = sizeof(struct rndis_per_packet_info);
++
++      rndis_pkt->per_pkt_info_len += ppi_size;
++
++      return ppi;
++}
++
+ static void netvsc_xmit_completion(void *context)
+ {
+       struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
+@@ -174,8 +195,8 @@ static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
+       return j + 1;
+ }
+-static void init_page_array(void *hdr, u32 len, struct sk_buff *skb,
+-                          struct hv_page_buffer *pb)
++static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
++                         struct hv_page_buffer *pb)
+ {
+       u32 slots_used = 0;
+       char *data = skb->data;
+@@ -203,6 +224,7 @@ static void init_page_array(void *hdr, u32 len, struct sk_buff *skb,
+                                       frag->page_offset,
+                                       skb_frag_size(frag), &pb[slots_used]);
+       }
++      return slots_used;
+ }
+ static int count_skb_frag_slots(struct sk_buff *skb)
+@@ -240,15 +262,20 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       struct net_device_context *net_device_ctx = netdev_priv(net);
+       struct hv_netvsc_packet *packet;
+       int ret;
+-      unsigned int num_data_pages;
+       u32 skb_length = skb->len;
++      unsigned int num_data_pgs;
++      struct rndis_message *rndis_msg;
++      struct rndis_packet *rndis_pkt;
++      u32 rndis_msg_size;
++      bool isvlan;
++      struct rndis_per_packet_info *ppi;
+       /* We will atmost need two pages to describe the rndis
+        * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
+        * of pages in a single packet.
+        */
+-      num_data_pages = netvsc_get_slots(skb) + 2;
+-      if (num_data_pages > MAX_PAGE_BUFFER_COUNT) {
++      num_data_pgs = netvsc_get_slots(skb) + 2;
++      if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
+               netdev_err(net, "Packet too big: %u\n", skb->len);
+               dev_kfree_skb(skb);
+               net->stats.tx_dropped++;
+@@ -257,7 +284,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       /* Allocate a netvsc packet based on # of frags. */
+       packet = kzalloc(sizeof(struct hv_netvsc_packet) +
+-                       (num_data_pages * sizeof(struct hv_page_buffer)) +
++                       (num_data_pgs * sizeof(struct hv_page_buffer)) +
+                        sizeof(struct rndis_message) +
+                        NDIS_VLAN_PPI_SIZE, GFP_ATOMIC);
+       if (!packet) {
+@@ -271,26 +298,51 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       packet->vlan_tci = skb->vlan_tci;
+-      packet->extension = (void *)(unsigned long)packet +
+-                      sizeof(struct hv_netvsc_packet) +
+-                      (num_data_pages * sizeof(struct hv_page_buffer));
+-
+-      /* If the rndis msg goes beyond 1 page, we will add 1 later */
+-      packet->page_buf_cnt = num_data_pages - 1;
+-
+-      /* Initialize it from the skb */
++      packet->is_data_pkt = true;
+       packet->total_data_buflen = skb->len;
+-      /* Start filling in the page buffers starting after RNDIS buffer. */
+-      init_page_array(NULL, 0, skb, &packet->page_buf[1]);
++      packet->rndis_msg = (struct rndis_message *)((unsigned long)packet +
++                              sizeof(struct hv_netvsc_packet) +
++                              (num_data_pgs * sizeof(struct hv_page_buffer)));
+       /* Set the completion routine */
+       packet->completion.send.send_completion = netvsc_xmit_completion;
+       packet->completion.send.send_completion_ctx = packet;
+       packet->completion.send.send_completion_tid = (unsigned long)skb;
+-      ret = rndis_filter_send(net_device_ctx->device_ctx,
+-                                packet);
++      isvlan = packet->vlan_tci & VLAN_TAG_PRESENT;
++
++      /* Add the rndis header */
++      rndis_msg = packet->rndis_msg;
++      rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
++      rndis_msg->msg_len = packet->total_data_buflen;
++      rndis_pkt = &rndis_msg->msg.pkt;
++      rndis_pkt->data_offset = sizeof(struct rndis_packet);
++      rndis_pkt->data_len = packet->total_data_buflen;
++      rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);
++
++      rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
++
++      if (isvlan) {
++              struct ndis_pkt_8021q_info *vlan;
++
++              rndis_msg_size += NDIS_VLAN_PPI_SIZE;
++              ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
++                                      IEEE_8021Q_INFO);
++              vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
++                                              ppi->ppi_offset);
++              vlan->vlanid = packet->vlan_tci & VLAN_VID_MASK;
++              vlan->pri = (packet->vlan_tci & VLAN_PRIO_MASK) >>
++                              VLAN_PRIO_SHIFT;
++      }
++
++      /* Start filling in the page buffers with the rndis hdr */
++      rndis_msg->msg_len += rndis_msg_size;
++      packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
++                                      skb, &packet->page_buf[0]);
++
++      ret = netvsc_send(net_device_ctx->device_ctx, packet);
++
+       if (ret == 0) {
+               net->stats.tx_bytes += skb_length;
+               net->stats.tx_packets++;
+diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
+index 6a9f6021f09c..dcbf144ea7da 100644
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -910,69 +910,3 @@ int rndis_filter_close(struct hv_device *dev)
+       return rndis_filter_close_device(nvdev->extension);
+ }
+-
+-int rndis_filter_send(struct hv_device *dev,
+-                           struct hv_netvsc_packet *pkt)
+-{
+-      struct rndis_message *rndis_msg;
+-      struct rndis_packet *rndis_pkt;
+-      u32 rndis_msg_size;
+-      bool isvlan = pkt->vlan_tci & VLAN_TAG_PRESENT;
+-
+-      /* Add the rndis header */
+-      rndis_msg = (struct rndis_message *)pkt->extension;
+-
+-      rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
+-      if (isvlan)
+-              rndis_msg_size += NDIS_VLAN_PPI_SIZE;
+-
+-      rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
+-      rndis_msg->msg_len = pkt->total_data_buflen +
+-                                    rndis_msg_size;
+-
+-      rndis_pkt = &rndis_msg->msg.pkt;
+-      rndis_pkt->data_offset = sizeof(struct rndis_packet);
+-      if (isvlan)
+-              rndis_pkt->data_offset += NDIS_VLAN_PPI_SIZE;
+-      rndis_pkt->data_len = pkt->total_data_buflen;
+-
+-      if (isvlan) {
+-              struct rndis_per_packet_info *ppi;
+-              struct ndis_pkt_8021q_info *vlan;
+-
+-              rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);
+-              rndis_pkt->per_pkt_info_len = NDIS_VLAN_PPI_SIZE;
+-
+-              ppi = (struct rndis_per_packet_info *)((ulong)rndis_pkt +
+-                      rndis_pkt->per_pkt_info_offset);
+-              ppi->size = NDIS_VLAN_PPI_SIZE;
+-              ppi->type = IEEE_8021Q_INFO;
+-              ppi->ppi_offset = sizeof(struct rndis_per_packet_info);
+-
+-              vlan = (struct ndis_pkt_8021q_info *)((ulong)ppi +
+-                      ppi->ppi_offset);
+-              vlan->vlanid = pkt->vlan_tci & VLAN_VID_MASK;
+-              vlan->pri = (pkt->vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+-      }
+-
+-      pkt->is_data_pkt = true;
+-      pkt->page_buf[0].pfn = virt_to_phys(rndis_msg) >> PAGE_SHIFT;
+-      pkt->page_buf[0].offset =
+-                      (unsigned long)rndis_msg & (PAGE_SIZE-1);
+-      pkt->page_buf[0].len = rndis_msg_size;
+-
+-      /* Add one page_buf if the rndis msg goes beyond page boundary */
+-      if (pkt->page_buf[0].offset + rndis_msg_size > PAGE_SIZE) {
+-              int i;
+-              for (i = pkt->page_buf_cnt; i > 1; i--)
+-                      pkt->page_buf[i] = pkt->page_buf[i-1];
+-              pkt->page_buf_cnt++;
+-              pkt->page_buf[0].len = PAGE_SIZE - pkt->page_buf[0].offset;
+-              pkt->page_buf[1].pfn = virt_to_phys((void *)((ulong)
+-                      rndis_msg + pkt->page_buf[0].len)) >> PAGE_SHIFT;
+-              pkt->page_buf[1].offset = 0;
+-              pkt->page_buf[1].len = rndis_msg_size - pkt->page_buf[0].len;
+-      }
+-
+-      return netvsc_send(dev, pkt);
+-}
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0006-net-get-rid-of-SET_ETHTOOL_OPS.patch b/src/patches/linux/0006-net-get-rid-of-SET_ETHTOOL_OPS.patch
new file mode 100644 (file)
index 0000000..e4a7a46
--- /dev/null
@@ -0,0 +1,44 @@
+From 7ad24ea4bf620a32631d7b3069c3e30c078b0c3e Mon Sep 17 00:00:00 2001
+From: Wilfried Klaebe <w-lkml@lebenslange-mailadresse.de>
+Date: Sun, 11 May 2014 00:12:32 +0000
+Subject: [PATCH 06/11] net: get rid of SET_ETHTOOL_OPS
+
+net: get rid of SET_ETHTOOL_OPS
+
+Dave Miller mentioned he'd like to see SET_ETHTOOL_OPS gone.
+This does that.
+
+Mostly done via coccinelle script:
+@@
+struct ethtool_ops *ops;
+struct net_device *dev;
+@@
+-       SET_ETHTOOL_OPS(dev, ops);
++       dev->ethtool_ops = ops;
+
+Compile tested only, but I'd seriously wonder if this broke anything.
+
+Suggested-by: Dave Miller <davem@davemloft.net>
+Signed-off-by: Wilfried Klaebe <w-lkml@lebenslange-mailadresse.de>
+Acked-by: Felipe Balbi <balbi@ti.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/netvsc_drv.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 1de3ef5dd5d2..2e967a7bdb33 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -810,7 +810,7 @@ static int netvsc_probe(struct hv_device *dev,
+       net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM |
+                       NETIF_F_IP_CSUM | NETIF_F_TSO;
+-      SET_ETHTOOL_OPS(net, &ethtool_ops);
++      net->ethtool_ops = &ethtool_ops;
+       SET_NETDEV_DEV(net, &dev->device);
+       /* Notify the netvsc driver of the new device */
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0007-Drivers-net-hyperv-Enable-offloads-on-the-host.patch b/src/patches/linux/0007-Drivers-net-hyperv-Enable-offloads-on-the-host.patch
new file mode 100644 (file)
index 0000000..a109f04
--- /dev/null
@@ -0,0 +1,196 @@
+From 6b15b5f37e976a5f3840c7ea59560e10c6251250 Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Sat, 8 Mar 2014 19:23:15 -0800
+Subject: [PATCH 07/25] Drivers: net: hyperv: Enable offloads on the host
+
+Prior to enabling guest side offloads, enable the offloads on the host.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h   | 55 +++++++++++++++++++++++++++
+ drivers/net/hyperv/rndis_filter.c | 80 +++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 135 insertions(+)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index 694bf7cada90..8bc4e766589b 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -721,6 +721,61 @@ struct ndis_pkt_8021q_info {
+       };
+ };
++struct ndis_oject_header {
++      u8 type;
++      u8 revision;
++      u16 size;
++};
++
++#define NDIS_OBJECT_TYPE_DEFAULT      0x80
++#define NDIS_OFFLOAD_PARAMETERS_REVISION_3 3
++#define NDIS_OFFLOAD_PARAMETERS_NO_CHANGE 0
++#define NDIS_OFFLOAD_PARAMETERS_LSOV2_DISABLED 1
++#define NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED  2
++#define NDIS_OFFLOAD_PARAMETERS_LSOV1_ENABLED  2
++#define NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED 1
++#define NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED 2
++#define NDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED 1
++#define NDIS_OFFLOAD_PARAMETERS_TX_ENABLED_RX_DISABLED 2
++#define NDIS_OFFLOAD_PARAMETERS_RX_ENABLED_TX_DISABLED 3
++#define NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED 4
++
++/*
++ * New offload OIDs for NDIS 6
++ */
++#define OID_TCP_OFFLOAD_CURRENT_CONFIG 0xFC01020B /* query only */
++#define OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C         /* set only */
++#define OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D/* query only */
++#define OID_TCP_CONNECTION_OFFLOAD_CURRENT_CONFIG 0xFC01020E /* query only */
++#define OID_TCP_CONNECTION_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020F /* query */
++#define OID_OFFLOAD_ENCAPSULATION 0x0101010A /* set/query */
++
++struct ndis_offload_params {
++      struct ndis_oject_header header;
++      u8 ip_v4_csum;
++      u8 tcp_ip_v4_csum;
++      u8 udp_ip_v4_csum;
++      u8 tcp_ip_v6_csum;
++      u8 udp_ip_v6_csum;
++      u8 lso_v1;
++      u8 ip_sec_v1;
++      u8 lso_v2_ipv4;
++      u8 lso_v2_ipv6;
++      u8 tcp_connection_ip_v4;
++      u8 tcp_connection_ip_v6;
++      u32 flags;
++      u8 ip_sec_v2;
++      u8 ip_sec_v2_ip_v4;
++      struct {
++              u8 rsc_ip_v4;
++              u8 rsc_ip_v6;
++      };
++      struct {
++              u8 encapsulated_packet_task_offload;
++              u8 encapsulation_types;
++      };
++};
++
+ #define NDIS_VLAN_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
+               sizeof(struct ndis_pkt_8021q_info))
+diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
+index dcbf144ea7da..9b02f21097a7 100644
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -627,6 +627,61 @@ cleanup:
+       return ret;
+ }
++int rndis_filter_set_offload_params(struct hv_device *hdev,
++                              struct ndis_offload_params *req_offloads)
++{
++      struct netvsc_device *nvdev = hv_get_drvdata(hdev);
++      struct rndis_device *rdev = nvdev->extension;
++      struct net_device *ndev = nvdev->ndev;
++      struct rndis_request *request;
++      struct rndis_set_request *set;
++      struct ndis_offload_params *offload_params;
++      struct rndis_set_complete *set_complete;
++      u32 extlen = sizeof(struct ndis_offload_params);
++      int ret, t;
++
++      request = get_rndis_request(rdev, RNDIS_MSG_SET,
++              RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen);
++      if (!request)
++              return -ENOMEM;
++
++      set = &request->request_msg.msg.set_req;
++      set->oid = OID_TCP_OFFLOAD_PARAMETERS;
++      set->info_buflen = extlen;
++      set->info_buf_offset = sizeof(struct rndis_set_request);
++      set->dev_vc_handle = 0;
++
++      offload_params = (struct ndis_offload_params *)((ulong)set +
++                              set->info_buf_offset);
++      *offload_params = *req_offloads;
++      offload_params->header.type = NDIS_OBJECT_TYPE_DEFAULT;
++      offload_params->header.revision = NDIS_OFFLOAD_PARAMETERS_REVISION_3;
++      offload_params->header.size = extlen;
++
++      ret = rndis_filter_send_request(rdev, request);
++      if (ret != 0)
++              goto cleanup;
++
++      t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
++      if (t == 0) {
++              netdev_err(ndev, "timeout before we got aOFFLOAD set response...\n");
++              /* can't put_rndis_request, since we may still receive a
++               * send-completion.
++               */
++              return -EBUSY;
++      } else {
++              set_complete = &request->response_msg.msg.set_complete;
++              if (set_complete->status != RNDIS_STATUS_SUCCESS) {
++                      netdev_err(ndev, "Fail to set MAC on host side:0x%x\n",
++                                 set_complete->status);
++                      ret = -EINVAL;
++              }
++      }
++
++cleanup:
++      put_rndis_request(rdev, request);
++      return ret;
++}
+ static int rndis_filter_query_device_link_status(struct rndis_device *dev)
+ {
+@@ -826,6 +881,7 @@ int rndis_filter_device_add(struct hv_device *dev,
+       struct netvsc_device *net_device;
+       struct rndis_device *rndis_device;
+       struct netvsc_device_info *device_info = additional_info;
++      struct ndis_offload_params offloads;
+       rndis_device = get_rndis_device();
+       if (!rndis_device)
+@@ -865,6 +921,26 @@ int rndis_filter_device_add(struct hv_device *dev,
+       memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
++      /* Turn on the offloads; the host supports all of the relevant
++       * offloads.
++       */
++      memset(&offloads, 0, sizeof(struct ndis_offload_params));
++      /* A value of zero means "no change"; now turn on what we
++       * want.
++       */
++      offloads.ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
++      offloads.tcp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
++      offloads.udp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
++      offloads.tcp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
++      offloads.udp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
++      offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED;
++
++
++      ret = rndis_filter_set_offload_params(dev, &offloads);
++      if (ret)
++              goto err_dev_remv;
++
++
+       rndis_filter_query_device_link_status(rndis_device);
+       device_info->link_state = rndis_device->link_state;
+@@ -874,6 +950,10 @@ int rndis_filter_device_add(struct hv_device *dev,
+                device_info->link_state ? "down" : "up");
+       return ret;
++
++err_dev_remv:
++      rndis_filter_device_remove(dev);
++      return ret;
+ }
+ void rndis_filter_device_remove(struct hv_device *dev)
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0007-hyperv-Add-hash-value-into-RNDIS-Per-packet-info.patch b/src/patches/linux/0007-hyperv-Add-hash-value-into-RNDIS-Per-packet-info.patch
new file mode 100644 (file)
index 0000000..0db5f72
--- /dev/null
@@ -0,0 +1,93 @@
+From 307f099520b66504cf6c5638f3f404c48b9fb45b Mon Sep 17 00:00:00 2001
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Wed, 21 May 2014 12:55:39 -0700
+Subject: [PATCH 07/11] hyperv: Add hash value into RNDIS Per-packet info
+
+It passes the hash value as the RNDIS Per-packet info to the Hyper-V host,
+so that the send completion notices can be spread across multiple channels.
+MS-TFS: 140273
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h |  4 ++++
+ drivers/net/hyperv/netvsc_drv.c | 18 ++++++++++++++----
+ 2 files changed, 18 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index 4b7df5a5c966..6cc37c15e0bf 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -791,6 +791,7 @@ enum ndis_per_pkt_info_type {
+       IEEE_8021Q_INFO,
+       ORIGINAL_PKTINFO,
+       PACKET_CANCEL_ID,
++      NBL_HASH_VALUE = PACKET_CANCEL_ID,
+       ORIGINAL_NET_BUFLIST,
+       CACHED_NET_BUFLIST,
+       SHORT_PKT_PADINFO,
+@@ -937,6 +938,9 @@ struct ndis_tcp_lso_info {
+ #define NDIS_LSO_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
+               sizeof(struct ndis_tcp_lso_info))
++#define NDIS_HASH_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
++              sizeof(u32))
++
+ /* Format of Information buffer passed in a SetRequest for the OID */
+ /* OID_GEN_RNDIS_CONFIG_PARAMETER. */
+ struct rndis_config_parameter_info {
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 2e967a7bdb33..4fd71b75e666 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -224,9 +224,11 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
+       if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1)
+               return 0;
+-      if (netvsc_set_hash(&hash, skb))
++      if (netvsc_set_hash(&hash, skb)) {
+               q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
+                       ndev->real_num_tx_queues;
++              skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
++      }
+       return q_idx;
+ }
+@@ -384,6 +386,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       struct ndis_tcp_lso_info *lso_info;
+       int  hdr_offset;
+       u32 net_trans_info;
++      u32 hash;
+       /* We will atmost need two pages to describe the rndis
+@@ -402,9 +405,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       packet = kzalloc(sizeof(struct hv_netvsc_packet) +
+                        (num_data_pgs * sizeof(struct hv_page_buffer)) +
+                        sizeof(struct rndis_message) +
+-                       NDIS_VLAN_PPI_SIZE +
+-                       NDIS_CSUM_PPI_SIZE +
+-                       NDIS_LSO_PPI_SIZE, GFP_ATOMIC);
++                       NDIS_VLAN_PPI_SIZE + NDIS_CSUM_PPI_SIZE +
++                       NDIS_LSO_PPI_SIZE + NDIS_HASH_PPI_SIZE, GFP_ATOMIC);
+       if (!packet) {
+               /* out of memory, drop packet */
+               netdev_err(net, "unable to allocate hv_netvsc_packet\n");
+@@ -443,6 +445,14 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
++      hash = skb_get_hash_raw(skb);
++      if (hash != 0 && net->real_num_tx_queues > 1) {
++              rndis_msg_size += NDIS_HASH_PPI_SIZE;
++              ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
++                                  NBL_HASH_VALUE);
++              *(u32 *)((void *)ppi + ppi->ppi_offset) = hash;
++      }
++
+       if (isvlan) {
+               struct ndis_pkt_8021q_info *vlan;
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0008-Drivers-net-hyperv-Enable-receive-side-IP-checksum-o.patch b/src/patches/linux/0008-Drivers-net-hyperv-Enable-receive-side-IP-checksum-o.patch
new file mode 100644 (file)
index 0000000..e77e9f2
--- /dev/null
@@ -0,0 +1,147 @@
+From 59e0c70c618668522a9431686f7e3a69ef396dff Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Sat, 8 Mar 2014 19:23:16 -0800
+Subject: [PATCH 08/25] Drivers: net: hyperv: Enable receive side IP checksum
+ offload
+
+Enable receive side checksum offload.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h   | 33 ++++++++++++++++++++++++++++++++-
+ drivers/net/hyperv/netvsc_drv.c   | 19 +++++++++++++++----
+ drivers/net/hyperv/rndis_filter.c |  4 +++-
+ 3 files changed, 50 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index 8bc4e766589b..faeb74623fbd 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -30,6 +30,7 @@
+ /* Fwd declaration */
+ struct hv_netvsc_packet;
++struct ndis_tcp_ip_checksum_info;
+ /* Represent the xfer page packet which contains 1 or more netvsc packet */
+ struct xferpage_packet {
+@@ -117,7 +118,8 @@ int netvsc_send(struct hv_device *device,
+ void netvsc_linkstatus_callback(struct hv_device *device_obj,
+                               unsigned int status);
+ int netvsc_recv_callback(struct hv_device *device_obj,
+-                      struct hv_netvsc_packet *packet);
++                      struct hv_netvsc_packet *packet,
++                      struct ndis_tcp_ip_checksum_info *csum_info);
+ int rndis_filter_open(struct hv_device *dev);
+ int rndis_filter_close(struct hv_device *dev);
+ int rndis_filter_device_add(struct hv_device *dev,
+@@ -776,9 +778,38 @@ struct ndis_offload_params {
+       };
+ };
++struct ndis_tcp_ip_checksum_info {
++      union {
++              struct {
++                      u32 is_ipv4:1;
++                      u32 is_ipv6:1;
++                      u32 tcp_checksum:1;
++                      u32 udp_checksum:1;
++                      u32 ip_header_checksum:1;
++                      u32 reserved:11;
++                      u32 tcp_header_offset:10;
++              } transmit;
++              struct {
++                      u32 tcp_checksum_failed:1;
++                      u32 udp_checksum_failed:1;
++                      u32 ip_checksum_failed:1;
++                      u32 tcp_checksum_succeeded:1;
++                      u32 udp_checksum_succeeded:1;
++                      u32 ip_checksum_succeeded:1;
++                      u32 loopback:1;
++                      u32 tcp_checksum_value_invalid:1;
++                      u32 ip_checksum_value_invalid:1;
++              } receive;
++              u32  value;
++      };
++};
++
+ #define NDIS_VLAN_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
+               sizeof(struct ndis_pkt_8021q_info))
++#define NDIS_CSUM_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
++              sizeof(struct ndis_tcp_ip_checksum_info))
++
+ /* Format of Information buffer passed in a SetRequest for the OID */
+ /* OID_GEN_RNDIS_CONFIG_PARAMETER. */
+ struct rndis_config_parameter_info {
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 87293a15e470..7438360b7a02 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -392,7 +392,8 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
+  * "wire" on the specified device.
+  */
+ int netvsc_recv_callback(struct hv_device *device_obj,
+-                              struct hv_netvsc_packet *packet)
++                              struct hv_netvsc_packet *packet,
++                              struct ndis_tcp_ip_checksum_info *csum_info)
+ {
+       struct net_device *net;
+       struct sk_buff *skb;
+@@ -419,7 +420,17 @@ int netvsc_recv_callback(struct hv_device *device_obj,
+               packet->total_data_buflen);
+       skb->protocol = eth_type_trans(skb, net);
+-      skb->ip_summed = CHECKSUM_NONE;
++      if (csum_info) {
++              /* We only look at the IP checksum here.
++               * Should we be dropping the packet if checksum
++               * failed? How do we deal with other checksums - TCP/UDP?
++               */
++              if (csum_info->receive.ip_checksum_succeeded)
++                      skb->ip_summed = CHECKSUM_UNNECESSARY;
++              else
++                      skb->ip_summed = CHECKSUM_NONE;
++      }
++
+       if (packet->vlan_tci & VLAN_TAG_PRESENT)
+               __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+                                      packet->vlan_tci);
+@@ -581,8 +592,8 @@ static int netvsc_probe(struct hv_device *dev,
+       net->netdev_ops = &device_ops;
+       /* TODO: Add GSO and Checksum offload */
+-      net->hw_features = NETIF_F_SG;
+-      net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG;
++      net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG;
++      net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM;
+       SET_ETHTOOL_OPS(net, &ethtool_ops);
+       SET_NETDEV_DEV(net, &dev->device);
+diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
+index 9b02f21097a7..4a37e3db9e32 100644
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -370,6 +370,7 @@ static void rndis_filter_receive_data(struct rndis_device *dev,
+       struct rndis_packet *rndis_pkt;
+       u32 data_offset;
+       struct ndis_pkt_8021q_info *vlan;
++      struct ndis_tcp_ip_checksum_info *csum_info;
+       rndis_pkt = &msg->msg.pkt;
+@@ -408,7 +409,8 @@ static void rndis_filter_receive_data(struct rndis_device *dev,
+               pkt->vlan_tci = 0;
+       }
+-      netvsc_recv_callback(dev->net_dev->dev, pkt);
++      csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO);
++      netvsc_recv_callback(dev->net_dev->dev, pkt, csum_info);
+ }
+ int rndis_filter_receive(struct hv_device *dev,
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0008-hyperv-fix-apparent-cut-n-paste-error-in-send-path-t.patch b/src/patches/linux/0008-hyperv-fix-apparent-cut-n-paste-error-in-send-path-t.patch
new file mode 100644 (file)
index 0000000..b774f81
--- /dev/null
@@ -0,0 +1,32 @@
+From 2f18423d7ebf8044a9edaca840be5ae199fb32f6 Mon Sep 17 00:00:00 2001
+From: Dave Jones <davej@redhat.com>
+Date: Mon, 16 Jun 2014 16:59:02 -0400
+Subject: [PATCH 08/11] hyperv: fix apparent cut-n-paste error in send path
+ teardown
+
+c25aaf814a63: "hyperv: Enable sendbuf mechanism on the send path" added
+some teardown code that looks like it was copied from the receive path
+above, but missed a variable name replacement.
+
+Signed-off-by: Dave Jones <davej@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/netvsc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index c041f63a6d30..4ed38eaecea8 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -189,7 +189,7 @@ static int netvsc_destroy_buf(struct netvsc_device *net_device)
+                                  "unable to teardown send buffer's gpadl\n");
+                       return ret;
+               }
+-              net_device->recv_buf_gpadl_handle = 0;
++              net_device->send_buf_gpadl_handle = 0;
+       }
+       if (net_device->send_buf) {
+               /* Free up the receive buffer */
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0009-Drivers-net-hyperv-Enable-send-side-checksum-offload.patch b/src/patches/linux/0009-Drivers-net-hyperv-Enable-send-side-checksum-offload.patch
new file mode 100644 (file)
index 0000000..db42108
--- /dev/null
@@ -0,0 +1,140 @@
+From d2a0be7f4263eb669af84240c5424a72cce4cdb4 Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Sat, 8 Mar 2014 19:23:17 -0800
+Subject: [PATCH 09/25] Drivers: net: hyperv: Enable send side checksum offload
+
+Enable send side checksum offload.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h | 10 ++++++
+ drivers/net/hyperv/netvsc_drv.c | 69 +++++++++++++++++++++++++++++++++++++++--
+ 2 files changed, 77 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index faeb74623fbd..4cf238234321 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -1035,6 +1035,16 @@ struct rndis_message {
+ #define NDIS_PACKET_TYPE_FUNCTIONAL   0x00000400
+ #define NDIS_PACKET_TYPE_MAC_FRAME    0x00000800
++#define INFO_IPV4       2
++#define INFO_IPV6       4
++#define INFO_TCP        2
++#define INFO_UDP        4
++
++#define TRANSPORT_INFO_NOT_IP   0
++#define TRANSPORT_INFO_IPV4_TCP ((INFO_IPV4 << 16) | INFO_TCP)
++#define TRANSPORT_INFO_IPV4_UDP ((INFO_IPV4 << 16) | INFO_UDP)
++#define TRANSPORT_INFO_IPV6_TCP ((INFO_IPV6 << 16) | INFO_TCP)
++#define TRANSPORT_INFO_IPV6_UDP ((INFO_IPV6 << 16) | INFO_UDP)
+ #endif /* _HYPERV_NET_H */
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 7438360b7a02..2fab69ac61ef 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -257,6 +257,35 @@ static int netvsc_get_slots(struct sk_buff *skb)
+       return slots + frag_slots;
+ }
++static u32 get_net_transport_info(struct sk_buff *skb, u32 *trans_off)
++{
++      u32 ret_val = TRANSPORT_INFO_NOT_IP;
++
++      if ((eth_hdr(skb)->h_proto != htons(ETH_P_IP)) &&
++              (eth_hdr(skb)->h_proto != htons(ETH_P_IPV6))) {
++              goto not_ip;
++      }
++
++      *trans_off = skb_transport_offset(skb);
++
++      if ((eth_hdr(skb)->h_proto == htons(ETH_P_IP))) {
++              struct iphdr *iphdr = ip_hdr(skb);
++
++              if (iphdr->protocol == IPPROTO_TCP)
++                      ret_val = TRANSPORT_INFO_IPV4_TCP;
++              else if (iphdr->protocol == IPPROTO_UDP)
++                      ret_val = TRANSPORT_INFO_IPV4_UDP;
++      } else {
++              if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
++                      ret_val = TRANSPORT_INFO_IPV6_TCP;
++              else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)
++                      ret_val = TRANSPORT_INFO_IPV6_UDP;
++      }
++
++not_ip:
++      return ret_val;
++}
++
+ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+ {
+       struct net_device_context *net_device_ctx = netdev_priv(net);
+@@ -269,6 +298,10 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       u32 rndis_msg_size;
+       bool isvlan;
+       struct rndis_per_packet_info *ppi;
++      struct ndis_tcp_ip_checksum_info *csum_info;
++      int  hdr_offset;
++      u32 net_trans_info;
++
+       /* We will atmost need two pages to describe the rndis
+        * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
+@@ -336,6 +369,37 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+                               VLAN_PRIO_SHIFT;
+       }
++      net_trans_info = get_net_transport_info(skb, &hdr_offset);
++      if (net_trans_info == TRANSPORT_INFO_NOT_IP)
++              goto do_send;
++
++      /*
++       * Setup the sendside checksum offload only if this is not a
++       * GSO packet.
++       */
++      if (skb_is_gso(skb))
++              goto do_send;
++
++      rndis_msg_size += NDIS_CSUM_PPI_SIZE;
++      ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
++                          TCPIP_CHKSUM_PKTINFO);
++
++      csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi +
++                      ppi->ppi_offset);
++
++      if (net_trans_info & (INFO_IPV4 << 16))
++              csum_info->transmit.is_ipv4 = 1;
++      else
++              csum_info->transmit.is_ipv6 = 1;
++
++      if (net_trans_info & INFO_TCP) {
++              csum_info->transmit.tcp_checksum = 1;
++              csum_info->transmit.tcp_header_offset = hdr_offset;
++      } else if (net_trans_info & INFO_UDP) {
++              csum_info->transmit.udp_checksum = 1;
++      }
++
++do_send:
+       /* Start filling in the page buffers with the rndis hdr */
+       rndis_msg->msg_len += rndis_msg_size;
+       packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
+@@ -592,8 +656,9 @@ static int netvsc_probe(struct hv_device *dev,
+       net->netdev_ops = &device_ops;
+       /* TODO: Add GSO and Checksum offload */
+-      net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG;
+-      net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM;
++      net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM;
++      net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM |
++                      NETIF_F_IP_CSUM;
+       SET_ETHTOOL_OPS(net, &ethtool_ops);
+       SET_NETDEV_DEV(net, &dev->device);
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0009-hyperv-Fix-error-return-code-in-netvsc_init_buf.patch b/src/patches/linux/0009-hyperv-Fix-error-return-code-in-netvsc_init_buf.patch
new file mode 100644 (file)
index 0000000..fcae531
--- /dev/null
@@ -0,0 +1,34 @@
+From dd1d3f8f9920926aa426589e542eed6bf58b7354 Mon Sep 17 00:00:00 2001
+From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
+Date: Wed, 23 Jul 2014 09:00:35 +0800
+Subject: [PATCH 09/11] hyperv: Fix error return code in netvsc_init_buf()
+
+Fix to return -ENOMEM from the kzalloc error handling
+case instead of 0.
+
+Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/netvsc.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index 4ed38eaecea8..d97d5f39a04e 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -378,8 +378,10 @@ static int netvsc_init_buf(struct hv_device *device)
+       net_device->send_section_map =
+               kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
+-      if (net_device->send_section_map == NULL)
++      if (net_device->send_section_map == NULL) {
++              ret = -ENOMEM;
+               goto cleanup;
++      }
+       goto exit;
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0010-Drivers-net-hyperv-Enable-large-send-offload.patch b/src/patches/linux/0010-Drivers-net-hyperv-Enable-large-send-offload.patch
new file mode 100644 (file)
index 0000000..36be2a8
--- /dev/null
@@ -0,0 +1,153 @@
+From a4ec4f58017b456281ee17c35fb82dfe4eab2193 Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Sat, 8 Mar 2014 19:23:18 -0800
+Subject: [PATCH 10/25] Drivers: net: hyperv: Enable large send offload
+
+Enable segmentation offload.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h | 40 ++++++++++++++++++++++++++++++++++++++++
+ drivers/net/hyperv/netvsc_drv.c | 38 ++++++++++++++++++++++++++++++++++----
+ 2 files changed, 74 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index 4cf238234321..7d06b4959383 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -742,6 +742,10 @@ struct ndis_oject_header {
+ #define NDIS_OFFLOAD_PARAMETERS_RX_ENABLED_TX_DISABLED 3
+ #define NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED 4
++#define NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE   1
++#define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4      0
++#define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6      1
++
+ /*
+  * New offload OIDs for NDIS 6
+  */
+@@ -804,12 +808,48 @@ struct ndis_tcp_ip_checksum_info {
+       };
+ };
++struct ndis_tcp_lso_info {
++      union {
++              struct {
++                      u32 unused:30;
++                      u32 type:1;
++                      u32 reserved2:1;
++              } transmit;
++              struct {
++                      u32 mss:20;
++                      u32 tcp_header_offset:10;
++                      u32 type:1;
++                      u32 reserved2:1;
++              } lso_v1_transmit;
++              struct {
++                      u32 tcp_payload:30;
++                      u32 type:1;
++                      u32 reserved2:1;
++              } lso_v1_transmit_complete;
++              struct {
++                      u32 mss:20;
++                      u32 tcp_header_offset:10;
++                      u32 type:1;
++                      u32 ip_version:1;
++              } lso_v2_transmit;
++              struct {
++                      u32 reserved:30;
++                      u32 type:1;
++                      u32 reserved2:1;
++              } lso_v2_transmit_complete;
++              u32  value;
++      };
++};
++
+ #define NDIS_VLAN_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
+               sizeof(struct ndis_pkt_8021q_info))
+ #define NDIS_CSUM_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
+               sizeof(struct ndis_tcp_ip_checksum_info))
++#define NDIS_LSO_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
++              sizeof(struct ndis_tcp_lso_info))
++
+ /* Format of Information buffer passed in a SetRequest for the OID */
+ /* OID_GEN_RNDIS_CONFIG_PARAMETER. */
+ struct rndis_config_parameter_info {
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 2fab69ac61ef..5baa1fa7e692 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -299,6 +299,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       bool isvlan;
+       struct rndis_per_packet_info *ppi;
+       struct ndis_tcp_ip_checksum_info *csum_info;
++      struct ndis_tcp_lso_info *lso_info;
+       int  hdr_offset;
+       u32 net_trans_info;
+@@ -378,7 +379,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+        * GSO packet.
+        */
+       if (skb_is_gso(skb))
+-              goto do_send;
++              goto do_lso;
+       rndis_msg_size += NDIS_CSUM_PPI_SIZE;
+       ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
+@@ -398,6 +399,35 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       } else if (net_trans_info & INFO_UDP) {
+               csum_info->transmit.udp_checksum = 1;
+       }
++      goto do_send;
++
++do_lso:
++      rndis_msg_size += NDIS_LSO_PPI_SIZE;
++      ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
++                          TCP_LARGESEND_PKTINFO);
++
++      lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
++                      ppi->ppi_offset);
++
++      lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
++      if (net_trans_info & (INFO_IPV4 << 16)) {
++              lso_info->lso_v2_transmit.ip_version =
++                      NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
++              ip_hdr(skb)->tot_len = 0;
++              ip_hdr(skb)->check = 0;
++              tcp_hdr(skb)->check =
++              ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
++                                 ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
++      } else {
++              lso_info->lso_v2_transmit.ip_version =
++                      NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
++              ipv6_hdr(skb)->payload_len = 0;
++              tcp_hdr(skb)->check =
++              ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
++                              &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
++      }
++      lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset;
++      lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;
+ do_send:
+       /* Start filling in the page buffers with the rndis hdr */
+@@ -655,10 +685,10 @@ static int netvsc_probe(struct hv_device *dev,
+       net->netdev_ops = &device_ops;
+-      /* TODO: Add GSO and Checksum offload */
+-      net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM;
++      net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM |
++                              NETIF_F_TSO;
+       net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM |
+-                      NETIF_F_IP_CSUM;
++                      NETIF_F_IP_CSUM | NETIF_F_TSO;
+       SET_ETHTOOL_OPS(net, &ethtool_ops);
+       SET_NETDEV_DEV(net, &dev->device);
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0010-hyperv-Fix-a-bug-in-netvsc_start_xmit.patch b/src/patches/linux/0010-hyperv-Fix-a-bug-in-netvsc_start_xmit.patch
new file mode 100644 (file)
index 0000000..5e08805
--- /dev/null
@@ -0,0 +1,47 @@
+From b3e774263908a834c1c0d5abf3a7658280e42fc7 Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Sun, 28 Sep 2014 22:16:43 -0700
+Subject: [PATCH 10/11] hyperv: Fix a bug in netvsc_start_xmit()
+
+[ Upstream commit dedb845ded56ded1c62f5398a94ffa8615d4592d ]
+
+After the packet is successfully sent, we should not touch the skb
+as it may have been freed. This patch is based on the work done by
+Long Li <longli@microsoft.com>.
+
+In this version of the patch I have fixed issues pointed out by David.
+David, please queue this up for stable.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Tested-by: Long Li <longli@microsoft.com>
+Tested-by: Sitsofe Wheeler <sitsofe@yahoo.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc_drv.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 4fd71b75e666..f15297201777 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -387,6 +387,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       int  hdr_offset;
+       u32 net_trans_info;
+       u32 hash;
++      u32 skb_length = skb->len;
+       /* We will atmost need two pages to describe the rndis
+@@ -562,7 +563,7 @@ do_send:
+ drop:
+       if (ret == 0) {
+-              net->stats.tx_bytes += skb->len;
++              net->stats.tx_bytes += skb_length;
+               net->stats.tx_packets++;
+       } else {
+               kfree(packet);
+-- 
+2.4.3
+
similarity index 81%
rename from src/patches/linux-3.14.x-hyperv-2008-fix.patch
rename to src/patches/linux/0011-hyperv-Change-the-receive-buffer-size-for-legacy-hos.patch
index e538e08e5cb7a2286e3ef53f3e321889abdb281a..e3ee8cbc9915be01daa58353646e8b542a92a537 100644 (file)
@@ -1,16 +1,20 @@
-From 99d3016de4f2a29635f5382b0e9bd0e5f2151487 Mon Sep 17 00:00:00 2001
+From 56b3f72d3fed7f9b8d17dcf5d81455fa1b4327d7 Mon Sep 17 00:00:00 2001
 From: Haiyang Zhang <haiyangz@microsoft.com>
 Date: Sun, 9 Mar 2014 16:10:59 -0700
-Subject: hyperv: Change the receive buffer size for legacy hosts
+Subject: [PATCH 11/25] hyperv: Change the receive buffer size for legacy hosts
 
 Due to a bug in the Hyper-V host verion 2008R2, we need to use a slightly smaller
 receive buffer size, otherwise the buffer will not be accepted by the legacy hosts.
 
 Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
 Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h | 1 +
+ drivers/net/hyperv/netvsc.c     | 6 +++++-
+ 2 files changed, 6 insertions(+), 1 deletion(-)
 
 diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
-index 7d06b49..13010b4 100644
+index 7d06b4959383..13010b4dae5b 100644
 --- a/drivers/net/hyperv/hyperv_net.h
 +++ b/drivers/net/hyperv/hyperv_net.h
 @@ -513,6 +513,7 @@ struct nvsp_message {
@@ -22,7 +26,7 @@ index 7d06b49..13010b4 100644
  #define NETVSC_RECEIVE_BUFFER_ID              0xcafe
  
 diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
-index 1a0280d..daddea2 100644
+index 1a0280dcba7e..daddea2654ce 100644
 --- a/drivers/net/hyperv/netvsc.c
 +++ b/drivers/net/hyperv/netvsc.c
 @@ -365,6 +365,11 @@ static int netvsc_connect_vsp(struct hv_device *device)
@@ -46,5 +50,5 @@ index 1a0280d..daddea2 100644
  
        INIT_LIST_HEAD(&net_device->recv_pkt_list);
 -- 
-cgit v0.10.2
+2.4.3
 
diff --git a/src/patches/linux/0011-hyperv-Fix-a-bug-in-netvsc_send.patch b/src/patches/linux/0011-hyperv-Fix-a-bug-in-netvsc_send.patch
new file mode 100644 (file)
index 0000000..00f9c5d
--- /dev/null
@@ -0,0 +1,68 @@
+From 26875bba869bd91a1d8fef9229a56a1e6d9fef2b Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Sun, 5 Oct 2014 10:42:51 -0700
+Subject: [PATCH 11/11] hyperv: Fix a bug in netvsc_send()
+
+[ Upstream commit 3a67c9ccad926a168d8b7891537a452018368a5b ]
+
+After the packet is successfully sent, we should not touch the packet
+as it may have been freed. This patch is based on the work done by
+Long Li <longli@microsoft.com>.
+
+David, please queue this up for stable.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Reported-by: Sitsofe Wheeler <sitsofe@yahoo.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index d97d5f39a04e..7edf976ecfa0 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -708,6 +708,7 @@ int netvsc_send(struct hv_device *device,
+       unsigned int section_index = NETVSC_INVALID_INDEX;
+       u32 msg_size = 0;
+       struct sk_buff *skb;
++      u16 q_idx = packet->q_idx;
+       net_device = get_outbound_net_device(device);
+@@ -772,24 +773,24 @@ int netvsc_send(struct hv_device *device,
+       if (ret == 0) {
+               atomic_inc(&net_device->num_outstanding_sends);
+-              atomic_inc(&net_device->queue_sends[packet->q_idx]);
++              atomic_inc(&net_device->queue_sends[q_idx]);
+               if (hv_ringbuf_avail_percent(&out_channel->outbound) <
+                       RING_AVAIL_PERCENT_LOWATER) {
+                       netif_tx_stop_queue(netdev_get_tx_queue(
+-                                          ndev, packet->q_idx));
++                                          ndev, q_idx));
+                       if (atomic_read(&net_device->
+-                              queue_sends[packet->q_idx]) < 1)
++                              queue_sends[q_idx]) < 1)
+                               netif_tx_wake_queue(netdev_get_tx_queue(
+-                                                  ndev, packet->q_idx));
++                                                  ndev, q_idx));
+               }
+       } else if (ret == -EAGAIN) {
+               netif_tx_stop_queue(netdev_get_tx_queue(
+-                                  ndev, packet->q_idx));
+-              if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) {
++                                  ndev, q_idx));
++              if (atomic_read(&net_device->queue_sends[q_idx]) < 1) {
+                       netif_tx_wake_queue(netdev_get_tx_queue(
+-                                          ndev, packet->q_idx));
++                                          ndev, q_idx));
+                       ret = -ENOSPC;
+               }
+       } else {
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0012-Drivers-net-hyperv-Allocate-memory-for-all-possible-.patch b/src/patches/linux/0012-Drivers-net-hyperv-Allocate-memory-for-all-possible-.patch
new file mode 100644 (file)
index 0000000..c626bb0
--- /dev/null
@@ -0,0 +1,34 @@
+From e0f6906eb7e5b395370da9499189d13b59020382 Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Wed, 9 Apr 2014 15:00:45 -0700
+Subject: [PATCH 12/25] Drivers: net: hyperv: Allocate memory for all possible
+ per-packet information
+
+An outgoing packet can potentially need per-packet information for
+all the offloads and VLAN tagging. Fix this issue.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/netvsc_drv.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 5baa1fa7e692..c76c85176644 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -320,7 +320,9 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       packet = kzalloc(sizeof(struct hv_netvsc_packet) +
+                        (num_data_pgs * sizeof(struct hv_page_buffer)) +
+                        sizeof(struct rndis_message) +
+-                       NDIS_VLAN_PPI_SIZE, GFP_ATOMIC);
++                       NDIS_VLAN_PPI_SIZE +
++                       NDIS_CSUM_PPI_SIZE +
++                       NDIS_LSO_PPI_SIZE, GFP_ATOMIC);
+       if (!packet) {
+               /* out of memory, drop packet */
+               netdev_err(net, "unable to allocate hv_netvsc_packet\n");
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0013-Drivers-net-hyperv-Negotiate-suitable-ndis-version-f.patch b/src/patches/linux/0013-Drivers-net-hyperv-Negotiate-suitable-ndis-version-f.patch
new file mode 100644 (file)
index 0000000..7044c52
--- /dev/null
@@ -0,0 +1,32 @@
+From 8a7882bada78e7d7355aafc0ca3c5696d25eb443 Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Wed, 9 Apr 2014 15:00:46 -0700
+Subject: [PATCH 13/25] Drivers: net: hyperv: Negotiate suitable ndis version
+ for offload support
+
+Ws2008R2 supports ndis_version 6.1 and 6.1 is the minimal version required
+for various offloads. Negotiate ndis_version 6.1 when on ws2008r2.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/netvsc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index daddea2654ce..f7629ecefa84 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -344,7 +344,7 @@ static int netvsc_connect_vsp(struct hv_device *device)
+       memset(init_packet, 0, sizeof(struct nvsp_message));
+       if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
+-              ndis_version = 0x00050001;
++              ndis_version = 0x00060001;
+       else
+               ndis_version = 0x0006001e;
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0014-Drivers-net-hyperv-Address-UDP-checksum-issues.patch b/src/patches/linux/0014-Drivers-net-hyperv-Address-UDP-checksum-issues.patch
new file mode 100644 (file)
index 0000000..eaf343e
--- /dev/null
@@ -0,0 +1,111 @@
+From b822ee7a17efd1bc8c7584da5d0a2c042e9ca5b6 Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Wed, 9 Apr 2014 15:00:47 -0700
+Subject: [PATCH 14/25] Drivers: net: hyperv: Address UDP checksum issues
+
+ws2008r2 does not support UDP checksum offload. Thus, we cannot turn on
+UDP offload in the host. Also, on ws2012 and ws2012 r2, there appears to be
+an issue with UDP checksum offload.
+Fix this issue by computing the UDP checksum in the Hyper-V driver.
+
+Based on Dave Miller's comments, in this version, I have COWed the skb
+before modifying the UDP header (the checksum field).
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h   |  1 +
+ drivers/net/hyperv/netvsc_drv.c   | 26 +++++++++++++++++++++++++-
+ drivers/net/hyperv/rndis_filter.c | 12 +++++++++++-
+ 3 files changed, 37 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index 13010b4dae5b..d18f711d0b0c 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -747,6 +747,7 @@ struct ndis_oject_header {
+ #define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4      0
+ #define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6      1
++#define VERSION_4_OFFLOAD_SIZE                        22
+ /*
+  * New offload OIDs for NDIS 6
+  */
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index c76c85176644..0d898876689e 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -399,7 +399,30 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+               csum_info->transmit.tcp_checksum = 1;
+               csum_info->transmit.tcp_header_offset = hdr_offset;
+       } else if (net_trans_info & INFO_UDP) {
+-              csum_info->transmit.udp_checksum = 1;
++              /* UDP checksum offload is not supported on ws2008r2.
++               * Furthermore, on ws2012 and ws2012r2, there are some
++               * issues with udp checksum offload from Linux guests.
++               * (these are host issues).
++               * For now compute the checksum here.
++               */
++              struct udphdr *uh;
++              u16 udp_len;
++
++              ret = skb_cow_head(skb, 0);
++              if (ret)
++                      goto drop;
++
++              uh = udp_hdr(skb);
++              udp_len = ntohs(uh->len);
++              uh->check = 0;
++              uh->check = csum_tcpudp_magic(ip_hdr(skb)->saddr,
++                                            ip_hdr(skb)->daddr,
++                                            udp_len, IPPROTO_UDP,
++                                            csum_partial(uh, udp_len, 0));
++              if (uh->check == 0)
++                      uh->check = CSUM_MANGLED_0;
++
++              csum_info->transmit.udp_checksum = 0;
+       }
+       goto do_send;
+@@ -439,6 +462,7 @@ do_send:
+       ret = netvsc_send(net_device_ctx->device_ctx, packet);
++drop:
+       if (ret == 0) {
+               net->stats.tx_bytes += skb_length;
+               net->stats.tx_packets++;
+diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
+index 4a37e3db9e32..143a98caf618 100644
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -641,6 +641,16 @@ int rndis_filter_set_offload_params(struct hv_device *hdev,
+       struct rndis_set_complete *set_complete;
+       u32 extlen = sizeof(struct ndis_offload_params);
+       int ret, t;
++      u32 vsp_version = nvdev->nvsp_version;
++
++      if (vsp_version <= NVSP_PROTOCOL_VERSION_4) {
++              extlen = VERSION_4_OFFLOAD_SIZE;
++              /* On NVSP_PROTOCOL_VERSION_4 and below, we do not support
++               * UDP checksum offload.
++               */
++              req_offloads->udp_ip_v4_csum = 0;
++              req_offloads->udp_ip_v6_csum = 0;
++      }
+       request = get_rndis_request(rdev, RNDIS_MSG_SET,
+               RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen);
+@@ -674,7 +684,7 @@ int rndis_filter_set_offload_params(struct hv_device *hdev,
+       } else {
+               set_complete = &request->response_msg.msg.set_complete;
+               if (set_complete->status != RNDIS_STATUS_SUCCESS) {
+-                      netdev_err(ndev, "Fail to set MAC on host side:0x%x\n",
++                      netdev_err(ndev, "Fail to set offload on host side:0x%x\n",
+                                  set_complete->status);
+                       ret = -EINVAL;
+               }
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0015-hyperv-Properly-handle-checksum-offload.patch b/src/patches/linux/0015-hyperv-Properly-handle-checksum-offload.patch
new file mode 100644 (file)
index 0000000..0760833
--- /dev/null
@@ -0,0 +1,42 @@
+From 540360773bad2b81f14c38ca92b62797fa32cbc6 Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Wed, 30 Apr 2014 11:58:25 -0700
+Subject: [PATCH 15/25] hyperv: Properly handle checksum offload
+
+Do checksum offload only if the client of the driver wants checksum to be
+offloaded.
+
+In V1 version of this patch, I  addressed comments from
+Stephen Hemminger <stephen@networkplumber.org> and
+Eric Dumazet <eric.dumazet@gmail.com>.
+
+In this version of the patch I have addressed comments from
+David Miller.
+
+This patch fixes a bug that is exposed in gateway scenarios.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/netvsc_drv.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 0d898876689e..ce6d870dd7ae 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -383,6 +383,10 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       if (skb_is_gso(skb))
+               goto do_lso;
++      if ((skb->ip_summed == CHECKSUM_NONE) ||
++          (skb->ip_summed == CHECKSUM_UNNECESSARY))
++              goto do_send;
++
+       rndis_msg_size += NDIS_CSUM_PPI_SIZE;
+       ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
+                           TCPIP_CHKSUM_PKTINFO);
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0016-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch b/src/patches/linux/0016-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch
new file mode 100644 (file)
index 0000000..d01a428
--- /dev/null
@@ -0,0 +1,917 @@
+From 44559a96c6864eb6e95db0ae896c621b82e605f3 Mon Sep 17 00:00:00 2001
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 21 Apr 2014 10:20:28 -0700
+Subject: [PATCH 16/25] hyperv: Add support for virtual Receive Side Scaling
+ (vRSS)
+
+This feature allows multiple channels to be used by each virtual NIC.
+It is available on Hyper-V host 2012 R2.
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h   | 110 +++++++++++++++++++++-
+ drivers/net/hyperv/netvsc.c       | 136 +++++++++++++++++++++------
+ drivers/net/hyperv/netvsc_drv.c   | 103 ++++++++++++++++++++-
+ drivers/net/hyperv/rndis_filter.c | 189 +++++++++++++++++++++++++++++++++++++-
+ 4 files changed, 504 insertions(+), 34 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index d18f711d0b0c..57eb3f906d64 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -28,6 +28,96 @@
+ #include <linux/hyperv.h>
+ #include <linux/rndis.h>
++/* RSS related */
++#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203  /* query only */
++#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204  /* query and set */
++
++#define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88
++#define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89
++
++#define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2
++#define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2
++
++struct ndis_obj_header {
++      u8 type;
++      u8 rev;
++      u16 size;
++} __packed;
++
++/* ndis_recv_scale_cap/cap_flag */
++#define NDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000
++#define NDIS_RSS_CAPS_CLASSIFICATION_AT_ISR       0x02000000
++#define NDIS_RSS_CAPS_CLASSIFICATION_AT_DPC       0x04000000
++#define NDIS_RSS_CAPS_USING_MSI_X                 0x08000000
++#define NDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS      0x10000000
++#define NDIS_RSS_CAPS_SUPPORTS_MSI_X              0x20000000
++#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4          0x00000100
++#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6          0x00000200
++#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX       0x00000400
++
++struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */
++      struct ndis_obj_header hdr;
++      u32 cap_flag;
++      u32 num_int_msg;
++      u32 num_recv_que;
++      u16 num_indirect_tabent;
++} __packed;
++
++
++/* ndis_recv_scale_param flags */
++#define NDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED     0x0001
++#define NDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED    0x0002
++#define NDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED       0x0004
++#define NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED     0x0008
++#define NDIS_RSS_PARAM_FLAG_DISABLE_RSS            0x0010
++
++/* Hash info bits */
++#define NDIS_HASH_FUNC_TOEPLITZ 0x00000001
++#define NDIS_HASH_IPV4          0x00000100
++#define NDIS_HASH_TCP_IPV4      0x00000200
++#define NDIS_HASH_IPV6          0x00000400
++#define NDIS_HASH_IPV6_EX       0x00000800
++#define NDIS_HASH_TCP_IPV6      0x00001000
++#define NDIS_HASH_TCP_IPV6_EX   0x00002000
++
++#define NDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4)
++#define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2   40
++
++#define ITAB_NUM 128
++#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2
++extern u8 netvsc_hash_key[];
++
++struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
++      struct ndis_obj_header hdr;
++
++      /* Qualifies the rest of the information */
++      u16 flag;
++
++      /* The base CPU number to do receive processing. not used */
++      u16 base_cpu_number;
++
++      /* This describes the hash function and type being enabled */
++      u32 hashinfo;
++
++      /* The size of indirection table array */
++      u16 indirect_tabsize;
++
++      /* The offset of the indirection table from the beginning of this
++       * structure
++       */
++      u32 indirect_taboffset;
++
++      /* The size of the hash secret key */
++      u16 hashkey_size;
++
++      /* The offset of the secret key from the beginning of this structure */
++      u32 kashkey_offset;
++
++      u32 processor_masks_offset;
++      u32 num_processor_masks;
++      u32 processor_masks_entry_size;
++};
++
+ /* Fwd declaration */
+ struct hv_netvsc_packet;
+ struct ndis_tcp_ip_checksum_info;
+@@ -39,6 +129,8 @@ struct xferpage_packet {
+       /* # of netvsc packets this xfer packet contains */
+       u32 count;
++
++      struct vmbus_channel *channel;
+ };
+ /*
+@@ -54,6 +146,9 @@ struct hv_netvsc_packet {
+       bool is_data_pkt;
+       u16 vlan_tci;
++      u16 q_idx;
++      struct vmbus_channel *channel;
++
+       /*
+        * Valid only for receives when we break a xfer page packet
+        * into multiple netvsc packets
+@@ -120,6 +215,7 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
+ int netvsc_recv_callback(struct hv_device *device_obj,
+                       struct hv_netvsc_packet *packet,
+                       struct ndis_tcp_ip_checksum_info *csum_info);
++void netvsc_channel_cb(void *context);
+ int rndis_filter_open(struct hv_device *dev);
+ int rndis_filter_close(struct hv_device *dev);
+ int rndis_filter_device_add(struct hv_device *dev,
+@@ -522,6 +618,8 @@ struct nvsp_message {
+ #define NETVSC_PACKET_SIZE                      2048
++#define VRSS_SEND_TAB_SIZE 16
++
+ /* Per netvsc channel-specific */
+ struct netvsc_device {
+       struct hv_device *dev;
+@@ -555,10 +653,20 @@ struct netvsc_device {
+       struct net_device *ndev;
++      struct vmbus_channel *chn_table[NR_CPUS];
++      u32 send_table[VRSS_SEND_TAB_SIZE];
++      u32 num_chn;
++      atomic_t queue_sends[NR_CPUS];
++
+       /* Holds rndis device info */
+       void *extension;
+-      /* The recive buffer for this device */
++
++      int ring_size;
++
++      /* The primary channel callback buffer */
+       unsigned char cb_buffer[NETVSC_PACKET_SIZE];
++      /* The sub channel callback buffer */
++      unsigned char *sub_cb_buf;
+ };
+ /* NdisInitialize message */
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index f7629ecefa84..e7e77f12bc38 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -422,6 +422,9 @@ int netvsc_device_remove(struct hv_device *device)
+               kfree(netvsc_packet);
+       }
++      if (net_device->sub_cb_buf)
++              vfree(net_device->sub_cb_buf);
++
+       kfree(net_device);
+       return 0;
+ }
+@@ -461,7 +464,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
+           (nvsp_packet->hdr.msg_type ==
+            NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
+           (nvsp_packet->hdr.msg_type ==
+-           NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE)) {
++           NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
++          (nvsp_packet->hdr.msg_type ==
++           NVSP_MSG5_TYPE_SUBCHANNEL)) {
+               /* Copy the response back */
+               memcpy(&net_device->channel_init_pkt, nvsp_packet,
+                      sizeof(struct nvsp_message));
+@@ -469,28 +474,37 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
+       } else if (nvsp_packet->hdr.msg_type ==
+                  NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
+               int num_outstanding_sends;
++              u16 q_idx = 0;
++              struct vmbus_channel *channel = device->channel;
++              int queue_sends;
+               /* Get the send context */
+               nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
+                       packet->trans_id;
+               /* Notify the layer above us */
+-              if (nvsc_packet)
++              if (nvsc_packet) {
++                      q_idx = nvsc_packet->q_idx;
++                      channel = nvsc_packet->channel;
+                       nvsc_packet->completion.send.send_completion(
+                               nvsc_packet->completion.send.
+                               send_completion_ctx);
++              }
+               num_outstanding_sends =
+                       atomic_dec_return(&net_device->num_outstanding_sends);
++              queue_sends = atomic_dec_return(&net_device->
++                                              queue_sends[q_idx]);
+               if (net_device->destroy && num_outstanding_sends == 0)
+                       wake_up(&net_device->wait_drain);
+-              if (netif_queue_stopped(ndev) && !net_device->start_remove &&
+-                      (hv_ringbuf_avail_percent(&device->channel->outbound)
+-                      > RING_AVAIL_PERCENT_HIWATER ||
+-                      num_outstanding_sends < 1))
+-                              netif_wake_queue(ndev);
++              if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
++                  !net_device->start_remove &&
++                  (hv_ringbuf_avail_percent(&channel->outbound) >
++                   RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
++                              netif_tx_wake_queue(netdev_get_tx_queue(
++                                                  ndev, q_idx));
+       } else {
+               netdev_err(ndev, "Unknown send completion packet type- "
+                          "%d received!!\n", nvsp_packet->hdr.msg_type);
+@@ -505,6 +519,7 @@ int netvsc_send(struct hv_device *device,
+       int ret = 0;
+       struct nvsp_message sendMessage;
+       struct net_device *ndev;
++      struct vmbus_channel *out_channel = NULL;
+       u64 req_id;
+       net_device = get_outbound_net_device(device);
+@@ -531,15 +546,20 @@ int netvsc_send(struct hv_device *device,
+       else
+               req_id = 0;
++      out_channel = net_device->chn_table[packet->q_idx];
++      if (out_channel == NULL)
++              out_channel = device->channel;
++      packet->channel = out_channel;
++
+       if (packet->page_buf_cnt) {
+-              ret = vmbus_sendpacket_pagebuffer(device->channel,
++              ret = vmbus_sendpacket_pagebuffer(out_channel,
+                                                 packet->page_buf,
+                                                 packet->page_buf_cnt,
+                                                 &sendMessage,
+                                                 sizeof(struct nvsp_message),
+                                                 req_id);
+       } else {
+-              ret = vmbus_sendpacket(device->channel, &sendMessage,
++              ret = vmbus_sendpacket(out_channel, &sendMessage,
+                               sizeof(struct nvsp_message),
+                               req_id,
+                               VM_PKT_DATA_INBAND,
+@@ -548,17 +568,24 @@ int netvsc_send(struct hv_device *device,
+       if (ret == 0) {
+               atomic_inc(&net_device->num_outstanding_sends);
+-              if (hv_ringbuf_avail_percent(&device->channel->outbound) <
++              atomic_inc(&net_device->queue_sends[packet->q_idx]);
++
++              if (hv_ringbuf_avail_percent(&out_channel->outbound) <
+                       RING_AVAIL_PERCENT_LOWATER) {
+-                      netif_stop_queue(ndev);
++                      netif_tx_stop_queue(netdev_get_tx_queue(
++                                          ndev, packet->q_idx));
++
+                       if (atomic_read(&net_device->
+-                              num_outstanding_sends) < 1)
+-                              netif_wake_queue(ndev);
++                              queue_sends[packet->q_idx]) < 1)
++                              netif_tx_wake_queue(netdev_get_tx_queue(
++                                                  ndev, packet->q_idx));
+               }
+       } else if (ret == -EAGAIN) {
+-              netif_stop_queue(ndev);
+-              if (atomic_read(&net_device->num_outstanding_sends) < 1) {
+-                      netif_wake_queue(ndev);
++              netif_tx_stop_queue(netdev_get_tx_queue(
++                                  ndev, packet->q_idx));
++              if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) {
++                      netif_tx_wake_queue(netdev_get_tx_queue(
++                                          ndev, packet->q_idx));
+                       ret = -ENOSPC;
+               }
+       } else {
+@@ -570,6 +597,7 @@ int netvsc_send(struct hv_device *device,
+ }
+ static void netvsc_send_recv_completion(struct hv_device *device,
++                                      struct vmbus_channel *channel,
+                                       struct netvsc_device *net_device,
+                                       u64 transaction_id, u32 status)
+ {
+@@ -587,7 +615,7 @@ static void netvsc_send_recv_completion(struct hv_device *device,
+ retry_send_cmplt:
+       /* Send the completion */
+-      ret = vmbus_sendpacket(device->channel, &recvcompMessage,
++      ret = vmbus_sendpacket(channel, &recvcompMessage,
+                              sizeof(struct nvsp_message), transaction_id,
+                              VM_PKT_COMP, 0);
+       if (ret == 0) {
+@@ -618,6 +646,7 @@ static void netvsc_receive_completion(void *context)
+ {
+       struct hv_netvsc_packet *packet = context;
+       struct hv_device *device = packet->device;
++      struct vmbus_channel *channel;
+       struct netvsc_device *net_device;
+       u64 transaction_id = 0;
+       bool fsend_receive_comp = false;
+@@ -649,6 +678,7 @@ static void netvsc_receive_completion(void *context)
+        */
+       if (packet->xfer_page_pkt->count == 0) {
+               fsend_receive_comp = true;
++              channel = packet->xfer_page_pkt->channel;
+               transaction_id = packet->completion.recv.recv_completion_tid;
+               status = packet->xfer_page_pkt->status;
+               list_add_tail(&packet->xfer_page_pkt->list_ent,
+@@ -662,12 +692,13 @@ static void netvsc_receive_completion(void *context)
+       /* Send a receive completion for the xfer page packet */
+       if (fsend_receive_comp)
+-              netvsc_send_recv_completion(device, net_device, transaction_id,
+-                                      status);
++              netvsc_send_recv_completion(device, channel, net_device,
++                                          transaction_id, status);
+ }
+ static void netvsc_receive(struct netvsc_device *net_device,
++                      struct vmbus_channel *channel,
+                       struct hv_device *device,
+                       struct vmpacket_descriptor *packet)
+ {
+@@ -748,7 +779,7 @@ static void netvsc_receive(struct netvsc_device *net_device,
+               spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
+                                      flags);
+-              netvsc_send_recv_completion(device, net_device,
++              netvsc_send_recv_completion(device, channel, net_device,
+                                           vmxferpage_packet->d.trans_id,
+                                           NVSP_STAT_FAIL);
+@@ -759,6 +790,7 @@ static void netvsc_receive(struct netvsc_device *net_device,
+       xferpage_packet = (struct xferpage_packet *)listHead.next;
+       list_del(&xferpage_packet->list_ent);
+       xferpage_packet->status = NVSP_STAT_SUCCESS;
++      xferpage_packet->channel = channel;
+       /* This is how much we can satisfy */
+       xferpage_packet->count = count - 1;
+@@ -800,10 +832,45 @@ static void netvsc_receive(struct netvsc_device *net_device,
+ }
+-static void netvsc_channel_cb(void *context)
++
++static void netvsc_send_table(struct hv_device *hdev,
++                            struct vmpacket_descriptor *vmpkt)
++{
++      struct netvsc_device *nvscdev;
++      struct net_device *ndev;
++      struct nvsp_message *nvmsg;
++      int i;
++      u32 count, *tab;
++
++      nvscdev = get_outbound_net_device(hdev);
++      if (!nvscdev)
++              return;
++      ndev = nvscdev->ndev;
++
++      nvmsg = (struct nvsp_message *)((unsigned long)vmpkt +
++                                      (vmpkt->offset8 << 3));
++
++      if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE)
++              return;
++
++      count = nvmsg->msg.v5_msg.send_table.count;
++      if (count != VRSS_SEND_TAB_SIZE) {
++              netdev_err(ndev, "Received wrong send-table size:%u\n", count);
++              return;
++      }
++
++      tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
++                    nvmsg->msg.v5_msg.send_table.offset);
++
++      for (i = 0; i < count; i++)
++              nvscdev->send_table[i] = tab[i];
++}
++
++void netvsc_channel_cb(void *context)
+ {
+       int ret;
+-      struct hv_device *device = context;
++      struct vmbus_channel *channel = (struct vmbus_channel *)context;
++      struct hv_device *device;
+       struct netvsc_device *net_device;
+       u32 bytes_recvd;
+       u64 request_id;
+@@ -812,14 +879,19 @@ static void netvsc_channel_cb(void *context)
+       int bufferlen = NETVSC_PACKET_SIZE;
+       struct net_device *ndev;
++      if (channel->primary_channel != NULL)
++              device = channel->primary_channel->device_obj;
++      else
++              device = channel->device_obj;
++
+       net_device = get_inbound_net_device(device);
+       if (!net_device)
+               return;
+       ndev = net_device->ndev;
+-      buffer = net_device->cb_buffer;
++      buffer = get_per_channel_state(channel);
+       do {
+-              ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen,
++              ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
+                                          &bytes_recvd, &request_id);
+               if (ret == 0) {
+                       if (bytes_recvd > 0) {
+@@ -831,8 +903,12 @@ static void netvsc_channel_cb(void *context)
+                                       break;
+                               case VM_PKT_DATA_USING_XFER_PAGES:
+-                                      netvsc_receive(net_device,
+-                                                      device, desc);
++                                      netvsc_receive(net_device, channel,
++                                                     device, desc);
++                                      break;
++
++                              case VM_PKT_DATA_INBAND:
++                                      netvsc_send_table(device, desc);
+                                       break;
+                               default:
+@@ -893,6 +969,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
+               goto cleanup;
+       }
++      net_device->ring_size = ring_size;
++
+       /*
+        * Coming into this function, struct net_device * is
+        * registered as the driver private data.
+@@ -917,10 +995,12 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
+       }
+       init_completion(&net_device->channel_init_wait);
++      set_per_channel_state(device->channel, net_device->cb_buffer);
++
+       /* Open the channel */
+       ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
+                        ring_size * PAGE_SIZE, NULL, 0,
+-                       netvsc_channel_cb, device);
++                       netvsc_channel_cb, device->channel);
+       if (ret != 0) {
+               netdev_err(ndev, "unable to open channel: %d\n", ret);
+@@ -930,6 +1010,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
+       /* Channel is opened */
+       pr_info("hv_netvsc channel opened successfully\n");
++      net_device->chn_table[0] = device->channel;
++
+       /* Connect with the NetVsp */
+       ret = netvsc_connect_vsp(device);
+       if (ret != 0) {
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index ce6d870dd7ae..e486dbd33f61 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -101,7 +101,7 @@ static int netvsc_open(struct net_device *net)
+               return ret;
+       }
+-      netif_start_queue(net);
++      netif_tx_start_all_queues(net);
+       nvdev = hv_get_drvdata(device_obj);
+       rdev = nvdev->extension;
+@@ -149,6 +149,88 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
+       return ppi;
+ }
++union sub_key {
++      u64 k;
++      struct {
++              u8 pad[3];
++              u8 kb;
++              u32 ka;
++      };
++};
++
++/* Toeplitz hash function
++ * data: network byte order
++ * return: host byte order
++ */
++static u32 comp_hash(u8 *key, int klen, u8 *data, int dlen)
++{
++      union sub_key subk;
++      int k_next = 4;
++      u8 dt;
++      int i, j;
++      u32 ret = 0;
++
++      subk.k = 0;
++      subk.ka = ntohl(*(u32 *)key);
++
++      for (i = 0; i < dlen; i++) {
++              subk.kb = key[k_next];
++              k_next = (k_next + 1) % klen;
++              dt = data[i];
++              for (j = 0; j < 8; j++) {
++                      if (dt & 0x80)
++                              ret ^= subk.ka;
++                      dt <<= 1;
++                      subk.k <<= 1;
++              }
++      }
++
++      return ret;
++}
++
++static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
++{
++      struct iphdr *iphdr;
++      int data_len;
++      bool ret = false;
++
++      if (eth_hdr(skb)->h_proto != htons(ETH_P_IP))
++              return false;
++
++      iphdr = ip_hdr(skb);
++
++      if (iphdr->version == 4) {
++              if (iphdr->protocol == IPPROTO_TCP)
++                      data_len = 12;
++              else
++                      data_len = 8;
++              *hash = comp_hash(netvsc_hash_key, HASH_KEYLEN,
++                                (u8 *)&iphdr->saddr, data_len);
++              ret = true;
++      }
++
++      return ret;
++}
++
++static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
++                      void *accel_priv, select_queue_fallback_t fallback)
++{
++      struct net_device_context *net_device_ctx = netdev_priv(ndev);
++      struct hv_device *hdev =  net_device_ctx->device_ctx;
++      struct netvsc_device *nvsc_dev = hv_get_drvdata(hdev);
++      u32 hash;
++      u16 q_idx = 0;
++
++      if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1)
++              return 0;
++
++      if (netvsc_set_hash(&hash, skb))
++              q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
++                      ndev->real_num_tx_queues;
++
++      return q_idx;
++}
++
+ static void netvsc_xmit_completion(void *context)
+ {
+       struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
+@@ -334,6 +416,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       packet->vlan_tci = skb->vlan_tci;
++      packet->q_idx = skb_get_queue_mapping(skb);
++
+       packet->is_data_pkt = true;
+       packet->total_data_buflen = skb->len;
+@@ -559,6 +643,10 @@ int netvsc_recv_callback(struct hv_device *device_obj,
+               __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+                                      packet->vlan_tci);
++      skb_record_rx_queue(skb, packet->xfer_page_pkt->channel->
++                          offermsg.offer.sub_channel_index %
++                          net->real_num_rx_queues);
++
+       net->stats.rx_packets++;
+       net->stats.rx_bytes += packet->total_data_buflen;
+@@ -607,7 +695,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
+       hv_set_drvdata(hdev, ndev);
+       device_info.ring_size = ring_size;
+       rndis_filter_device_add(hdev, &device_info);
+-      netif_wake_queue(ndev);
++      netif_tx_wake_all_queues(ndev);
+       return 0;
+ }
+@@ -653,6 +741,7 @@ static const struct net_device_ops device_ops = {
+       .ndo_change_mtu =               netvsc_change_mtu,
+       .ndo_validate_addr =            eth_validate_addr,
+       .ndo_set_mac_address =          netvsc_set_mac_addr,
++      .ndo_select_queue =             netvsc_select_queue,
+ };
+ /*
+@@ -699,9 +788,11 @@ static int netvsc_probe(struct hv_device *dev,
+       struct net_device *net = NULL;
+       struct net_device_context *net_device_ctx;
+       struct netvsc_device_info device_info;
++      struct netvsc_device *nvdev;
+       int ret;
+-      net = alloc_etherdev(sizeof(struct net_device_context));
++      net = alloc_etherdev_mq(sizeof(struct net_device_context),
++                              num_online_cpus());
+       if (!net)
+               return -ENOMEM;
+@@ -734,6 +825,12 @@ static int netvsc_probe(struct hv_device *dev,
+       }
+       memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
++      nvdev = hv_get_drvdata(dev);
++      netif_set_real_num_tx_queues(net, nvdev->num_chn);
++      netif_set_real_num_rx_queues(net, nvdev->num_chn);
++      dev_info(&dev->device, "real num tx,rx queues:%u, %u\n",
++               net->real_num_tx_queues, net->real_num_rx_queues);
++
+       ret = register_netdev(net);
+       if (ret != 0) {
+               pr_err("Unable to register netdev.\n");
+diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
+index 143a98caf618..d92cfbe43410 100644
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -31,7 +31,7 @@
+ #include "hyperv_net.h"
+-#define RNDIS_EXT_LEN 100
++#define RNDIS_EXT_LEN PAGE_SIZE
+ struct rndis_request {
+       struct list_head list_ent;
+       struct completion  wait_event;
+@@ -94,6 +94,8 @@ static struct rndis_request *get_rndis_request(struct rndis_device *dev,
+       rndis_msg->ndis_msg_type = msg_type;
+       rndis_msg->msg_len = msg_len;
++      request->pkt.q_idx = 0;
++
+       /*
+        * Set the request id. This field is always after the rndis header for
+        * request/response packet types so we just used the SetRequest as a
+@@ -509,6 +511,19 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
+       query->info_buflen = 0;
+       query->dev_vc_handle = 0;
++      if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) {
++              struct ndis_recv_scale_cap *cap;
++
++              request->request_msg.msg_len +=
++                      sizeof(struct ndis_recv_scale_cap);
++              query->info_buflen = sizeof(struct ndis_recv_scale_cap);
++              cap = (struct ndis_recv_scale_cap *)((unsigned long)query +
++                                                   query->info_buf_offset);
++              cap->hdr.type = NDIS_OBJECT_TYPE_RSS_CAPABILITIES;
++              cap->hdr.rev = NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2;
++              cap->hdr.size = sizeof(struct ndis_recv_scale_cap);
++      }
++
+       ret = rndis_filter_send_request(dev, request);
+       if (ret != 0)
+               goto cleanup;
+@@ -695,6 +710,89 @@ cleanup:
+       return ret;
+ }
++u8 netvsc_hash_key[HASH_KEYLEN] = {
++      0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
++      0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
++      0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
++      0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
++      0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
++};
++
++int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue)
++{
++      struct net_device *ndev = rdev->net_dev->ndev;
++      struct rndis_request *request;
++      struct rndis_set_request *set;
++      struct rndis_set_complete *set_complete;
++      u32 extlen = sizeof(struct ndis_recv_scale_param) +
++                   4*ITAB_NUM + HASH_KEYLEN;
++      struct ndis_recv_scale_param *rssp;
++      u32 *itab;
++      u8 *keyp;
++      int i, t, ret;
++
++      request = get_rndis_request(
++                      rdev, RNDIS_MSG_SET,
++                      RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen);
++      if (!request)
++              return -ENOMEM;
++
++      set = &request->request_msg.msg.set_req;
++      set->oid = OID_GEN_RECEIVE_SCALE_PARAMETERS;
++      set->info_buflen = extlen;
++      set->info_buf_offset = sizeof(struct rndis_set_request);
++      set->dev_vc_handle = 0;
++
++      rssp = (struct ndis_recv_scale_param *)(set + 1);
++      rssp->hdr.type = NDIS_OBJECT_TYPE_RSS_PARAMETERS;
++      rssp->hdr.rev = NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2;
++      rssp->hdr.size = sizeof(struct ndis_recv_scale_param);
++      rssp->flag = 0;
++      rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 |
++                       NDIS_HASH_TCP_IPV4;
++      rssp->indirect_tabsize = 4*ITAB_NUM;
++      rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param);
++      rssp->hashkey_size = HASH_KEYLEN;
++      rssp->kashkey_offset = rssp->indirect_taboffset +
++                             rssp->indirect_tabsize;
++
++      /* Set indirection table entries */
++      itab = (u32 *)(rssp + 1);
++      for (i = 0; i < ITAB_NUM; i++)
++              itab[i] = i % num_queue;
++
++      /* Set hash key values */
++      keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset);
++      for (i = 0; i < HASH_KEYLEN; i++)
++              keyp[i] = netvsc_hash_key[i];
++
++
++      ret = rndis_filter_send_request(rdev, request);
++      if (ret != 0)
++              goto cleanup;
++
++      t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
++      if (t == 0) {
++              netdev_err(ndev, "timeout before we got a set response...\n");
++              /* can't put_rndis_request, since we may still receive a
++               * send-completion.
++               */
++              return -ETIMEDOUT;
++      } else {
++              set_complete = &request->response_msg.msg.set_complete;
++              if (set_complete->status != RNDIS_STATUS_SUCCESS) {
++                      netdev_err(ndev, "Fail to set RSS parameters:0x%x\n",
++                                 set_complete->status);
++                      ret = -EINVAL;
++              }
++      }
++
++cleanup:
++      put_rndis_request(rdev, request);
++      return ret;
++}
++
++
+ static int rndis_filter_query_device_link_status(struct rndis_device *dev)
+ {
+       u32 size = sizeof(u32);
+@@ -886,6 +984,28 @@ static int rndis_filter_close_device(struct rndis_device *dev)
+       return ret;
+ }
++static void netvsc_sc_open(struct vmbus_channel *new_sc)
++{
++      struct netvsc_device *nvscdev;
++      u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
++      int ret;
++
++      nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj);
++
++      if (chn_index >= nvscdev->num_chn)
++              return;
++
++      set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) *
++                            NETVSC_PACKET_SIZE);
++
++      ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
++                       nvscdev->ring_size * PAGE_SIZE, NULL, 0,
++                       netvsc_channel_cb, new_sc);
++
++      if (ret == 0)
++              nvscdev->chn_table[chn_index] = new_sc;
++}
++
+ int rndis_filter_device_add(struct hv_device *dev,
+                                 void *additional_info)
+ {
+@@ -894,6 +1014,10 @@ int rndis_filter_device_add(struct hv_device *dev,
+       struct rndis_device *rndis_device;
+       struct netvsc_device_info *device_info = additional_info;
+       struct ndis_offload_params offloads;
++      struct nvsp_message *init_packet;
++      int t;
++      struct ndis_recv_scale_cap rsscap;
++      u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
+       rndis_device = get_rndis_device();
+       if (!rndis_device)
+@@ -913,6 +1037,7 @@ int rndis_filter_device_add(struct hv_device *dev,
+       /* Initialize the rndis device */
+       net_device = hv_get_drvdata(dev);
++      net_device->num_chn = 1;
+       net_device->extension = rndis_device;
+       rndis_device->net_dev = net_device;
+@@ -952,7 +1077,6 @@ int rndis_filter_device_add(struct hv_device *dev,
+       if (ret)
+               goto err_dev_remv;
+-
+       rndis_filter_query_device_link_status(rndis_device);
+       device_info->link_state = rndis_device->link_state;
+@@ -961,7 +1085,66 @@ int rndis_filter_device_add(struct hv_device *dev,
+                rndis_device->hw_mac_adr,
+                device_info->link_state ? "down" : "up");
+-      return ret;
++      if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
++              return 0;
++
++      /* vRSS setup */
++      memset(&rsscap, 0, rsscap_size);
++      ret = rndis_filter_query_device(rndis_device,
++                                      OID_GEN_RECEIVE_SCALE_CAPABILITIES,
++                                      &rsscap, &rsscap_size);
++      if (ret || rsscap.num_recv_que < 2)
++              goto out;
++
++      net_device->num_chn = (num_online_cpus() < rsscap.num_recv_que) ?
++                             num_online_cpus() : rsscap.num_recv_que;
++      if (net_device->num_chn == 1)
++              goto out;
++
++      net_device->sub_cb_buf = vzalloc((net_device->num_chn - 1) *
++                                       NETVSC_PACKET_SIZE);
++      if (!net_device->sub_cb_buf) {
++              net_device->num_chn = 1;
++              dev_info(&dev->device, "No memory for subchannels.\n");
++              goto out;
++      }
++
++      vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
++
++      init_packet = &net_device->channel_init_pkt;
++      memset(init_packet, 0, sizeof(struct nvsp_message));
++      init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL;
++      init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE;
++      init_packet->msg.v5_msg.subchn_req.num_subchannels =
++                                              net_device->num_chn - 1;
++      ret = vmbus_sendpacket(dev->channel, init_packet,
++                             sizeof(struct nvsp_message),
++                             (unsigned long)init_packet,
++                             VM_PKT_DATA_INBAND,
++                             VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
++      if (ret)
++              goto out;
++      t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
++      if (t == 0) {
++              ret = -ETIMEDOUT;
++              goto out;
++      }
++      if (init_packet->msg.v5_msg.subchn_comp.status !=
++          NVSP_STAT_SUCCESS) {
++              ret = -ENODEV;
++              goto out;
++      }
++      net_device->num_chn = 1 +
++              init_packet->msg.v5_msg.subchn_comp.num_subchannels;
++
++      vmbus_are_subchannels_present(dev->channel);
++
++      ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn);
++
++out:
++      if (ret)
++              net_device->num_chn = 1;
++      return 0; /* return 0 because primary channel can be used alone */
+ err_dev_remv:
+       rndis_filter_device_remove(dev);
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0017-hyperv-Remove-recv_pkt_list-and-lock.patch b/src/patches/linux/0017-hyperv-Remove-recv_pkt_list-and-lock.patch
new file mode 100644 (file)
index 0000000..31465a1
--- /dev/null
@@ -0,0 +1,384 @@
+From d6bf5567c1438b4f3b1bcff1a1525ddb1754df19 Mon Sep 17 00:00:00 2001
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 21 Apr 2014 14:54:43 -0700
+Subject: [PATCH 17/25] hyperv: Remove recv_pkt_list and lock
+
+Removed recv_pkt_list and lock, and updated related code, so that
+the locking overhead is reduced especially when multiple channels
+are in use.
+
+The recv_pkt_list isn't actually necessary because the packets are
+processed sequentially in each channel. It has been replaced by a
+local variable, and the related lock for this list is also removed.
+The is_data_pkt field is not used in receive path, so its assignment
+is cleaned up.
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h   |  33 --------
+ drivers/net/hyperv/netvsc.c       | 174 +++-----------------------------------
+ drivers/net/hyperv/netvsc_drv.c   |   2 +-
+ drivers/net/hyperv/rndis_filter.c |   2 -
+ 4 files changed, 13 insertions(+), 198 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index 57eb3f906d64..a1af0f7711e2 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -119,27 +119,14 @@ struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
+ };
+ /* Fwd declaration */
+-struct hv_netvsc_packet;
+ struct ndis_tcp_ip_checksum_info;
+-/* Represent the xfer page packet which contains 1 or more netvsc packet */
+-struct xferpage_packet {
+-      struct list_head list_ent;
+-      u32 status;
+-
+-      /* # of netvsc packets this xfer packet contains */
+-      u32 count;
+-
+-      struct vmbus_channel *channel;
+-};
+-
+ /*
+  * Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame
+  * within the RNDIS
+  */
+ struct hv_netvsc_packet {
+       /* Bookkeeping stuff */
+-      struct list_head list_ent;
+       u32 status;
+       struct hv_device *device;
+@@ -149,19 +136,8 @@ struct hv_netvsc_packet {
+       u16 q_idx;
+       struct vmbus_channel *channel;
+-      /*
+-       * Valid only for receives when we break a xfer page packet
+-       * into multiple netvsc packets
+-       */
+-      struct xferpage_packet *xfer_page_pkt;
+-
+       union {
+               struct {
+-                      u64 recv_completion_tid;
+-                      void *recv_completion_ctx;
+-                      void (*recv_completion)(void *context);
+-              } recv;
+-              struct {
+                       u64 send_completion_tid;
+                       void *send_completion_ctx;
+                       void (*send_completion)(void *context);
+@@ -613,9 +589,6 @@ struct nvsp_message {
+ #define NETVSC_RECEIVE_BUFFER_ID              0xcafe
+-/* Preallocated receive packets */
+-#define NETVSC_RECEIVE_PACKETLIST_COUNT               256
+-
+ #define NETVSC_PACKET_SIZE                      2048
+ #define VRSS_SEND_TAB_SIZE 16
+@@ -630,12 +603,6 @@ struct netvsc_device {
+       wait_queue_head_t wait_drain;
+       bool start_remove;
+       bool destroy;
+-      /*
+-       * List of free preallocated hv_netvsc_packet to represent receive
+-       * packet
+-       */
+-      struct list_head recv_pkt_list;
+-      spinlock_t recv_pkt_list_lock;
+       /* Receive buffer allocated by us but manages by NetVSP */
+       void *recv_buf;
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index e7e77f12bc38..b10334773b32 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -387,7 +387,6 @@ static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
+ int netvsc_device_remove(struct hv_device *device)
+ {
+       struct netvsc_device *net_device;
+-      struct hv_netvsc_packet *netvsc_packet, *pos;
+       unsigned long flags;
+       net_device = hv_get_drvdata(device);
+@@ -416,12 +415,6 @@ int netvsc_device_remove(struct hv_device *device)
+       vmbus_close(device->channel);
+       /* Release all resources */
+-      list_for_each_entry_safe(netvsc_packet, pos,
+-                               &net_device->recv_pkt_list, list_ent) {
+-              list_del(&netvsc_packet->list_ent);
+-              kfree(netvsc_packet);
+-      }
+-
+       if (net_device->sub_cb_buf)
+               vfree(net_device->sub_cb_buf);
+@@ -641,62 +634,6 @@ retry_send_cmplt:
+       }
+ }
+-/* Send a receive completion packet to RNDIS device (ie NetVsp) */
+-static void netvsc_receive_completion(void *context)
+-{
+-      struct hv_netvsc_packet *packet = context;
+-      struct hv_device *device = packet->device;
+-      struct vmbus_channel *channel;
+-      struct netvsc_device *net_device;
+-      u64 transaction_id = 0;
+-      bool fsend_receive_comp = false;
+-      unsigned long flags;
+-      struct net_device *ndev;
+-      u32 status = NVSP_STAT_NONE;
+-
+-      /*
+-       * Even though it seems logical to do a GetOutboundNetDevice() here to
+-       * send out receive completion, we are using GetInboundNetDevice()
+-       * since we may have disable outbound traffic already.
+-       */
+-      net_device = get_inbound_net_device(device);
+-      if (!net_device)
+-              return;
+-      ndev = net_device->ndev;
+-
+-      /* Overloading use of the lock. */
+-      spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
+-
+-      if (packet->status != NVSP_STAT_SUCCESS)
+-              packet->xfer_page_pkt->status = NVSP_STAT_FAIL;
+-
+-      packet->xfer_page_pkt->count--;
+-
+-      /*
+-       * Last one in the line that represent 1 xfer page packet.
+-       * Return the xfer page packet itself to the freelist
+-       */
+-      if (packet->xfer_page_pkt->count == 0) {
+-              fsend_receive_comp = true;
+-              channel = packet->xfer_page_pkt->channel;
+-              transaction_id = packet->completion.recv.recv_completion_tid;
+-              status = packet->xfer_page_pkt->status;
+-              list_add_tail(&packet->xfer_page_pkt->list_ent,
+-                            &net_device->recv_pkt_list);
+-
+-      }
+-
+-      /* Put the packet back */
+-      list_add_tail(&packet->list_ent, &net_device->recv_pkt_list);
+-      spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags);
+-
+-      /* Send a receive completion for the xfer page packet */
+-      if (fsend_receive_comp)
+-              netvsc_send_recv_completion(device, channel, net_device,
+-                                          transaction_id, status);
+-
+-}
+-
+ static void netvsc_receive(struct netvsc_device *net_device,
+                       struct vmbus_channel *channel,
+                       struct hv_device *device,
+@@ -704,16 +641,13 @@ static void netvsc_receive(struct netvsc_device *net_device,
+ {
+       struct vmtransfer_page_packet_header *vmxferpage_packet;
+       struct nvsp_message *nvsp_packet;
+-      struct hv_netvsc_packet *netvsc_packet = NULL;
+-      /* struct netvsc_driver *netvscDriver; */
+-      struct xferpage_packet *xferpage_packet = NULL;
++      struct hv_netvsc_packet nv_pkt;
++      struct hv_netvsc_packet *netvsc_packet = &nv_pkt;
++      u32 status = NVSP_STAT_SUCCESS;
+       int i;
+       int count = 0;
+-      unsigned long flags;
+       struct net_device *ndev;
+-      LIST_HEAD(listHead);
+-
+       ndev = net_device->ndev;
+       /*
+@@ -746,78 +680,14 @@ static void netvsc_receive(struct netvsc_device *net_device,
+               return;
+       }
+-      /*
+-       * Grab free packets (range count + 1) to represent this xfer
+-       * page packet. +1 to represent the xfer page packet itself.
+-       * We grab it here so that we know exactly how many we can
+-       * fulfil
+-       */
+-      spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
+-      while (!list_empty(&net_device->recv_pkt_list)) {
+-              list_move_tail(net_device->recv_pkt_list.next, &listHead);
+-              if (++count == vmxferpage_packet->range_cnt + 1)
+-                      break;
+-      }
+-      spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags);
+-
+-      /*
+-       * We need at least 2 netvsc pkts (1 to represent the xfer
+-       * page and at least 1 for the range) i.e. we can handled
+-       * some of the xfer page packet ranges...
+-       */
+-      if (count < 2) {
+-              netdev_err(ndev, "Got only %d netvsc pkt...needed "
+-                      "%d pkts. Dropping this xfer page packet completely!\n",
+-                      count, vmxferpage_packet->range_cnt + 1);
+-
+-              /* Return it to the freelist */
+-              spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
+-              for (i = count; i != 0; i--) {
+-                      list_move_tail(listHead.next,
+-                                     &net_device->recv_pkt_list);
+-              }
+-              spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
+-                                     flags);
+-
+-              netvsc_send_recv_completion(device, channel, net_device,
+-                                          vmxferpage_packet->d.trans_id,
+-                                          NVSP_STAT_FAIL);
+-
+-              return;
+-      }
+-
+-      /* Remove the 1st packet to represent the xfer page packet itself */
+-      xferpage_packet = (struct xferpage_packet *)listHead.next;
+-      list_del(&xferpage_packet->list_ent);
+-      xferpage_packet->status = NVSP_STAT_SUCCESS;
+-      xferpage_packet->channel = channel;
+-
+-      /* This is how much we can satisfy */
+-      xferpage_packet->count = count - 1;
+-
+-      if (xferpage_packet->count != vmxferpage_packet->range_cnt) {
+-              netdev_err(ndev, "Needed %d netvsc pkts to satisfy "
+-                      "this xfer page...got %d\n",
+-                      vmxferpage_packet->range_cnt, xferpage_packet->count);
+-      }
++      count = vmxferpage_packet->range_cnt;
++      netvsc_packet->device = device;
++      netvsc_packet->channel = channel;
+       /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
+-      for (i = 0; i < (count - 1); i++) {
+-              netvsc_packet = (struct hv_netvsc_packet *)listHead.next;
+-              list_del(&netvsc_packet->list_ent);
+-
++      for (i = 0; i < count; i++) {
+               /* Initialize the netvsc packet */
+               netvsc_packet->status = NVSP_STAT_SUCCESS;
+-              netvsc_packet->xfer_page_pkt = xferpage_packet;
+-              netvsc_packet->completion.recv.recv_completion =
+-                                      netvsc_receive_completion;
+-              netvsc_packet->completion.recv.recv_completion_ctx =
+-                                      netvsc_packet;
+-              netvsc_packet->device = device;
+-              /* Save this so that we can send it back */
+-              netvsc_packet->completion.recv.recv_completion_tid =
+-                                      vmxferpage_packet->d.trans_id;
+-
+               netvsc_packet->data = (void *)((unsigned long)net_device->
+                       recv_buf + vmxferpage_packet->ranges[i].byte_offset);
+               netvsc_packet->total_data_buflen =
+@@ -826,10 +696,12 @@ static void netvsc_receive(struct netvsc_device *net_device,
+               /* Pass it to the upper layer */
+               rndis_filter_receive(device, netvsc_packet);
+-              netvsc_receive_completion(netvsc_packet->
+-                              completion.recv.recv_completion_ctx);
++              if (netvsc_packet->status != NVSP_STAT_SUCCESS)
++                      status = NVSP_STAT_FAIL;
+       }
++      netvsc_send_recv_completion(device, channel, net_device,
++                                  vmxferpage_packet->d.trans_id, status);
+ }
+@@ -956,11 +828,9 @@ void netvsc_channel_cb(void *context)
+ int netvsc_device_add(struct hv_device *device, void *additional_info)
+ {
+       int ret = 0;
+-      int i;
+       int ring_size =
+       ((struct netvsc_device_info *)additional_info)->ring_size;
+       struct netvsc_device *net_device;
+-      struct hv_netvsc_packet *packet, *pos;
+       struct net_device *ndev;
+       net_device = alloc_net_device(device);
+@@ -981,18 +851,6 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
+       ndev = net_device->ndev;
+       /* Initialize the NetVSC channel extension */
+-      spin_lock_init(&net_device->recv_pkt_list_lock);
+-
+-      INIT_LIST_HEAD(&net_device->recv_pkt_list);
+-
+-      for (i = 0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) {
+-              packet = kzalloc(sizeof(struct hv_netvsc_packet), GFP_KERNEL);
+-              if (!packet)
+-                      break;
+-
+-              list_add_tail(&packet->list_ent,
+-                            &net_device->recv_pkt_list);
+-      }
+       init_completion(&net_device->channel_init_wait);
+       set_per_channel_state(device->channel, net_device->cb_buffer);
+@@ -1028,16 +886,8 @@ close:
+ cleanup:
+-      if (net_device) {
+-              list_for_each_entry_safe(packet, pos,
+-                                       &net_device->recv_pkt_list,
+-                                       list_ent) {
+-                      list_del(&packet->list_ent);
+-                      kfree(packet);
+-              }
+-
++      if (net_device)
+               kfree(net_device);
+-      }
+       return ret;
+ }
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index e486dbd33f61..6cc4db064fec 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -643,7 +643,7 @@ int netvsc_recv_callback(struct hv_device *device_obj,
+               __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+                                      packet->vlan_tci);
+-      skb_record_rx_queue(skb, packet->xfer_page_pkt->channel->
++      skb_record_rx_queue(skb, packet->channel->
+                           offermsg.offer.sub_channel_index %
+                           net->real_num_rx_queues);
+diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
+index d92cfbe43410..48f5a0fbd674 100644
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -401,8 +401,6 @@ static void rndis_filter_receive_data(struct rndis_device *dev,
+       pkt->total_data_buflen = rndis_pkt->data_len;
+       pkt->data = (void *)((unsigned long)pkt->data + data_offset);
+-      pkt->is_data_pkt = true;
+-
+       vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO);
+       if (vlan) {
+               pkt->vlan_tci = VLAN_TAG_PRESENT | vlan->vlanid |
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0018-hyperv-Simplify-the-send_completion-variables.patch b/src/patches/linux/0018-hyperv-Simplify-the-send_completion-variables.patch
new file mode 100644 (file)
index 0000000..82b742b
--- /dev/null
@@ -0,0 +1,105 @@
+From d6eeeb452c1e6e7cf14f4a581a8f2ea2c50ec17a Mon Sep 17 00:00:00 2001
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Mon, 21 Apr 2014 14:54:44 -0700
+Subject: [PATCH 18/25] hyperv: Simplify the send_completion variables
+
+The union contains only one member now, so we use the variables in it directly.
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h   | 10 +++-------
+ drivers/net/hyperv/netvsc.c       |  7 +++----
+ drivers/net/hyperv/netvsc_drv.c   |  8 ++++----
+ drivers/net/hyperv/rndis_filter.c |  2 +-
+ 4 files changed, 11 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index a1af0f7711e2..d1f7826aa75f 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -136,13 +136,9 @@ struct hv_netvsc_packet {
+       u16 q_idx;
+       struct vmbus_channel *channel;
+-      union {
+-              struct {
+-                      u64 send_completion_tid;
+-                      void *send_completion_ctx;
+-                      void (*send_completion)(void *context);
+-              } send;
+-      } completion;
++      u64 send_completion_tid;
++      void *send_completion_ctx;
++      void (*send_completion)(void *context);
+       /* This points to the memory after page_buf */
+       struct rndis_message *rndis_msg;
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index b10334773b32..bbee44635035 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -479,9 +479,8 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
+               if (nvsc_packet) {
+                       q_idx = nvsc_packet->q_idx;
+                       channel = nvsc_packet->channel;
+-                      nvsc_packet->completion.send.send_completion(
+-                              nvsc_packet->completion.send.
+-                              send_completion_ctx);
++                      nvsc_packet->send_completion(nvsc_packet->
++                                                   send_completion_ctx);
+               }
+               num_outstanding_sends =
+@@ -534,7 +533,7 @@ int netvsc_send(struct hv_device *device,
+               0xFFFFFFFF;
+       sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
+-      if (packet->completion.send.send_completion)
++      if (packet->send_completion)
+               req_id = (ulong)packet;
+       else
+               req_id = 0;
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 6cc4db064fec..f3d3525ed42d 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -235,7 +235,7 @@ static void netvsc_xmit_completion(void *context)
+ {
+       struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
+       struct sk_buff *skb = (struct sk_buff *)
+-              (unsigned long)packet->completion.send.send_completion_tid;
++              (unsigned long)packet->send_completion_tid;
+       kfree(packet);
+@@ -426,9 +426,9 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+                               (num_data_pgs * sizeof(struct hv_page_buffer)));
+       /* Set the completion routine */
+-      packet->completion.send.send_completion = netvsc_xmit_completion;
+-      packet->completion.send.send_completion_ctx = packet;
+-      packet->completion.send.send_completion_tid = (unsigned long)skb;
++      packet->send_completion = netvsc_xmit_completion;
++      packet->send_completion_ctx = packet;
++      packet->send_completion_tid = (unsigned long)skb;
+       isvlan = packet->vlan_tci & VLAN_TAG_PRESENT;
+diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
+index 48f5a0fbd674..99c527adae5b 100644
+--- a/drivers/net/hyperv/rndis_filter.c
++++ b/drivers/net/hyperv/rndis_filter.c
+@@ -236,7 +236,7 @@ static int rndis_filter_send_request(struct rndis_device *dev,
+                       packet->page_buf[0].len;
+       }
+-      packet->completion.send.send_completion = NULL;
++      packet->send_completion = NULL;
+       ret = netvsc_send(dev->net_dev->dev, packet);
+       return ret;
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0019-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch b/src/patches/linux/0019-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch
new file mode 100644 (file)
index 0000000..3940865
--- /dev/null
@@ -0,0 +1,407 @@
+From 4685e50349d5dc5fe485c898ca3ce539e93a0118 Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Wed, 30 Apr 2014 10:14:31 -0700
+Subject: [PATCH 19/25] hyperv: Enable sendbuf mechanism on the send path
+
+We send packets using a copy-free mechanism (this is the Guest to Host transport
+via VMBUS). While this is obviously optimal for large packets,
+it may not be optimal for small packets. Hyper-V host supports
+a second mechanism for sending packets that is "copy based". We implement that
+mechanism in this patch.
+
+In this version of the patch I have addressed a comment from David Miller.
+
+With this patch (and all of the other offload and VRSS patches), we are now able
+to almost saturate a 10G interface between Linux VMs on Hyper-V
+on different hosts - close to  9 Gbps as measured via iperf.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h |  14 +++
+ drivers/net/hyperv/netvsc.c     | 226 ++++++++++++++++++++++++++++++++++++++--
+ drivers/net/hyperv/netvsc_drv.c |   3 +-
+ 3 files changed, 234 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index d1f7826aa75f..4b7df5a5c966 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -140,6 +140,8 @@ struct hv_netvsc_packet {
+       void *send_completion_ctx;
+       void (*send_completion)(void *context);
++      u32 send_buf_index;
++
+       /* This points to the memory after page_buf */
+       struct rndis_message *rndis_msg;
+@@ -582,6 +584,9 @@ struct nvsp_message {
+ #define NETVSC_RECEIVE_BUFFER_SIZE            (1024*1024*16)  /* 16MB */
+ #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY     (1024*1024*15)  /* 15MB */
++#define NETVSC_SEND_BUFFER_SIZE                       (1024 * 1024)   /* 1MB */
++#define NETVSC_INVALID_INDEX                  -1
++
+ #define NETVSC_RECEIVE_BUFFER_ID              0xcafe
+@@ -607,6 +612,15 @@ struct netvsc_device {
+       u32 recv_section_cnt;
+       struct nvsp_1_receive_buffer_section *recv_section;
++      /* Send buffer allocated by us */
++      void *send_buf;
++      u32 send_buf_size;
++      u32 send_buf_gpadl_handle;
++      u32 send_section_cnt;
++      u32 send_section_size;
++      unsigned long *send_section_map;
++      int map_words;
++
+       /* Used for NetVSP initialization protocol */
+       struct completion channel_init_wait;
+       struct nvsp_message channel_init_pkt;
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index bbee44635035..c041f63a6d30 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -28,6 +28,7 @@
+ #include <linux/slab.h>
+ #include <linux/netdevice.h>
+ #include <linux/if_ether.h>
++#include <asm/sync_bitops.h>
+ #include "hyperv_net.h"
+@@ -80,7 +81,7 @@ get_in_err:
+ }
+-static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
++static int netvsc_destroy_buf(struct netvsc_device *net_device)
+ {
+       struct nvsp_message *revoke_packet;
+       int ret = 0;
+@@ -146,10 +147,62 @@ static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
+               net_device->recv_section = NULL;
+       }
++      /* Deal with the send buffer we may have setup.
++       * If we got a  send section size, it means we received a
++       * SendsendBufferComplete msg (ie sent
++       * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
++       * to send a revoke msg here
++       */
++      if (net_device->send_section_size) {
++              /* Send the revoke receive buffer */
++              revoke_packet = &net_device->revoke_packet;
++              memset(revoke_packet, 0, sizeof(struct nvsp_message));
++
++              revoke_packet->hdr.msg_type =
++                      NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
++              revoke_packet->msg.v1_msg.revoke_recv_buf.id = 0;
++
++              ret = vmbus_sendpacket(net_device->dev->channel,
++                                     revoke_packet,
++                                     sizeof(struct nvsp_message),
++                                     (unsigned long)revoke_packet,
++                                     VM_PKT_DATA_INBAND, 0);
++              /* If we failed here, we might as well return and
++               * have a leak rather than continue and a bugchk
++               */
++              if (ret != 0) {
++                      netdev_err(ndev, "unable to send "
++                                 "revoke send buffer to netvsp\n");
++                      return ret;
++              }
++      }
++      /* Teardown the gpadl on the vsp end */
++      if (net_device->send_buf_gpadl_handle) {
++              ret = vmbus_teardown_gpadl(net_device->dev->channel,
++                                         net_device->send_buf_gpadl_handle);
++
++              /* If we failed here, we might as well return and have a leak
++               * rather than continue and a bugchk
++               */
++              if (ret != 0) {
++                      netdev_err(ndev,
++                                 "unable to teardown send buffer's gpadl\n");
++                      return ret;
++              }
++              net_device->recv_buf_gpadl_handle = 0;
++      }
++      if (net_device->send_buf) {
++              /* Free up the receive buffer */
++              free_pages((unsigned long)net_device->send_buf,
++                         get_order(net_device->send_buf_size));
++              net_device->send_buf = NULL;
++      }
++      kfree(net_device->send_section_map);
++
+       return ret;
+ }
+-static int netvsc_init_recv_buf(struct hv_device *device)
++static int netvsc_init_buf(struct hv_device *device)
+ {
+       int ret = 0;
+       int t;
+@@ -248,10 +301,90 @@ static int netvsc_init_recv_buf(struct hv_device *device)
+               goto cleanup;
+       }
++      /* Now setup the send buffer.
++       */
++      net_device->send_buf =
++              (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
++                                       get_order(net_device->send_buf_size));
++      if (!net_device->send_buf) {
++              netdev_err(ndev, "unable to allocate send "
++                         "buffer of size %d\n", net_device->send_buf_size);
++              ret = -ENOMEM;
++              goto cleanup;
++      }
++
++      /* Establish the gpadl handle for this buffer on this
++       * channel.  Note: This call uses the vmbus connection rather
++       * than the channel to establish the gpadl handle.
++       */
++      ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
++                                  net_device->send_buf_size,
++                                  &net_device->send_buf_gpadl_handle);
++      if (ret != 0) {
++              netdev_err(ndev,
++                         "unable to establish send buffer's gpadl\n");
++              goto cleanup;
++      }
++
++      /* Notify the NetVsp of the gpadl handle */
++      init_packet = &net_device->channel_init_pkt;
++      memset(init_packet, 0, sizeof(struct nvsp_message));
++      init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
++      init_packet->msg.v1_msg.send_recv_buf.gpadl_handle =
++              net_device->send_buf_gpadl_handle;
++      init_packet->msg.v1_msg.send_recv_buf.id = 0;
++
++      /* Send the gpadl notification request */
++      ret = vmbus_sendpacket(device->channel, init_packet,
++                             sizeof(struct nvsp_message),
++                             (unsigned long)init_packet,
++                             VM_PKT_DATA_INBAND,
++                             VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
++      if (ret != 0) {
++              netdev_err(ndev,
++                         "unable to send send buffer's gpadl to netvsp\n");
++              goto cleanup;
++      }
++
++      t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
++      BUG_ON(t == 0);
++
++      /* Check the response */
++      if (init_packet->msg.v1_msg.
++          send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
++              netdev_err(ndev, "Unable to complete send buffer "
++                         "initialization with NetVsp - status %d\n",
++                         init_packet->msg.v1_msg.
++                         send_recv_buf_complete.status);
++              ret = -EINVAL;
++              goto cleanup;
++      }
++
++      /* Parse the response */
++      net_device->send_section_size = init_packet->msg.
++                              v1_msg.send_send_buf_complete.section_size;
++
++      /* Section count is simply the size divided by the section size.
++       */
++      net_device->send_section_cnt =
++              net_device->send_buf_size/net_device->send_section_size;
++
++      dev_info(&device->device, "Send section size: %d, Section count:%d\n",
++               net_device->send_section_size, net_device->send_section_cnt);
++
++      /* Setup state for managing the send buffer. */
++      net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
++                                           BITS_PER_LONG);
++
++      net_device->send_section_map =
++              kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
++      if (net_device->send_section_map == NULL)
++              goto cleanup;
++
+       goto exit;
+ cleanup:
+-      netvsc_destroy_recv_buf(net_device);
++      netvsc_destroy_buf(net_device);
+ exit:
+       return ret;
+@@ -369,8 +502,9 @@ static int netvsc_connect_vsp(struct hv_device *device)
+               net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
+       else
+               net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
++      net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
+-      ret = netvsc_init_recv_buf(device);
++      ret = netvsc_init_buf(device);
+ cleanup:
+       return ret;
+@@ -378,7 +512,7 @@ cleanup:
+ static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
+ {
+-      netvsc_destroy_recv_buf(net_device);
++      netvsc_destroy_buf(net_device);
+ }
+ /*
+@@ -440,6 +574,12 @@ static inline u32 hv_ringbuf_avail_percent(
+       return avail_write * 100 / ring_info->ring_datasize;
+ }
++static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
++                                       u32 index)
++{
++      sync_change_bit(index, net_device->send_section_map);
++}
++
+ static void netvsc_send_completion(struct netvsc_device *net_device,
+                                  struct hv_device *device,
+                                  struct vmpacket_descriptor *packet)
+@@ -447,6 +587,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
+       struct nvsp_message *nvsp_packet;
+       struct hv_netvsc_packet *nvsc_packet;
+       struct net_device *ndev;
++      u32 send_index;
+       ndev = net_device->ndev;
+@@ -477,6 +618,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
+               /* Notify the layer above us */
+               if (nvsc_packet) {
++                      send_index = nvsc_packet->send_buf_index;
++                      if (send_index != NETVSC_INVALID_INDEX)
++                              netvsc_free_send_slot(net_device, send_index);
+                       q_idx = nvsc_packet->q_idx;
+                       channel = nvsc_packet->channel;
+                       nvsc_packet->send_completion(nvsc_packet->
+@@ -504,6 +648,52 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
+ }
++static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
++{
++      unsigned long index;
++      u32 max_words = net_device->map_words;
++      unsigned long *map_addr = (unsigned long *)net_device->send_section_map;
++      u32 section_cnt = net_device->send_section_cnt;
++      int ret_val = NETVSC_INVALID_INDEX;
++      int i;
++      int prev_val;
++
++      for (i = 0; i < max_words; i++) {
++              if (!~(map_addr[i]))
++                      continue;
++              index = ffz(map_addr[i]);
++              prev_val = sync_test_and_set_bit(index, &map_addr[i]);
++              if (prev_val)
++                      continue;
++              if ((index + (i * BITS_PER_LONG)) >= section_cnt)
++                      break;
++              ret_val = (index + (i * BITS_PER_LONG));
++              break;
++      }
++      return ret_val;
++}
++
++u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
++                          unsigned int section_index,
++                          struct hv_netvsc_packet *packet)
++{
++      char *start = net_device->send_buf;
++      char *dest = (start + (section_index * net_device->send_section_size));
++      int i;
++      u32 msg_size = 0;
++
++      for (i = 0; i < packet->page_buf_cnt; i++) {
++              char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT);
++              u32 offset = packet->page_buf[i].offset;
++              u32 len = packet->page_buf[i].len;
++
++              memcpy(dest, (src + offset), len);
++              msg_size += len;
++              dest += len;
++      }
++      return msg_size;
++}
++
+ int netvsc_send(struct hv_device *device,
+                       struct hv_netvsc_packet *packet)
+ {
+@@ -513,6 +703,10 @@ int netvsc_send(struct hv_device *device,
+       struct net_device *ndev;
+       struct vmbus_channel *out_channel = NULL;
+       u64 req_id;
++      unsigned int section_index = NETVSC_INVALID_INDEX;
++      u32 msg_size = 0;
++      struct sk_buff *skb;
++
+       net_device = get_outbound_net_device(device);
+       if (!net_device)
+@@ -528,10 +722,26 @@ int netvsc_send(struct hv_device *device,
+               sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
+       }
+-      /* Not using send buffer section */
++      /* Attempt to send via sendbuf */
++      if (packet->total_data_buflen < net_device->send_section_size) {
++              section_index = netvsc_get_next_send_section(net_device);
++              if (section_index != NETVSC_INVALID_INDEX) {
++                      msg_size = netvsc_copy_to_send_buf(net_device,
++                                                         section_index,
++                                                         packet);
++                      skb = (struct sk_buff *)
++                            (unsigned long)packet->send_completion_tid;
++                      if (skb)
++                              dev_kfree_skb_any(skb);
++                      packet->page_buf_cnt = 0;
++              }
++      }
++      packet->send_buf_index = section_index;
++
++
+       sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
+-              0xFFFFFFFF;
+-      sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
++              section_index;
++      sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size;
+       if (packet->send_completion)
+               req_id = (ulong)packet;
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index f3d3525ed42d..9a19aa5672e6 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -236,10 +236,11 @@ static void netvsc_xmit_completion(void *context)
+       struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
+       struct sk_buff *skb = (struct sk_buff *)
+               (unsigned long)packet->send_completion_tid;
++      u32 index = packet->send_buf_index;
+       kfree(packet);
+-      if (skb)
++      if (skb && (index == NETVSC_INVALID_INDEX))
+               dev_kfree_skb_any(skb);
+ }
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0020-Add-support-for-netvsc-build-without-CONFIG_SYSFS-fl.patch b/src/patches/linux/0020-Add-support-for-netvsc-build-without-CONFIG_SYSFS-fl.patch
new file mode 100644 (file)
index 0000000..b610b54
--- /dev/null
@@ -0,0 +1,42 @@
+From 3f0b77385356301cf4718a94f76a4068588ecb8e Mon Sep 17 00:00:00 2001
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Thu, 8 May 2014 15:14:10 -0700
+Subject: [PATCH 20/25] Add support for netvsc build without CONFIG_SYSFS flag
+
+This change ensures the driver can be built successfully without the
+CONFIG_SYSFS flag.
+MS-TFS: 182270
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/netvsc_drv.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 9a19aa5672e6..346f1aeb9c24 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -645,8 +645,7 @@ int netvsc_recv_callback(struct hv_device *device_obj,
+                                      packet->vlan_tci);
+       skb_record_rx_queue(skb, packet->channel->
+-                          offermsg.offer.sub_channel_index %
+-                          net->real_num_rx_queues);
++                          offermsg.offer.sub_channel_index);
+       net->stats.rx_packets++;
+       net->stats.rx_bytes += packet->total_data_buflen;
+@@ -829,8 +828,6 @@ static int netvsc_probe(struct hv_device *dev,
+       nvdev = hv_get_drvdata(dev);
+       netif_set_real_num_tx_queues(net, nvdev->num_chn);
+       netif_set_real_num_rx_queues(net, nvdev->num_chn);
+-      dev_info(&dev->device, "real num tx,rx queues:%u, %u\n",
+-               net->real_num_tx_queues, net->real_num_rx_queues);
+       ret = register_netdev(net);
+       if (ret != 0) {
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0021-hyperv-Add-hash-value-into-RNDIS-Per-packet-info.patch b/src/patches/linux/0021-hyperv-Add-hash-value-into-RNDIS-Per-packet-info.patch
new file mode 100644 (file)
index 0000000..c522d28
--- /dev/null
@@ -0,0 +1,93 @@
+From 1267d9b235e7612f8cdfa842a0433bb82e75f7fb Mon Sep 17 00:00:00 2001
+From: Haiyang Zhang <haiyangz@microsoft.com>
+Date: Wed, 21 May 2014 12:55:39 -0700
+Subject: [PATCH 21/25] hyperv: Add hash value into RNDIS Per-packet info
+
+It passes the hash value as the RNDIS Per-packet info to the Hyper-V host,
+so that the send completion notices can be spread across multiple channels.
+MS-TFS: 140273
+
+Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/hyperv_net.h |  4 ++++
+ drivers/net/hyperv/netvsc_drv.c | 18 ++++++++++++++----
+ 2 files changed, 18 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
+index 4b7df5a5c966..6cc37c15e0bf 100644
+--- a/drivers/net/hyperv/hyperv_net.h
++++ b/drivers/net/hyperv/hyperv_net.h
+@@ -791,6 +791,7 @@ enum ndis_per_pkt_info_type {
+       IEEE_8021Q_INFO,
+       ORIGINAL_PKTINFO,
+       PACKET_CANCEL_ID,
++      NBL_HASH_VALUE = PACKET_CANCEL_ID,
+       ORIGINAL_NET_BUFLIST,
+       CACHED_NET_BUFLIST,
+       SHORT_PKT_PADINFO,
+@@ -937,6 +938,9 @@ struct ndis_tcp_lso_info {
+ #define NDIS_LSO_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
+               sizeof(struct ndis_tcp_lso_info))
++#define NDIS_HASH_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
++              sizeof(u32))
++
+ /* Format of Information buffer passed in a SetRequest for the OID */
+ /* OID_GEN_RNDIS_CONFIG_PARAMETER. */
+ struct rndis_config_parameter_info {
+diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
+index 346f1aeb9c24..bd3b3acd04dc 100644
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -224,9 +224,11 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
+       if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1)
+               return 0;
+-      if (netvsc_set_hash(&hash, skb))
++      if (netvsc_set_hash(&hash, skb)) {
+               q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
+                       ndev->real_num_tx_queues;
++              skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
++      }
+       return q_idx;
+ }
+@@ -385,6 +387,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       struct ndis_tcp_lso_info *lso_info;
+       int  hdr_offset;
+       u32 net_trans_info;
++      u32 hash;
+       /* We will atmost need two pages to describe the rndis
+@@ -403,9 +406,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       packet = kzalloc(sizeof(struct hv_netvsc_packet) +
+                        (num_data_pgs * sizeof(struct hv_page_buffer)) +
+                        sizeof(struct rndis_message) +
+-                       NDIS_VLAN_PPI_SIZE +
+-                       NDIS_CSUM_PPI_SIZE +
+-                       NDIS_LSO_PPI_SIZE, GFP_ATOMIC);
++                       NDIS_VLAN_PPI_SIZE + NDIS_CSUM_PPI_SIZE +
++                       NDIS_LSO_PPI_SIZE + NDIS_HASH_PPI_SIZE, GFP_ATOMIC);
+       if (!packet) {
+               /* out of memory, drop packet */
+               netdev_err(net, "unable to allocate hv_netvsc_packet\n");
+@@ -444,6 +446,14 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+       rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
++      hash = skb_get_hash_raw(skb);
++      if (hash != 0 && net->real_num_tx_queues > 1) {
++              rndis_msg_size += NDIS_HASH_PPI_SIZE;
++              ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
++                                  NBL_HASH_VALUE);
++              *(u32 *)((void *)ppi + ppi->ppi_offset) = hash;
++      }
++
+       if (isvlan) {
+               struct ndis_pkt_8021q_info *vlan;
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0022-hyperv-fix-apparent-cut-n-paste-error-in-send-path-t.patch b/src/patches/linux/0022-hyperv-fix-apparent-cut-n-paste-error-in-send-path-t.patch
new file mode 100644 (file)
index 0000000..d849ebb
--- /dev/null
@@ -0,0 +1,32 @@
+From ee99150c7f34737e4382e7199d2ffe3dfbb54a5c Mon Sep 17 00:00:00 2001
+From: Dave Jones <davej@redhat.com>
+Date: Mon, 16 Jun 2014 16:59:02 -0400
+Subject: [PATCH 22/25] hyperv: fix apparent cut-n-paste error in send path
+ teardown
+
+c25aaf814a63: "hyperv: Enable sendbuf mechanism on the send path" added
+some teardown code that looks like it was copied from the receive path
+above, but missed a variable name replacement.
+
+Signed-off-by: Dave Jones <davej@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/netvsc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index c041f63a6d30..4ed38eaecea8 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -189,7 +189,7 @@ static int netvsc_destroy_buf(struct netvsc_device *net_device)
+                                  "unable to teardown send buffer's gpadl\n");
+                       return ret;
+               }
+-              net_device->recv_buf_gpadl_handle = 0;
++              net_device->send_buf_gpadl_handle = 0;
+       }
+       if (net_device->send_buf) {
+               /* Free up the receive buffer */
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0023-hyperv-Fix-error-return-code-in-netvsc_init_buf.patch b/src/patches/linux/0023-hyperv-Fix-error-return-code-in-netvsc_init_buf.patch
new file mode 100644 (file)
index 0000000..ab0960d
--- /dev/null
@@ -0,0 +1,34 @@
+From f1009dfec7c439a958bd9ed8893dd6aa692c61f5 Mon Sep 17 00:00:00 2001
+From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
+Date: Wed, 23 Jul 2014 09:00:35 +0800
+Subject: [PATCH 23/25] hyperv: Fix error return code in netvsc_init_buf()
+
+Fix to return -ENOMEM from the kzalloc error handling
+case instead of 0.
+
+Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/hyperv/netvsc.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index 4ed38eaecea8..d97d5f39a04e 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -378,8 +378,10 @@ static int netvsc_init_buf(struct hv_device *device)
+       net_device->send_section_map =
+               kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
+-      if (net_device->send_section_map == NULL)
++      if (net_device->send_section_map == NULL) {
++              ret = -ENOMEM;
+               goto cleanup;
++      }
+       goto exit;
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0024-hyperv-Fix-a-bug-in-netvsc_send.patch b/src/patches/linux/0024-hyperv-Fix-a-bug-in-netvsc_send.patch
new file mode 100644 (file)
index 0000000..8495fa2
--- /dev/null
@@ -0,0 +1,68 @@
+From 38bca2d5bd6fdaa0b8e1e415f79d89322c6825a8 Mon Sep 17 00:00:00 2001
+From: KY Srinivasan <kys@microsoft.com>
+Date: Sun, 5 Oct 2014 10:42:51 -0700
+Subject: [PATCH 24/25] hyperv: Fix a bug in netvsc_send()
+
+[ Upstream commit 3a67c9ccad926a168d8b7891537a452018368a5b ]
+
+After the packet is successfully sent, we should not touch the packet
+as it may have been freed. This patch is based on the work done by
+Long Li <longli@microsoft.com>.
+
+David, please queue this up for stable.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Reported-by: Sitsofe Wheeler <sitsofe@yahoo.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
+index d97d5f39a04e..7edf976ecfa0 100644
+--- a/drivers/net/hyperv/netvsc.c
++++ b/drivers/net/hyperv/netvsc.c
+@@ -708,6 +708,7 @@ int netvsc_send(struct hv_device *device,
+       unsigned int section_index = NETVSC_INVALID_INDEX;
+       u32 msg_size = 0;
+       struct sk_buff *skb;
++      u16 q_idx = packet->q_idx;
+       net_device = get_outbound_net_device(device);
+@@ -772,24 +773,24 @@ int netvsc_send(struct hv_device *device,
+       if (ret == 0) {
+               atomic_inc(&net_device->num_outstanding_sends);
+-              atomic_inc(&net_device->queue_sends[packet->q_idx]);
++              atomic_inc(&net_device->queue_sends[q_idx]);
+               if (hv_ringbuf_avail_percent(&out_channel->outbound) <
+                       RING_AVAIL_PERCENT_LOWATER) {
+                       netif_tx_stop_queue(netdev_get_tx_queue(
+-                                          ndev, packet->q_idx));
++                                          ndev, q_idx));
+                       if (atomic_read(&net_device->
+-                              queue_sends[packet->q_idx]) < 1)
++                              queue_sends[q_idx]) < 1)
+                               netif_tx_wake_queue(netdev_get_tx_queue(
+-                                                  ndev, packet->q_idx));
++                                                  ndev, q_idx));
+               }
+       } else if (ret == -EAGAIN) {
+               netif_tx_stop_queue(netdev_get_tx_queue(
+-                                  ndev, packet->q_idx));
+-              if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) {
++                                  ndev, q_idx));
++              if (atomic_read(&net_device->queue_sends[q_idx]) < 1) {
+                       netif_tx_wake_queue(netdev_get_tx_queue(
+-                                          ndev, packet->q_idx));
++                                          ndev, q_idx));
+                       ret = -ENOSPC;
+               }
+       } else {
+-- 
+2.4.3
+
diff --git a/src/patches/linux/0025-Drivers-hv-vmbus-Support-per-channel-driver-state.patch b/src/patches/linux/0025-Drivers-hv-vmbus-Support-per-channel-driver-state.patch
new file mode 100644 (file)
index 0000000..47037be
--- /dev/null
@@ -0,0 +1,51 @@
+From 4b71288758aa1d510402b84ca93b1ef566575d22 Mon Sep 17 00:00:00 2001
+From: "K. Y. Srinivasan" <kys@microsoft.com>
+Date: Mon, 3 Feb 2014 12:42:45 -0800
+Subject: [PATCH 25/25] Drivers: hv: vmbus: Support per-channel driver state
+
+As we implement Virtual Receive Side Scaling on the networking side
+(the VRSS patches are currently under review), it will be useful to have
+per-channel state that vmbus drivers can manage. Add support for
+managing per-channel state.
+
+Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/hyperv.h | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
+index 6088058a3e00..732dc7e37e96 100644
+--- a/include/linux/hyperv.h
++++ b/include/linux/hyperv.h
+@@ -1045,6 +1045,10 @@ struct vmbus_channel {
+        * This will be NULL for the primary channel.
+        */
+       struct vmbus_channel *primary_channel;
++      /*
++       * Support per-channel state for use by vmbus drivers.
++       */
++      void *per_channel_state;
+ };
+ static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
+@@ -1052,6 +1056,16 @@ static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
+       c->batched_reading = state;
+ }
++static inline void set_per_channel_state(struct vmbus_channel *c, void *s)
++{
++      c->per_channel_state = s;
++}
++
++static inline void *get_per_channel_state(struct vmbus_channel *c)
++{
++      return c->per_channel_state;
++}
++
+ void vmbus_onmessage(void *context);
+ int vmbus_request_offers(void);
+-- 
+2.4.3
+