1 From c25aaf814a63f9d9c4e45416f13d70ef0aa0be2e Mon Sep 17 00:00:00 2001
2 From: KY Srinivasan <kys@microsoft.com>
3 Date: Wed, 30 Apr 2014 10:14:31 -0700
4 Subject: [PATCH 04/11] hyperv: Enable sendbuf mechanism on the send path
6 We send packets using a copy-free mechanism (this is the Guest to Host transport
7 via VMBUS). While this is obviously optimal for large packets,
8 it may not be optimal for small packets. Hyper-V host supports
9 a second mechanism for sending packets that is "copy based". We implement that
10 mechanism in this patch.
12 In this version of the patch I have addressed a comment from David Miller.
14 With this patch (and all of the other offload and VRSS patches), we are now able
15 to almost saturate a 10G interface between Linux VMs on Hyper-V
16 on different hosts - close to 9 Gbps as measured via iperf.
18 Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
19 Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
20 Signed-off-by: David S. Miller <davem@davemloft.net>
22 drivers/net/hyperv/hyperv_net.h | 14 +++
23 drivers/net/hyperv/netvsc.c | 226 ++++++++++++++++++++++++++++++++++++++--
24 drivers/net/hyperv/netvsc_drv.c | 3 +-
25 3 files changed, 234 insertions(+), 9 deletions(-)
27 diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
28 index d1f7826aa75f..4b7df5a5c966 100644
29 --- a/drivers/net/hyperv/hyperv_net.h
30 +++ b/drivers/net/hyperv/hyperv_net.h
31 @@ -140,6 +140,8 @@ struct hv_netvsc_packet {
32 void *send_completion_ctx;
33 void (*send_completion)(void *context);
37 /* This points to the memory after page_buf */
38 struct rndis_message *rndis_msg;
40 @@ -582,6 +584,9 @@ struct nvsp_message {
42 #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */
43 #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */
44 +#define NETVSC_SEND_BUFFER_SIZE (1024 * 1024) /* 1MB */
45 +#define NETVSC_INVALID_INDEX -1
48 #define NETVSC_RECEIVE_BUFFER_ID 0xcafe
50 @@ -607,6 +612,15 @@ struct netvsc_device {
52 struct nvsp_1_receive_buffer_section *recv_section;
54 + /* Send buffer allocated by us */
57 + u32 send_buf_gpadl_handle;
58 + u32 send_section_cnt;
59 + u32 send_section_size;
60 + unsigned long *send_section_map;
63 /* Used for NetVSP initialization protocol */
64 struct completion channel_init_wait;
65 struct nvsp_message channel_init_pkt;
66 diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
67 index bbee44635035..c041f63a6d30 100644
68 --- a/drivers/net/hyperv/netvsc.c
69 +++ b/drivers/net/hyperv/netvsc.c
71 #include <linux/slab.h>
72 #include <linux/netdevice.h>
73 #include <linux/if_ether.h>
74 +#include <asm/sync_bitops.h>
76 #include "hyperv_net.h"
78 @@ -80,7 +81,7 @@ get_in_err:
82 -static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
83 +static int netvsc_destroy_buf(struct netvsc_device *net_device)
85 struct nvsp_message *revoke_packet;
87 @@ -146,10 +147,62 @@ static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
88 net_device->recv_section = NULL;
91 + /* Deal with the send buffer we may have setup.
92 + * If we got a send section size, it means we received a
93 + * SendSendBufferComplete msg (ie sent
94 + * NvspMessage1TypeSendSendBuffer msg) therefore, we need
95 + * to send a revoke msg here
97 + if (net_device->send_section_size) {
98 + /* Send the revoke send buffer */
99 + revoke_packet = &net_device->revoke_packet;
100 + memset(revoke_packet, 0, sizeof(struct nvsp_message));
102 + revoke_packet->hdr.msg_type =
103 + NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
104 + revoke_packet->msg.v1_msg.revoke_recv_buf.id = 0;
106 + ret = vmbus_sendpacket(net_device->dev->channel,
108 + sizeof(struct nvsp_message),
109 + (unsigned long)revoke_packet,
110 + VM_PKT_DATA_INBAND, 0);
111 + /* If we failed here, we might as well return and
112 + * have a leak rather than continue and risk a bugcheck
115 + netdev_err(ndev, "unable to send "
116 + "revoke send buffer to netvsp\n");
120 + /* Teardown the gpadl on the vsp end */
121 + if (net_device->send_buf_gpadl_handle) {
122 + ret = vmbus_teardown_gpadl(net_device->dev->channel,
123 + net_device->send_buf_gpadl_handle);
125 + /* If we failed here, we might as well return and have a leak
126 + * rather than continue and risk a bugcheck
130 + "unable to teardown send buffer's gpadl\n");
133 + net_device->recv_buf_gpadl_handle = 0;
135 + if (net_device->send_buf) {
136 + /* Free up the send buffer */
137 + free_pages((unsigned long)net_device->send_buf,
138 + get_order(net_device->send_buf_size));
139 + net_device->send_buf = NULL;
141 + kfree(net_device->send_section_map);
146 -static int netvsc_init_recv_buf(struct hv_device *device)
147 +static int netvsc_init_buf(struct hv_device *device)
151 @@ -248,10 +301,90 @@ static int netvsc_init_recv_buf(struct hv_device *device)
155 + /* Now setup the send buffer.
157 + net_device->send_buf =
158 + (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
159 + get_order(net_device->send_buf_size));
160 + if (!net_device->send_buf) {
161 + netdev_err(ndev, "unable to allocate send "
162 + "buffer of size %d\n", net_device->send_buf_size);
167 + /* Establish the gpadl handle for this buffer on this
168 + * channel. Note: This call uses the vmbus connection rather
169 + * than the channel to establish the gpadl handle.
171 + ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
172 + net_device->send_buf_size,
173 + &net_device->send_buf_gpadl_handle);
176 + "unable to establish send buffer's gpadl\n");
180 + /* Notify the NetVsp of the gpadl handle */
181 + init_packet = &net_device->channel_init_pkt;
182 + memset(init_packet, 0, sizeof(struct nvsp_message));
183 + init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
184 + init_packet->msg.v1_msg.send_recv_buf.gpadl_handle =
185 + net_device->send_buf_gpadl_handle;
186 + init_packet->msg.v1_msg.send_recv_buf.id = 0;
188 + /* Send the gpadl notification request */
189 + ret = vmbus_sendpacket(device->channel, init_packet,
190 + sizeof(struct nvsp_message),
191 + (unsigned long)init_packet,
192 + VM_PKT_DATA_INBAND,
193 + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
196 + "unable to send send buffer's gpadl to netvsp\n");
200 + t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
203 + /* Check the response */
204 + if (init_packet->msg.v1_msg.
205 + send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
206 + netdev_err(ndev, "Unable to complete send buffer "
207 + "initialization with NetVsp - status %d\n",
208 + init_packet->msg.v1_msg.
209 + send_recv_buf_complete.status);
214 + /* Parse the response */
215 + net_device->send_section_size = init_packet->msg.
216 + v1_msg.send_send_buf_complete.section_size;
218 + /* Section count is simply the size divided by the section size.
220 + net_device->send_section_cnt =
221 + net_device->send_buf_size/net_device->send_section_size;
223 + dev_info(&device->device, "Send section size: %d, Section count:%d\n",
224 + net_device->send_section_size, net_device->send_section_cnt);
226 + /* Setup state for managing the send buffer. */
227 + net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
230 + net_device->send_section_map =
231 + kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
232 + if (net_device->send_section_map == NULL)
238 - netvsc_destroy_recv_buf(net_device);
239 + netvsc_destroy_buf(net_device);
243 @@ -369,8 +502,9 @@ static int netvsc_connect_vsp(struct hv_device *device)
244 net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
246 net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
247 + net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
249 - ret = netvsc_init_recv_buf(device);
250 + ret = netvsc_init_buf(device);
254 @@ -378,7 +512,7 @@ cleanup:
256 static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
258 - netvsc_destroy_recv_buf(net_device);
259 + netvsc_destroy_buf(net_device);
263 @@ -440,6 +574,12 @@ static inline u32 hv_ringbuf_avail_percent(
264 return avail_write * 100 / ring_info->ring_datasize;
267 +static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
270 + sync_change_bit(index, net_device->send_section_map);
273 static void netvsc_send_completion(struct netvsc_device *net_device,
274 struct hv_device *device,
275 struct vmpacket_descriptor *packet)
276 @@ -447,6 +587,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
277 struct nvsp_message *nvsp_packet;
278 struct hv_netvsc_packet *nvsc_packet;
279 struct net_device *ndev;
282 ndev = net_device->ndev;
284 @@ -477,6 +618,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
286 /* Notify the layer above us */
288 + send_index = nvsc_packet->send_buf_index;
289 + if (send_index != NETVSC_INVALID_INDEX)
290 + netvsc_free_send_slot(net_device, send_index);
291 q_idx = nvsc_packet->q_idx;
292 channel = nvsc_packet->channel;
293 nvsc_packet->send_completion(nvsc_packet->
294 @@ -504,6 +648,52 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
298 +static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
300 + unsigned long index;
301 + u32 max_words = net_device->map_words;
302 + unsigned long *map_addr = (unsigned long *)net_device->send_section_map;
303 + u32 section_cnt = net_device->send_section_cnt;
304 + int ret_val = NETVSC_INVALID_INDEX;
308 + for (i = 0; i < max_words; i++) {
309 + if (!~(map_addr[i]))
311 + index = ffz(map_addr[i]);
312 + prev_val = sync_test_and_set_bit(index, &map_addr[i]);
315 + if ((index + (i * BITS_PER_LONG)) >= section_cnt)
317 + ret_val = (index + (i * BITS_PER_LONG));
323 +u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
324 + unsigned int section_index,
325 + struct hv_netvsc_packet *packet)
327 + char *start = net_device->send_buf;
328 + char *dest = (start + (section_index * net_device->send_section_size));
332 + for (i = 0; i < packet->page_buf_cnt; i++) {
333 + char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT);
334 + u32 offset = packet->page_buf[i].offset;
335 + u32 len = packet->page_buf[i].len;
337 + memcpy(dest, (src + offset), len);
344 int netvsc_send(struct hv_device *device,
345 struct hv_netvsc_packet *packet)
347 @@ -513,6 +703,10 @@ int netvsc_send(struct hv_device *device,
348 struct net_device *ndev;
349 struct vmbus_channel *out_channel = NULL;
351 + unsigned int section_index = NETVSC_INVALID_INDEX;
353 + struct sk_buff *skb;
356 net_device = get_outbound_net_device(device);
358 @@ -528,10 +722,26 @@ int netvsc_send(struct hv_device *device,
359 sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
362 - /* Not using send buffer section */
363 + /* Attempt to send via sendbuf */
364 + if (packet->total_data_buflen < net_device->send_section_size) {
365 + section_index = netvsc_get_next_send_section(net_device);
366 + if (section_index != NETVSC_INVALID_INDEX) {
367 + msg_size = netvsc_copy_to_send_buf(net_device,
370 + skb = (struct sk_buff *)
371 + (unsigned long)packet->send_completion_tid;
373 + dev_kfree_skb_any(skb);
374 + packet->page_buf_cnt = 0;
377 + packet->send_buf_index = section_index;
380 sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
382 - sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
384 + sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size;
386 if (packet->send_completion)
387 req_id = (ulong)packet;
388 diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
389 index c76b66515e92..939e3af60ec4 100644
390 --- a/drivers/net/hyperv/netvsc_drv.c
391 +++ b/drivers/net/hyperv/netvsc_drv.c
392 @@ -236,10 +236,11 @@ static void netvsc_xmit_completion(void *context)
393 struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
394 struct sk_buff *skb = (struct sk_buff *)
395 (unsigned long)packet->send_completion_tid;
396 + u32 index = packet->send_buf_index;
401 + if (skb && (index == NETVSC_INVALID_INDEX))
402 dev_kfree_skb_any(skb);