]> git.ipfire.org Git - ipfire-2.x.git/blob - src/patches/linux/0004-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch
core117: Regenerate language cache
[ipfire-2.x.git] / src / patches / linux / 0004-hyperv-Enable-sendbuf-mechanism-on-the-send-path.patch
1 From c25aaf814a63f9d9c4e45416f13d70ef0aa0be2e Mon Sep 17 00:00:00 2001
2 From: KY Srinivasan <kys@microsoft.com>
3 Date: Wed, 30 Apr 2014 10:14:31 -0700
4 Subject: [PATCH 04/11] hyperv: Enable sendbuf mechanism on the send path
5
6 We send packets using a copy-free mechanism (this is the Guest to Host transport
7 via VMBUS). While this is obviously optimal for large packets,
8 it may not be optimal for small packets. Hyper-V host supports
9 a second mechanism for sending packets that is "copy based". We implement that
10 mechanism in this patch.
11
12 In this version of the patch I have addressed a comment from David Miller.
13
14 With this patch (and all of the other offload and VRSS patches), we are now able
15 to almost saturate a 10G interface between Linux VMs on Hyper-V
16 on different hosts - close to 9 Gbps as measured via iperf.
17
18 Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
19 Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
20 Signed-off-by: David S. Miller <davem@davemloft.net>
21 ---
22 drivers/net/hyperv/hyperv_net.h | 14 +++
23 drivers/net/hyperv/netvsc.c | 226 ++++++++++++++++++++++++++++++++++++++--
24 drivers/net/hyperv/netvsc_drv.c | 3 +-
25 3 files changed, 234 insertions(+), 9 deletions(-)
26
27 diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
28 index d1f7826aa75f..4b7df5a5c966 100644
29 --- a/drivers/net/hyperv/hyperv_net.h
30 +++ b/drivers/net/hyperv/hyperv_net.h
31 @@ -140,6 +140,8 @@ struct hv_netvsc_packet {
32 void *send_completion_ctx;
33 void (*send_completion)(void *context);
34
35 + u32 send_buf_index;
36 +
37 /* This points to the memory after page_buf */
38 struct rndis_message *rndis_msg;
39
40 @@ -582,6 +584,9 @@ struct nvsp_message {
41
42 #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */
43 #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */
44 +#define NETVSC_SEND_BUFFER_SIZE (1024 * 1024) /* 1MB */
45 +#define NETVSC_INVALID_INDEX -1
46 +
47
48 #define NETVSC_RECEIVE_BUFFER_ID 0xcafe
49
50 @@ -607,6 +612,15 @@ struct netvsc_device {
51 u32 recv_section_cnt;
52 struct nvsp_1_receive_buffer_section *recv_section;
53
54 + /* Send buffer allocated by us */
55 + void *send_buf;
56 + u32 send_buf_size;
57 + u32 send_buf_gpadl_handle;
58 + u32 send_section_cnt;
59 + u32 send_section_size;
60 + unsigned long *send_section_map;
61 + int map_words;
62 +
63 /* Used for NetVSP initialization protocol */
64 struct completion channel_init_wait;
65 struct nvsp_message channel_init_pkt;
66 diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
67 index bbee44635035..c041f63a6d30 100644
68 --- a/drivers/net/hyperv/netvsc.c
69 +++ b/drivers/net/hyperv/netvsc.c
70 @@ -28,6 +28,7 @@
71 #include <linux/slab.h>
72 #include <linux/netdevice.h>
73 #include <linux/if_ether.h>
74 +#include <asm/sync_bitops.h>
75
76 #include "hyperv_net.h"
77
78 @@ -80,7 +81,7 @@ get_in_err:
79 }
80
81
82 -static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
83 +static int netvsc_destroy_buf(struct netvsc_device *net_device)
84 {
85 struct nvsp_message *revoke_packet;
86 int ret = 0;
87 @@ -146,10 +147,62 @@ static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
88 net_device->recv_section = NULL;
89 }
90
91 + /* Deal with the send buffer we may have setup.
92 + * If we got a send section size, it means we received a
93 + * SendSendBufferComplete msg (ie we sent a
94 + * NvspMessage1TypeSendSendBuffer msg) therefore, we need
95 + * to send a revoke msg here
96 + */
97 + if (net_device->send_section_size) {
98 + /* Send the revoke send buffer msg */
99 + revoke_packet = &net_device->revoke_packet;
100 + memset(revoke_packet, 0, sizeof(struct nvsp_message));
101 +
102 + revoke_packet->hdr.msg_type =
103 + NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
104 + revoke_packet->msg.v1_msg.revoke_recv_buf.id = 0;
105 +
106 + ret = vmbus_sendpacket(net_device->dev->channel,
107 + revoke_packet,
108 + sizeof(struct nvsp_message),
109 + (unsigned long)revoke_packet,
110 + VM_PKT_DATA_INBAND, 0);
111 + /* If we failed here, we might as well return and
112 + * have a leak rather than continue and a bugchk
113 + */
114 + if (ret != 0) {
115 + netdev_err(ndev, "unable to send "
116 + "revoke send buffer to netvsp\n");
117 + return ret;
118 + }
119 + }
120 + /* Teardown the gpadl on the vsp end */
121 + if (net_device->send_buf_gpadl_handle) {
122 + ret = vmbus_teardown_gpadl(net_device->dev->channel,
123 + net_device->send_buf_gpadl_handle);
124 +
125 + /* If we failed here, we might as well return and have a leak
126 + * rather than continue and a bugchk
127 + */
128 + if (ret != 0) {
129 + netdev_err(ndev,
130 + "unable to teardown send buffer's gpadl\n");
131 + return ret;
132 + }
133 + net_device->recv_buf_gpadl_handle = 0;
134 + }
135 + if (net_device->send_buf) {
136 + /* Free up the send buffer */
137 + free_pages((unsigned long)net_device->send_buf,
138 + get_order(net_device->send_buf_size));
139 + net_device->send_buf = NULL;
140 + }
141 + kfree(net_device->send_section_map);
142 +
143 return ret;
144 }
145
146 -static int netvsc_init_recv_buf(struct hv_device *device)
147 +static int netvsc_init_buf(struct hv_device *device)
148 {
149 int ret = 0;
150 int t;
151 @@ -248,10 +301,90 @@ static int netvsc_init_recv_buf(struct hv_device *device)
152 goto cleanup;
153 }
154
155 + /* Now setup the send buffer.
156 + */
157 + net_device->send_buf =
158 + (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
159 + get_order(net_device->send_buf_size));
160 + if (!net_device->send_buf) {
161 + netdev_err(ndev, "unable to allocate send "
162 + "buffer of size %d\n", net_device->send_buf_size);
163 + ret = -ENOMEM;
164 + goto cleanup;
165 + }
166 +
167 + /* Establish the gpadl handle for this buffer on this
168 + * channel. Note: This call uses the vmbus connection rather
169 + * than the channel to establish the gpadl handle.
170 + */
171 + ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
172 + net_device->send_buf_size,
173 + &net_device->send_buf_gpadl_handle);
174 + if (ret != 0) {
175 + netdev_err(ndev,
176 + "unable to establish send buffer's gpadl\n");
177 + goto cleanup;
178 + }
179 +
180 + /* Notify the NetVsp of the gpadl handle */
181 + init_packet = &net_device->channel_init_pkt;
182 + memset(init_packet, 0, sizeof(struct nvsp_message));
183 + init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
184 + init_packet->msg.v1_msg.send_recv_buf.gpadl_handle =
185 + net_device->send_buf_gpadl_handle;
186 + init_packet->msg.v1_msg.send_recv_buf.id = 0;
187 +
188 + /* Send the gpadl notification request */
189 + ret = vmbus_sendpacket(device->channel, init_packet,
190 + sizeof(struct nvsp_message),
191 + (unsigned long)init_packet,
192 + VM_PKT_DATA_INBAND,
193 + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
194 + if (ret != 0) {
195 + netdev_err(ndev,
196 + "unable to send send buffer's gpadl to netvsp\n");
197 + goto cleanup;
198 + }
199 +
200 + t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
201 + BUG_ON(t == 0);
202 +
203 + /* Check the response */
204 + if (init_packet->msg.v1_msg.
205 + send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
206 + netdev_err(ndev, "Unable to complete send buffer "
207 + "initialization with NetVsp - status %d\n",
208 + init_packet->msg.v1_msg.
209 + send_recv_buf_complete.status);
210 + ret = -EINVAL;
211 + goto cleanup;
212 + }
213 +
214 + /* Parse the response */
215 + net_device->send_section_size = init_packet->msg.
216 + v1_msg.send_send_buf_complete.section_size;
217 +
218 + /* Section count is simply the size divided by the section size.
219 + */
220 + net_device->send_section_cnt =
221 + net_device->send_buf_size/net_device->send_section_size;
222 +
223 + dev_info(&device->device, "Send section size: %d, Section count:%d\n",
224 + net_device->send_section_size, net_device->send_section_cnt);
225 +
226 + /* Setup state for managing the send buffer. */
227 + net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
228 + BITS_PER_LONG);
229 +
230 + net_device->send_section_map =
231 + kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
232 + if (net_device->send_section_map == NULL)
233 + goto cleanup;
234 +
235 goto exit;
236
237 cleanup:
238 - netvsc_destroy_recv_buf(net_device);
239 + netvsc_destroy_buf(net_device);
240
241 exit:
242 return ret;
243 @@ -369,8 +502,9 @@ static int netvsc_connect_vsp(struct hv_device *device)
244 net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
245 else
246 net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
247 + net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
248
249 - ret = netvsc_init_recv_buf(device);
250 + ret = netvsc_init_buf(device);
251
252 cleanup:
253 return ret;
254 @@ -378,7 +512,7 @@ cleanup:
255
256 static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
257 {
258 - netvsc_destroy_recv_buf(net_device);
259 + netvsc_destroy_buf(net_device);
260 }
261
262 /*
263 @@ -440,6 +574,12 @@ static inline u32 hv_ringbuf_avail_percent(
264 return avail_write * 100 / ring_info->ring_datasize;
265 }
266
267 +static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
268 + u32 index)
269 +{
270 + sync_change_bit(index, net_device->send_section_map);
271 +}
272 +
273 static void netvsc_send_completion(struct netvsc_device *net_device,
274 struct hv_device *device,
275 struct vmpacket_descriptor *packet)
276 @@ -447,6 +587,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
277 struct nvsp_message *nvsp_packet;
278 struct hv_netvsc_packet *nvsc_packet;
279 struct net_device *ndev;
280 + u32 send_index;
281
282 ndev = net_device->ndev;
283
284 @@ -477,6 +618,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
285
286 /* Notify the layer above us */
287 if (nvsc_packet) {
288 + send_index = nvsc_packet->send_buf_index;
289 + if (send_index != NETVSC_INVALID_INDEX)
290 + netvsc_free_send_slot(net_device, send_index);
291 q_idx = nvsc_packet->q_idx;
292 channel = nvsc_packet->channel;
293 nvsc_packet->send_completion(nvsc_packet->
294 @@ -504,6 +648,52 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
295
296 }
297
298 +static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
299 +{
300 + unsigned long index;
301 + u32 max_words = net_device->map_words;
302 + unsigned long *map_addr = (unsigned long *)net_device->send_section_map;
303 + u32 section_cnt = net_device->send_section_cnt;
304 + int ret_val = NETVSC_INVALID_INDEX;
305 + int i;
306 + int prev_val;
307 +
308 + for (i = 0; i < max_words; i++) {
309 + if (!~(map_addr[i]))
310 + continue;
311 + index = ffz(map_addr[i]);
312 + prev_val = sync_test_and_set_bit(index, &map_addr[i]);
313 + if (prev_val)
314 + continue;
315 + if ((index + (i * BITS_PER_LONG)) >= section_cnt)
316 + break;
317 + ret_val = (index + (i * BITS_PER_LONG));
318 + break;
319 + }
320 + return ret_val;
321 +}
322 +
323 +u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
324 + unsigned int section_index,
325 + struct hv_netvsc_packet *packet)
326 +{
327 + char *start = net_device->send_buf;
328 + char *dest = (start + (section_index * net_device->send_section_size));
329 + int i;
330 + u32 msg_size = 0;
331 +
332 + for (i = 0; i < packet->page_buf_cnt; i++) {
333 + char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT);
334 + u32 offset = packet->page_buf[i].offset;
335 + u32 len = packet->page_buf[i].len;
336 +
337 + memcpy(dest, (src + offset), len);
338 + msg_size += len;
339 + dest += len;
340 + }
341 + return msg_size;
342 +}
343 +
344 int netvsc_send(struct hv_device *device,
345 struct hv_netvsc_packet *packet)
346 {
347 @@ -513,6 +703,10 @@ int netvsc_send(struct hv_device *device,
348 struct net_device *ndev;
349 struct vmbus_channel *out_channel = NULL;
350 u64 req_id;
351 + unsigned int section_index = NETVSC_INVALID_INDEX;
352 + u32 msg_size = 0;
353 + struct sk_buff *skb;
354 +
355
356 net_device = get_outbound_net_device(device);
357 if (!net_device)
358 @@ -528,10 +722,26 @@ int netvsc_send(struct hv_device *device,
359 sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
360 }
361
362 - /* Not using send buffer section */
363 + /* Attempt to send via sendbuf */
364 + if (packet->total_data_buflen < net_device->send_section_size) {
365 + section_index = netvsc_get_next_send_section(net_device);
366 + if (section_index != NETVSC_INVALID_INDEX) {
367 + msg_size = netvsc_copy_to_send_buf(net_device,
368 + section_index,
369 + packet);
370 + skb = (struct sk_buff *)
371 + (unsigned long)packet->send_completion_tid;
372 + if (skb)
373 + dev_kfree_skb_any(skb);
374 + packet->page_buf_cnt = 0;
375 + }
376 + }
377 + packet->send_buf_index = section_index;
378 +
379 +
380 sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
381 - 0xFFFFFFFF;
382 - sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
383 + section_index;
384 + sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size;
385
386 if (packet->send_completion)
387 req_id = (ulong)packet;
388 diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
389 index c76b66515e92..939e3af60ec4 100644
390 --- a/drivers/net/hyperv/netvsc_drv.c
391 +++ b/drivers/net/hyperv/netvsc_drv.c
392 @@ -236,10 +236,11 @@ static void netvsc_xmit_completion(void *context)
393 struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
394 struct sk_buff *skb = (struct sk_buff *)
395 (unsigned long)packet->send_completion_tid;
396 + u32 index = packet->send_buf_index;
397
398 kfree(packet);
399
400 - if (skb)
401 + if (skb && (index == NETVSC_INVALID_INDEX))
402 dev_kfree_skb_any(skb);
403 }
404
405 --
406 2.4.3
407