[ipfire-2.x.git] src/patches/linux/0016-hyperv-Add-support-for-virtual-Receive-Side-Scaling-.patch
1 From 44559a96c6864eb6e95db0ae896c621b82e605f3 Mon Sep 17 00:00:00 2001
2 From: Haiyang Zhang <haiyangz@microsoft.com>
3 Date: Mon, 21 Apr 2014 10:20:28 -0700
4 Subject: [PATCH 16/25] hyperv: Add support for virtual Receive Side Scaling
5 (vRSS)
6
7 This feature allows multiple channels to be used by each virtual NIC.
8 It is available on Hyper-V host 2012 R2.
9
10 Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
11 Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
12 Signed-off-by: David S. Miller <davem@davemloft.net>
13 ---
14 drivers/net/hyperv/hyperv_net.h | 110 +++++++++++++++++++++-
15 drivers/net/hyperv/netvsc.c | 136 +++++++++++++++++++++------
16 drivers/net/hyperv/netvsc_drv.c | 103 ++++++++++++++++++++-
17 drivers/net/hyperv/rndis_filter.c | 189 +++++++++++++++++++++++++++++++++++++-
18 4 files changed, 504 insertions(+), 34 deletions(-)
19
20 diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
21 index d18f711d0b0c..57eb3f906d64 100644
22 --- a/drivers/net/hyperv/hyperv_net.h
23 +++ b/drivers/net/hyperv/hyperv_net.h
24 @@ -28,6 +28,96 @@
25 #include <linux/hyperv.h>
26 #include <linux/rndis.h>
27
28 +/* RSS related */
29 +#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */
30 +#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204 /* query and set */
31 +
32 +#define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88
33 +#define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89
34 +
35 +#define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2
36 +#define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2
37 +
38 +struct ndis_obj_header {
39 + u8 type;
40 + u8 rev;
41 + u16 size;
42 +} __packed;
43 +
44 +/* ndis_recv_scale_cap/cap_flag */
45 +#define NDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000
46 +#define NDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000
47 +#define NDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000
48 +#define NDIS_RSS_CAPS_USING_MSI_X 0x08000000
49 +#define NDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000
50 +#define NDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000
51 +#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100
52 +#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200
53 +#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400
54 +
55 +struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */
56 + struct ndis_obj_header hdr;
57 + u32 cap_flag;
58 + u32 num_int_msg;
59 + u32 num_recv_que;
60 + u16 num_indirect_tabent;
61 +} __packed;
62 +
63 +
64 +/* ndis_recv_scale_param flags */
65 +#define NDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001
66 +#define NDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002
67 +#define NDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004
68 +#define NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008
69 +#define NDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010
70 +
71 +/* Hash info bits */
72 +#define NDIS_HASH_FUNC_TOEPLITZ 0x00000001
73 +#define NDIS_HASH_IPV4 0x00000100
74 +#define NDIS_HASH_TCP_IPV4 0x00000200
75 +#define NDIS_HASH_IPV6 0x00000400
76 +#define NDIS_HASH_IPV6_EX 0x00000800
77 +#define NDIS_HASH_TCP_IPV6 0x00001000
78 +#define NDIS_HASH_TCP_IPV6_EX 0x00002000
79 +
80 +#define NDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4)
81 +#define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40
82 +
83 +#define ITAB_NUM 128
84 +#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2
85 +extern u8 netvsc_hash_key[];
86 +
87 +struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
88 + struct ndis_obj_header hdr;
89 +
90 + /* Qualifies the rest of the information */
91 + u16 flag;
92 +
93 + /* The base CPU number to do receive processing. not used */
94 + u16 base_cpu_number;
95 +
96 + /* This describes the hash function and type being enabled */
97 + u32 hashinfo;
98 +
99 + /* The size of indirection table array */
100 + u16 indirect_tabsize;
101 +
102 + /* The offset of the indirection table from the beginning of this
103 + * structure
104 + */
105 + u32 indirect_taboffset;
106 +
107 + /* The size of the hash secret key */
108 + u16 hashkey_size;
109 +
110 + /* The offset of the secret key from the beginning of this structure */
111 + u32 kashkey_offset;
112 +
113 + u32 processor_masks_offset;
114 + u32 num_processor_masks;
115 + u32 processor_masks_entry_size;
116 +};
117 +
118 /* Fwd declaration */
119 struct hv_netvsc_packet;
120 struct ndis_tcp_ip_checksum_info;
121 @@ -39,6 +129,8 @@ struct xferpage_packet {
122
123 /* # of netvsc packets this xfer packet contains */
124 u32 count;
125 +
126 + struct vmbus_channel *channel;
127 };
128
129 /*
130 @@ -54,6 +146,9 @@ struct hv_netvsc_packet {
131 bool is_data_pkt;
132 u16 vlan_tci;
133
134 + u16 q_idx;
135 + struct vmbus_channel *channel;
136 +
137 /*
138 * Valid only for receives when we break a xfer page packet
139 * into multiple netvsc packets
140 @@ -120,6 +215,7 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
141 int netvsc_recv_callback(struct hv_device *device_obj,
142 struct hv_netvsc_packet *packet,
143 struct ndis_tcp_ip_checksum_info *csum_info);
144 +void netvsc_channel_cb(void *context);
145 int rndis_filter_open(struct hv_device *dev);
146 int rndis_filter_close(struct hv_device *dev);
147 int rndis_filter_device_add(struct hv_device *dev,
148 @@ -522,6 +618,8 @@ struct nvsp_message {
149
150 #define NETVSC_PACKET_SIZE 2048
151
152 +#define VRSS_SEND_TAB_SIZE 16
153 +
154 /* Per netvsc channel-specific */
155 struct netvsc_device {
156 struct hv_device *dev;
157 @@ -555,10 +653,20 @@ struct netvsc_device {
158
159 struct net_device *ndev;
160
161 + struct vmbus_channel *chn_table[NR_CPUS];
162 + u32 send_table[VRSS_SEND_TAB_SIZE];
163 + u32 num_chn;
164 + atomic_t queue_sends[NR_CPUS];
165 +
166 /* Holds rndis device info */
167 void *extension;
168 - /* The recive buffer for this device */
169 +
170 + int ring_size;
171 +
172 + /* The primary channel callback buffer */
173 unsigned char cb_buffer[NETVSC_PACKET_SIZE];
174 + /* The sub channel callback buffer */
175 + unsigned char *sub_cb_buf;
176 };
177
178 /* NdisInitialize message */
179 diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
180 index f7629ecefa84..e7e77f12bc38 100644
181 --- a/drivers/net/hyperv/netvsc.c
182 +++ b/drivers/net/hyperv/netvsc.c
183 @@ -422,6 +422,9 @@ int netvsc_device_remove(struct hv_device *device)
184 kfree(netvsc_packet);
185 }
186
187 + if (net_device->sub_cb_buf)
188 + vfree(net_device->sub_cb_buf);
189 +
190 kfree(net_device);
191 return 0;
192 }
193 @@ -461,7 +464,9 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
194 (nvsp_packet->hdr.msg_type ==
195 NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
196 (nvsp_packet->hdr.msg_type ==
197 - NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE)) {
198 + NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
199 + (nvsp_packet->hdr.msg_type ==
200 + NVSP_MSG5_TYPE_SUBCHANNEL)) {
201 /* Copy the response back */
202 memcpy(&net_device->channel_init_pkt, nvsp_packet,
203 sizeof(struct nvsp_message));
204 @@ -469,28 +474,37 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
205 } else if (nvsp_packet->hdr.msg_type ==
206 NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
207 int num_outstanding_sends;
208 + u16 q_idx = 0;
209 + struct vmbus_channel *channel = device->channel;
210 + int queue_sends;
211
212 /* Get the send context */
213 nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
214 packet->trans_id;
215
216 /* Notify the layer above us */
217 - if (nvsc_packet)
218 + if (nvsc_packet) {
219 + q_idx = nvsc_packet->q_idx;
220 + channel = nvsc_packet->channel;
221 nvsc_packet->completion.send.send_completion(
222 nvsc_packet->completion.send.
223 send_completion_ctx);
224 + }
225
226 num_outstanding_sends =
227 atomic_dec_return(&net_device->num_outstanding_sends);
228 + queue_sends = atomic_dec_return(&net_device->
229 + queue_sends[q_idx]);
230
231 if (net_device->destroy && num_outstanding_sends == 0)
232 wake_up(&net_device->wait_drain);
233
234 - if (netif_queue_stopped(ndev) && !net_device->start_remove &&
235 - (hv_ringbuf_avail_percent(&device->channel->outbound)
236 - > RING_AVAIL_PERCENT_HIWATER ||
237 - num_outstanding_sends < 1))
238 - netif_wake_queue(ndev);
239 + if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
240 + !net_device->start_remove &&
241 + (hv_ringbuf_avail_percent(&channel->outbound) >
242 + RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
243 + netif_tx_wake_queue(netdev_get_tx_queue(
244 + ndev, q_idx));
245 } else {
246 netdev_err(ndev, "Unknown send completion packet type- "
247 "%d received!!\n", nvsp_packet->hdr.msg_type);
248 @@ -505,6 +519,7 @@ int netvsc_send(struct hv_device *device,
249 int ret = 0;
250 struct nvsp_message sendMessage;
251 struct net_device *ndev;
252 + struct vmbus_channel *out_channel = NULL;
253 u64 req_id;
254
255 net_device = get_outbound_net_device(device);
256 @@ -531,15 +546,20 @@ int netvsc_send(struct hv_device *device,
257 else
258 req_id = 0;
259
260 + out_channel = net_device->chn_table[packet->q_idx];
261 + if (out_channel == NULL)
262 + out_channel = device->channel;
263 + packet->channel = out_channel;
264 +
265 if (packet->page_buf_cnt) {
266 - ret = vmbus_sendpacket_pagebuffer(device->channel,
267 + ret = vmbus_sendpacket_pagebuffer(out_channel,
268 packet->page_buf,
269 packet->page_buf_cnt,
270 &sendMessage,
271 sizeof(struct nvsp_message),
272 req_id);
273 } else {
274 - ret = vmbus_sendpacket(device->channel, &sendMessage,
275 + ret = vmbus_sendpacket(out_channel, &sendMessage,
276 sizeof(struct nvsp_message),
277 req_id,
278 VM_PKT_DATA_INBAND,
279 @@ -548,17 +568,24 @@ int netvsc_send(struct hv_device *device,
280
281 if (ret == 0) {
282 atomic_inc(&net_device->num_outstanding_sends);
283 - if (hv_ringbuf_avail_percent(&device->channel->outbound) <
284 + atomic_inc(&net_device->queue_sends[packet->q_idx]);
285 +
286 + if (hv_ringbuf_avail_percent(&out_channel->outbound) <
287 RING_AVAIL_PERCENT_LOWATER) {
288 - netif_stop_queue(ndev);
289 + netif_tx_stop_queue(netdev_get_tx_queue(
290 + ndev, packet->q_idx));
291 +
292 if (atomic_read(&net_device->
293 - num_outstanding_sends) < 1)
294 - netif_wake_queue(ndev);
295 + queue_sends[packet->q_idx]) < 1)
296 + netif_tx_wake_queue(netdev_get_tx_queue(
297 + ndev, packet->q_idx));
298 }
299 } else if (ret == -EAGAIN) {
300 - netif_stop_queue(ndev);
301 - if (atomic_read(&net_device->num_outstanding_sends) < 1) {
302 - netif_wake_queue(ndev);
303 + netif_tx_stop_queue(netdev_get_tx_queue(
304 + ndev, packet->q_idx));
305 + if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) {
306 + netif_tx_wake_queue(netdev_get_tx_queue(
307 + ndev, packet->q_idx));
308 ret = -ENOSPC;
309 }
310 } else {
311 @@ -570,6 +597,7 @@ int netvsc_send(struct hv_device *device,
312 }
313
314 static void netvsc_send_recv_completion(struct hv_device *device,
315 + struct vmbus_channel *channel,
316 struct netvsc_device *net_device,
317 u64 transaction_id, u32 status)
318 {
319 @@ -587,7 +615,7 @@ static void netvsc_send_recv_completion(struct hv_device *device,
320
321 retry_send_cmplt:
322 /* Send the completion */
323 - ret = vmbus_sendpacket(device->channel, &recvcompMessage,
324 + ret = vmbus_sendpacket(channel, &recvcompMessage,
325 sizeof(struct nvsp_message), transaction_id,
326 VM_PKT_COMP, 0);
327 if (ret == 0) {
328 @@ -618,6 +646,7 @@ static void netvsc_receive_completion(void *context)
329 {
330 struct hv_netvsc_packet *packet = context;
331 struct hv_device *device = packet->device;
332 + struct vmbus_channel *channel;
333 struct netvsc_device *net_device;
334 u64 transaction_id = 0;
335 bool fsend_receive_comp = false;
336 @@ -649,6 +678,7 @@ static void netvsc_receive_completion(void *context)
337 */
338 if (packet->xfer_page_pkt->count == 0) {
339 fsend_receive_comp = true;
340 + channel = packet->xfer_page_pkt->channel;
341 transaction_id = packet->completion.recv.recv_completion_tid;
342 status = packet->xfer_page_pkt->status;
343 list_add_tail(&packet->xfer_page_pkt->list_ent,
344 @@ -662,12 +692,13 @@ static void netvsc_receive_completion(void *context)
345
346 /* Send a receive completion for the xfer page packet */
347 if (fsend_receive_comp)
348 - netvsc_send_recv_completion(device, net_device, transaction_id,
349 - status);
350 + netvsc_send_recv_completion(device, channel, net_device,
351 + transaction_id, status);
352
353 }
354
355 static void netvsc_receive(struct netvsc_device *net_device,
356 + struct vmbus_channel *channel,
357 struct hv_device *device,
358 struct vmpacket_descriptor *packet)
359 {
360 @@ -748,7 +779,7 @@ static void netvsc_receive(struct netvsc_device *net_device,
361 spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
362 flags);
363
364 - netvsc_send_recv_completion(device, net_device,
365 + netvsc_send_recv_completion(device, channel, net_device,
366 vmxferpage_packet->d.trans_id,
367 NVSP_STAT_FAIL);
368
369 @@ -759,6 +790,7 @@ static void netvsc_receive(struct netvsc_device *net_device,
370 xferpage_packet = (struct xferpage_packet *)listHead.next;
371 list_del(&xferpage_packet->list_ent);
372 xferpage_packet->status = NVSP_STAT_SUCCESS;
373 + xferpage_packet->channel = channel;
374
375 /* This is how much we can satisfy */
376 xferpage_packet->count = count - 1;
377 @@ -800,10 +832,45 @@ static void netvsc_receive(struct netvsc_device *net_device,
378
379 }
380
381 -static void netvsc_channel_cb(void *context)
382 +
383 +static void netvsc_send_table(struct hv_device *hdev,
384 + struct vmpacket_descriptor *vmpkt)
385 +{
386 + struct netvsc_device *nvscdev;
387 + struct net_device *ndev;
388 + struct nvsp_message *nvmsg;
389 + int i;
390 + u32 count, *tab;
391 +
392 + nvscdev = get_outbound_net_device(hdev);
393 + if (!nvscdev)
394 + return;
395 + ndev = nvscdev->ndev;
396 +
397 + nvmsg = (struct nvsp_message *)((unsigned long)vmpkt +
398 + (vmpkt->offset8 << 3));
399 +
400 + if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE)
401 + return;
402 +
403 + count = nvmsg->msg.v5_msg.send_table.count;
404 + if (count != VRSS_SEND_TAB_SIZE) {
405 + netdev_err(ndev, "Received wrong send-table size:%u\n", count);
406 + return;
407 + }
408 +
409 + tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
410 + nvmsg->msg.v5_msg.send_table.offset);
411 +
412 + for (i = 0; i < count; i++)
413 + nvscdev->send_table[i] = tab[i];
414 +}
415 +
416 +void netvsc_channel_cb(void *context)
417 {
418 int ret;
419 - struct hv_device *device = context;
420 + struct vmbus_channel *channel = (struct vmbus_channel *)context;
421 + struct hv_device *device;
422 struct netvsc_device *net_device;
423 u32 bytes_recvd;
424 u64 request_id;
425 @@ -812,14 +879,19 @@ static void netvsc_channel_cb(void *context)
426 int bufferlen = NETVSC_PACKET_SIZE;
427 struct net_device *ndev;
428
429 + if (channel->primary_channel != NULL)
430 + device = channel->primary_channel->device_obj;
431 + else
432 + device = channel->device_obj;
433 +
434 net_device = get_inbound_net_device(device);
435 if (!net_device)
436 return;
437 ndev = net_device->ndev;
438 - buffer = net_device->cb_buffer;
439 + buffer = get_per_channel_state(channel);
440
441 do {
442 - ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen,
443 + ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
444 &bytes_recvd, &request_id);
445 if (ret == 0) {
446 if (bytes_recvd > 0) {
447 @@ -831,8 +903,12 @@ static void netvsc_channel_cb(void *context)
448 break;
449
450 case VM_PKT_DATA_USING_XFER_PAGES:
451 - netvsc_receive(net_device,
452 - device, desc);
453 + netvsc_receive(net_device, channel,
454 + device, desc);
455 + break;
456 +
457 + case VM_PKT_DATA_INBAND:
458 + netvsc_send_table(device, desc);
459 break;
460
461 default:
462 @@ -893,6 +969,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
463 goto cleanup;
464 }
465
466 + net_device->ring_size = ring_size;
467 +
468 /*
469 * Coming into this function, struct net_device * is
470 * registered as the driver private data.
471 @@ -917,10 +995,12 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
472 }
473 init_completion(&net_device->channel_init_wait);
474
475 + set_per_channel_state(device->channel, net_device->cb_buffer);
476 +
477 /* Open the channel */
478 ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
479 ring_size * PAGE_SIZE, NULL, 0,
480 - netvsc_channel_cb, device);
481 + netvsc_channel_cb, device->channel);
482
483 if (ret != 0) {
484 netdev_err(ndev, "unable to open channel: %d\n", ret);
485 @@ -930,6 +1010,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
486 /* Channel is opened */
487 pr_info("hv_netvsc channel opened successfully\n");
488
489 + net_device->chn_table[0] = device->channel;
490 +
491 /* Connect with the NetVsp */
492 ret = netvsc_connect_vsp(device);
493 if (ret != 0) {
494 diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
495 index ce6d870dd7ae..e486dbd33f61 100644
496 --- a/drivers/net/hyperv/netvsc_drv.c
497 +++ b/drivers/net/hyperv/netvsc_drv.c
498 @@ -101,7 +101,7 @@ static int netvsc_open(struct net_device *net)
499 return ret;
500 }
501
502 - netif_start_queue(net);
503 + netif_tx_start_all_queues(net);
504
505 nvdev = hv_get_drvdata(device_obj);
506 rdev = nvdev->extension;
507 @@ -149,6 +149,88 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
508 return ppi;
509 }
510
511 +union sub_key {
512 + u64 k;
513 + struct {
514 + u8 pad[3];
515 + u8 kb;
516 + u32 ka;
517 + };
518 +};
519 +
520 +/* Toeplitz hash function
521 + * data: network byte order
522 + * return: host byte order
523 + */
524 +static u32 comp_hash(u8 *key, int klen, u8 *data, int dlen)
525 +{
526 + union sub_key subk;
527 + int k_next = 4;
528 + u8 dt;
529 + int i, j;
530 + u32 ret = 0;
531 +
532 + subk.k = 0;
533 + subk.ka = ntohl(*(u32 *)key);
534 +
535 + for (i = 0; i < dlen; i++) {
536 + subk.kb = key[k_next];
537 + k_next = (k_next + 1) % klen;
538 + dt = data[i];
539 + for (j = 0; j < 8; j++) {
540 + if (dt & 0x80)
541 + ret ^= subk.ka;
542 + dt <<= 1;
543 + subk.k <<= 1;
544 + }
545 + }
546 +
547 + return ret;
548 +}
549 +
550 +static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
551 +{
552 + struct iphdr *iphdr;
553 + int data_len;
554 + bool ret = false;
555 +
556 + if (eth_hdr(skb)->h_proto != htons(ETH_P_IP))
557 + return false;
558 +
559 + iphdr = ip_hdr(skb);
560 +
561 + if (iphdr->version == 4) {
562 + if (iphdr->protocol == IPPROTO_TCP)
563 + data_len = 12;
564 + else
565 + data_len = 8;
566 + *hash = comp_hash(netvsc_hash_key, HASH_KEYLEN,
567 + (u8 *)&iphdr->saddr, data_len);
568 + ret = true;
569 + }
570 +
571 + return ret;
572 +}
573 +
574 +static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
575 + void *accel_priv, select_queue_fallback_t fallback)
576 +{
577 + struct net_device_context *net_device_ctx = netdev_priv(ndev);
578 + struct hv_device *hdev = net_device_ctx->device_ctx;
579 + struct netvsc_device *nvsc_dev = hv_get_drvdata(hdev);
580 + u32 hash;
581 + u16 q_idx = 0;
582 +
583 + if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1)
584 + return 0;
585 +
586 + if (netvsc_set_hash(&hash, skb))
587 + q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
588 + ndev->real_num_tx_queues;
589 +
590 + return q_idx;
591 +}
592 +
593 static void netvsc_xmit_completion(void *context)
594 {
595 struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
596 @@ -334,6 +416,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
597
598 packet->vlan_tci = skb->vlan_tci;
599
600 + packet->q_idx = skb_get_queue_mapping(skb);
601 +
602 packet->is_data_pkt = true;
603 packet->total_data_buflen = skb->len;
604
605 @@ -559,6 +643,10 @@ int netvsc_recv_callback(struct hv_device *device_obj,
606 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
607 packet->vlan_tci);
608
609 + skb_record_rx_queue(skb, packet->xfer_page_pkt->channel->
610 + offermsg.offer.sub_channel_index %
611 + net->real_num_rx_queues);
612 +
613 net->stats.rx_packets++;
614 net->stats.rx_bytes += packet->total_data_buflen;
615
616 @@ -607,7 +695,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
617 hv_set_drvdata(hdev, ndev);
618 device_info.ring_size = ring_size;
619 rndis_filter_device_add(hdev, &device_info);
620 - netif_wake_queue(ndev);
621 + netif_tx_wake_all_queues(ndev);
622
623 return 0;
624 }
625 @@ -653,6 +741,7 @@ static const struct net_device_ops device_ops = {
626 .ndo_change_mtu = netvsc_change_mtu,
627 .ndo_validate_addr = eth_validate_addr,
628 .ndo_set_mac_address = netvsc_set_mac_addr,
629 + .ndo_select_queue = netvsc_select_queue,
630 };
631
632 /*
633 @@ -699,9 +788,11 @@ static int netvsc_probe(struct hv_device *dev,
634 struct net_device *net = NULL;
635 struct net_device_context *net_device_ctx;
636 struct netvsc_device_info device_info;
637 + struct netvsc_device *nvdev;
638 int ret;
639
640 - net = alloc_etherdev(sizeof(struct net_device_context));
641 + net = alloc_etherdev_mq(sizeof(struct net_device_context),
642 + num_online_cpus());
643 if (!net)
644 return -ENOMEM;
645
646 @@ -734,6 +825,12 @@ static int netvsc_probe(struct hv_device *dev,
647 }
648 memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
649
650 + nvdev = hv_get_drvdata(dev);
651 + netif_set_real_num_tx_queues(net, nvdev->num_chn);
652 + netif_set_real_num_rx_queues(net, nvdev->num_chn);
653 + dev_info(&dev->device, "real num tx,rx queues:%u, %u\n",
654 + net->real_num_tx_queues, net->real_num_rx_queues);
655 +
656 ret = register_netdev(net);
657 if (ret != 0) {
658 pr_err("Unable to register netdev.\n");
659 diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
660 index 143a98caf618..d92cfbe43410 100644
661 --- a/drivers/net/hyperv/rndis_filter.c
662 +++ b/drivers/net/hyperv/rndis_filter.c
663 @@ -31,7 +31,7 @@
664 #include "hyperv_net.h"
665
666
667 -#define RNDIS_EXT_LEN 100
668 +#define RNDIS_EXT_LEN PAGE_SIZE
669 struct rndis_request {
670 struct list_head list_ent;
671 struct completion wait_event;
672 @@ -94,6 +94,8 @@ static struct rndis_request *get_rndis_request(struct rndis_device *dev,
673 rndis_msg->ndis_msg_type = msg_type;
674 rndis_msg->msg_len = msg_len;
675
676 + request->pkt.q_idx = 0;
677 +
678 /*
679 * Set the request id. This field is always after the rndis header for
680 * request/response packet types so we just used the SetRequest as a
681 @@ -509,6 +511,19 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
682 query->info_buflen = 0;
683 query->dev_vc_handle = 0;
684
685 + if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) {
686 + struct ndis_recv_scale_cap *cap;
687 +
688 + request->request_msg.msg_len +=
689 + sizeof(struct ndis_recv_scale_cap);
690 + query->info_buflen = sizeof(struct ndis_recv_scale_cap);
691 + cap = (struct ndis_recv_scale_cap *)((unsigned long)query +
692 + query->info_buf_offset);
693 + cap->hdr.type = NDIS_OBJECT_TYPE_RSS_CAPABILITIES;
694 + cap->hdr.rev = NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2;
695 + cap->hdr.size = sizeof(struct ndis_recv_scale_cap);
696 + }
697 +
698 ret = rndis_filter_send_request(dev, request);
699 if (ret != 0)
700 goto cleanup;
701 @@ -695,6 +710,89 @@ cleanup:
702 return ret;
703 }
704
705 +u8 netvsc_hash_key[HASH_KEYLEN] = {
706 + 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
707 + 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
708 + 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
709 + 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
710 + 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
711 +};
712 +
713 +int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue)
714 +{
715 + struct net_device *ndev = rdev->net_dev->ndev;
716 + struct rndis_request *request;
717 + struct rndis_set_request *set;
718 + struct rndis_set_complete *set_complete;
719 + u32 extlen = sizeof(struct ndis_recv_scale_param) +
720 + 4*ITAB_NUM + HASH_KEYLEN;
721 + struct ndis_recv_scale_param *rssp;
722 + u32 *itab;
723 + u8 *keyp;
724 + int i, t, ret;
725 +
726 + request = get_rndis_request(
727 + rdev, RNDIS_MSG_SET,
728 + RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen);
729 + if (!request)
730 + return -ENOMEM;
731 +
732 + set = &request->request_msg.msg.set_req;
733 + set->oid = OID_GEN_RECEIVE_SCALE_PARAMETERS;
734 + set->info_buflen = extlen;
735 + set->info_buf_offset = sizeof(struct rndis_set_request);
736 + set->dev_vc_handle = 0;
737 +
738 + rssp = (struct ndis_recv_scale_param *)(set + 1);
739 + rssp->hdr.type = NDIS_OBJECT_TYPE_RSS_PARAMETERS;
740 + rssp->hdr.rev = NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2;
741 + rssp->hdr.size = sizeof(struct ndis_recv_scale_param);
742 + rssp->flag = 0;
743 + rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 |
744 + NDIS_HASH_TCP_IPV4;
745 + rssp->indirect_tabsize = 4*ITAB_NUM;
746 + rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param);
747 + rssp->hashkey_size = HASH_KEYLEN;
748 + rssp->kashkey_offset = rssp->indirect_taboffset +
749 + rssp->indirect_tabsize;
750 +
751 + /* Set indirection table entries */
752 + itab = (u32 *)(rssp + 1);
753 + for (i = 0; i < ITAB_NUM; i++)
754 + itab[i] = i % num_queue;
755 +
756 + /* Set hask key values */
757 + keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset);
758 + for (i = 0; i < HASH_KEYLEN; i++)
759 + keyp[i] = netvsc_hash_key[i];
760 +
761 +
762 + ret = rndis_filter_send_request(rdev, request);
763 + if (ret != 0)
764 + goto cleanup;
765 +
766 + t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
767 + if (t == 0) {
768 + netdev_err(ndev, "timeout before we got a set response...\n");
769 + /* can't put_rndis_request, since we may still receive a
770 + * send-completion.
771 + */
772 + return -ETIMEDOUT;
773 + } else {
774 + set_complete = &request->response_msg.msg.set_complete;
775 + if (set_complete->status != RNDIS_STATUS_SUCCESS) {
776 + netdev_err(ndev, "Fail to set RSS parameters:0x%x\n",
777 + set_complete->status);
778 + ret = -EINVAL;
779 + }
780 + }
781 +
782 +cleanup:
783 + put_rndis_request(rdev, request);
784 + return ret;
785 +}
786 +
787 +
788 static int rndis_filter_query_device_link_status(struct rndis_device *dev)
789 {
790 u32 size = sizeof(u32);
791 @@ -886,6 +984,28 @@ static int rndis_filter_close_device(struct rndis_device *dev)
792 return ret;
793 }
794
795 +static void netvsc_sc_open(struct vmbus_channel *new_sc)
796 +{
797 + struct netvsc_device *nvscdev;
798 + u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
799 + int ret;
800 +
801 + nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj);
802 +
803 + if (chn_index >= nvscdev->num_chn)
804 + return;
805 +
806 + set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) *
807 + NETVSC_PACKET_SIZE);
808 +
809 + ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
810 + nvscdev->ring_size * PAGE_SIZE, NULL, 0,
811 + netvsc_channel_cb, new_sc);
812 +
813 + if (ret == 0)
814 + nvscdev->chn_table[chn_index] = new_sc;
815 +}
816 +
817 int rndis_filter_device_add(struct hv_device *dev,
818 void *additional_info)
819 {
820 @@ -894,6 +1014,10 @@ int rndis_filter_device_add(struct hv_device *dev,
821 struct rndis_device *rndis_device;
822 struct netvsc_device_info *device_info = additional_info;
823 struct ndis_offload_params offloads;
824 + struct nvsp_message *init_packet;
825 + int t;
826 + struct ndis_recv_scale_cap rsscap;
827 + u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
828
829 rndis_device = get_rndis_device();
830 if (!rndis_device)
831 @@ -913,6 +1037,7 @@ int rndis_filter_device_add(struct hv_device *dev,
832
833 /* Initialize the rndis device */
834 net_device = hv_get_drvdata(dev);
835 + net_device->num_chn = 1;
836
837 net_device->extension = rndis_device;
838 rndis_device->net_dev = net_device;
839 @@ -952,7 +1077,6 @@ int rndis_filter_device_add(struct hv_device *dev,
840 if (ret)
841 goto err_dev_remv;
842
843 -
844 rndis_filter_query_device_link_status(rndis_device);
845
846 device_info->link_state = rndis_device->link_state;
847 @@ -961,7 +1085,66 @@ int rndis_filter_device_add(struct hv_device *dev,
848 rndis_device->hw_mac_adr,
849 device_info->link_state ? "down" : "up");
850
851 - return ret;
852 + if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
853 + return 0;
854 +
855 + /* vRSS setup */
856 + memset(&rsscap, 0, rsscap_size);
857 + ret = rndis_filter_query_device(rndis_device,
858 + OID_GEN_RECEIVE_SCALE_CAPABILITIES,
859 + &rsscap, &rsscap_size);
860 + if (ret || rsscap.num_recv_que < 2)
861 + goto out;
862 +
863 + net_device->num_chn = (num_online_cpus() < rsscap.num_recv_que) ?
864 + num_online_cpus() : rsscap.num_recv_que;
865 + if (net_device->num_chn == 1)
866 + goto out;
867 +
868 + net_device->sub_cb_buf = vzalloc((net_device->num_chn - 1) *
869 + NETVSC_PACKET_SIZE);
870 + if (!net_device->sub_cb_buf) {
871 + net_device->num_chn = 1;
872 + dev_info(&dev->device, "No memory for subchannels.\n");
873 + goto out;
874 + }
875 +
876 + vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
877 +
878 + init_packet = &net_device->channel_init_pkt;
879 + memset(init_packet, 0, sizeof(struct nvsp_message));
880 + init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL;
881 + init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE;
882 + init_packet->msg.v5_msg.subchn_req.num_subchannels =
883 + net_device->num_chn - 1;
884 + ret = vmbus_sendpacket(dev->channel, init_packet,
885 + sizeof(struct nvsp_message),
886 + (unsigned long)init_packet,
887 + VM_PKT_DATA_INBAND,
888 + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
889 + if (ret)
890 + goto out;
891 + t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
892 + if (t == 0) {
893 + ret = -ETIMEDOUT;
894 + goto out;
895 + }
896 + if (init_packet->msg.v5_msg.subchn_comp.status !=
897 + NVSP_STAT_SUCCESS) {
898 + ret = -ENODEV;
899 + goto out;
900 + }
901 + net_device->num_chn = 1 +
902 + init_packet->msg.v5_msg.subchn_comp.num_subchannels;
903 +
904 + vmbus_are_subchannels_present(dev->channel);
905 +
906 + ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn);
907 +
908 +out:
909 + if (ret)
910 + net_device->num_chn = 1;
911 + return 0; /* return 0 because primary channel can be used alone */
912
913 err_dev_remv:
914 rndis_filter_device_remove(dev);
915 --
916 2.4.3
917