]>
Commit | Line | Data |
---|---|---|
fceaf24a | 1 | /* |
fceaf24a HJ |
2 | * Copyright (c) 2009, Microsoft Corporation. |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify it | |
5 | * under the terms and conditions of the GNU General Public License, | |
6 | * version 2, as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope it will be useful, but WITHOUT | |
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
11 | * more details. | |
12 | * | |
13 | * You should have received a copy of the GNU General Public License along with | |
adf8d3ff | 14 | * this program; if not, see <http://www.gnu.org/licenses/>. |
fceaf24a HJ |
15 | * |
16 | * Authors: | |
d0e94d17 | 17 | * Haiyang Zhang <haiyangz@microsoft.com> |
fceaf24a | 18 | * Hank Janssen <hjanssen@microsoft.com> |
fceaf24a | 19 | */ |
eb335bc4 HJ |
20 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
21 | ||
fceaf24a | 22 | #include <linux/init.h> |
9079ce69 | 23 | #include <linux/atomic.h> |
fceaf24a HJ |
24 | #include <linux/module.h> |
25 | #include <linux/highmem.h> | |
26 | #include <linux/device.h> | |
fceaf24a | 27 | #include <linux/io.h> |
fceaf24a HJ |
28 | #include <linux/delay.h> |
29 | #include <linux/netdevice.h> | |
30 | #include <linux/inetdevice.h> | |
31 | #include <linux/etherdevice.h> | |
32 | #include <linux/skbuff.h> | |
c802db11 | 33 | #include <linux/if_vlan.h> |
fceaf24a | 34 | #include <linux/in.h> |
5a0e3ad6 | 35 | #include <linux/slab.h> |
fceaf24a HJ |
36 | #include <net/arp.h> |
37 | #include <net/route.h> | |
38 | #include <net/sock.h> | |
39 | #include <net/pkt_sched.h> | |
3f335ea2 | 40 | |
5ca7252a | 41 | #include "hyperv_net.h" |
fceaf24a | 42 | |
fceaf24a | 43 | struct net_device_context { |
02fafbc6 | 44 | /* point back to our device context */ |
6bad88da | 45 | struct hv_device *device_ctx; |
122a5f64 | 46 | struct delayed_work dwork; |
792df872 | 47 | struct work_struct work; |
fceaf24a HJ |
48 | }; |
49 | ||
fa85a6c2 | 50 | #define RING_SIZE_MIN 64 |
99c8da0f | 51 | static int ring_size = 128; |
450d7a4b SH |
52 | module_param(ring_size, int, S_IRUGO); |
53 | MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); | |
fceaf24a | 54 | |
d426b2e3 HZ |
55 | static void do_set_multicast(struct work_struct *w) |
56 | { | |
792df872 WM |
57 | struct net_device_context *ndevctx = |
58 | container_of(w, struct net_device_context, work); | |
d426b2e3 HZ |
59 | struct netvsc_device *nvdev; |
60 | struct rndis_device *rdev; | |
61 | ||
62 | nvdev = hv_get_drvdata(ndevctx->device_ctx); | |
792df872 WM |
63 | if (nvdev == NULL || nvdev->ndev == NULL) |
64 | return; | |
d426b2e3 HZ |
65 | |
66 | rdev = nvdev->extension; | |
67 | if (rdev == NULL) | |
792df872 | 68 | return; |
d426b2e3 | 69 | |
792df872 | 70 | if (nvdev->ndev->flags & IFF_PROMISC) |
d426b2e3 HZ |
71 | rndis_filter_set_packet_filter(rdev, |
72 | NDIS_PACKET_TYPE_PROMISCUOUS); | |
73 | else | |
74 | rndis_filter_set_packet_filter(rdev, | |
75 | NDIS_PACKET_TYPE_BROADCAST | | |
76 | NDIS_PACKET_TYPE_ALL_MULTICAST | | |
77 | NDIS_PACKET_TYPE_DIRECTED); | |
d426b2e3 HZ |
78 | } |
79 | ||
4e9bfefa | 80 | static void netvsc_set_multicast_list(struct net_device *net) |
fceaf24a | 81 | { |
792df872 | 82 | struct net_device_context *net_device_ctx = netdev_priv(net); |
d426b2e3 | 83 | |
792df872 | 84 | schedule_work(&net_device_ctx->work); |
fceaf24a HJ |
85 | } |
86 | ||
fceaf24a HJ |
87 | static int netvsc_open(struct net_device *net) |
88 | { | |
fceaf24a | 89 | struct net_device_context *net_device_ctx = netdev_priv(net); |
6bad88da | 90 | struct hv_device *device_obj = net_device_ctx->device_ctx; |
891de74d HZ |
91 | struct netvsc_device *nvdev; |
92 | struct rndis_device *rdev; | |
02fafbc6 | 93 | int ret = 0; |
fceaf24a | 94 | |
891de74d HZ |
95 | netif_carrier_off(net); |
96 | ||
d515d0ff HZ |
97 | /* Open up the device */ |
98 | ret = rndis_filter_open(device_obj); | |
99 | if (ret != 0) { | |
100 | netdev_err(net, "unable to open device (ret %d).\n", ret); | |
101 | return ret; | |
fceaf24a HJ |
102 | } |
103 | ||
44559a96 | 104 | netif_tx_start_all_queues(net); |
d515d0ff | 105 | |
891de74d HZ |
106 | nvdev = hv_get_drvdata(device_obj); |
107 | rdev = nvdev->extension; | |
108 | if (!rdev->link_state) | |
109 | netif_carrier_on(net); | |
110 | ||
fceaf24a HJ |
111 | return ret; |
112 | } | |
113 | ||
fceaf24a HJ |
114 | static int netvsc_close(struct net_device *net) |
115 | { | |
fceaf24a | 116 | struct net_device_context *net_device_ctx = netdev_priv(net); |
6bad88da | 117 | struct hv_device *device_obj = net_device_ctx->device_ctx; |
02fafbc6 | 118 | int ret; |
fceaf24a | 119 | |
0a282538 | 120 | netif_tx_disable(net); |
fceaf24a | 121 | |
792df872 WM |
122 | /* Make sure netvsc_set_multicast_list doesn't re-enable filter! */ |
123 | cancel_work_sync(&net_device_ctx->work); | |
9c26aa0d | 124 | ret = rndis_filter_close(device_obj); |
fceaf24a | 125 | if (ret != 0) |
eb335bc4 | 126 | netdev_err(net, "unable to close device (ret %d).\n", ret); |
fceaf24a | 127 | |
fceaf24a HJ |
128 | return ret; |
129 | } | |
130 | ||
d972eb71 KS |
131 | static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, |
132 | int pkt_type) | |
133 | { | |
134 | struct rndis_packet *rndis_pkt; | |
135 | struct rndis_per_packet_info *ppi; | |
136 | ||
137 | rndis_pkt = &msg->msg.pkt; | |
138 | rndis_pkt->data_offset += ppi_size; | |
139 | ||
140 | ppi = (struct rndis_per_packet_info *)((void *)rndis_pkt + | |
141 | rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_len); | |
142 | ||
143 | ppi->size = ppi_size; | |
144 | ppi->type = pkt_type; | |
145 | ppi->ppi_offset = sizeof(struct rndis_per_packet_info); | |
146 | ||
147 | rndis_pkt->per_pkt_info_len += ppi_size; | |
148 | ||
149 | return ppi; | |
150 | } | |
151 | ||
44559a96 HZ |
152 | union sub_key { |
153 | u64 k; | |
154 | struct { | |
155 | u8 pad[3]; | |
156 | u8 kb; | |
157 | u32 ka; | |
158 | }; | |
159 | }; | |
160 | ||
161 | /* Toeplitz hash function | |
162 | * data: network byte order | |
163 | * return: host byte order | |
164 | */ | |
165 | static u32 comp_hash(u8 *key, int klen, u8 *data, int dlen) | |
166 | { | |
167 | union sub_key subk; | |
168 | int k_next = 4; | |
169 | u8 dt; | |
170 | int i, j; | |
171 | u32 ret = 0; | |
172 | ||
173 | subk.k = 0; | |
174 | subk.ka = ntohl(*(u32 *)key); | |
175 | ||
176 | for (i = 0; i < dlen; i++) { | |
177 | subk.kb = key[k_next]; | |
178 | k_next = (k_next + 1) % klen; | |
179 | dt = data[i]; | |
180 | for (j = 0; j < 8; j++) { | |
181 | if (dt & 0x80) | |
182 | ret ^= subk.ka; | |
183 | dt <<= 1; | |
184 | subk.k <<= 1; | |
185 | } | |
186 | } | |
187 | ||
188 | return ret; | |
189 | } | |
190 | ||
191 | static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb) | |
192 | { | |
193 | struct iphdr *iphdr; | |
194 | int data_len; | |
195 | bool ret = false; | |
196 | ||
197 | if (eth_hdr(skb)->h_proto != htons(ETH_P_IP)) | |
198 | return false; | |
199 | ||
200 | iphdr = ip_hdr(skb); | |
201 | ||
202 | if (iphdr->version == 4) { | |
203 | if (iphdr->protocol == IPPROTO_TCP) | |
204 | data_len = 12; | |
205 | else | |
206 | data_len = 8; | |
207 | *hash = comp_hash(netvsc_hash_key, HASH_KEYLEN, | |
208 | (u8 *)&iphdr->saddr, data_len); | |
209 | ret = true; | |
210 | } | |
211 | ||
212 | return ret; | |
213 | } | |
214 | ||
215 | static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, | |
216 | void *accel_priv, select_queue_fallback_t fallback) | |
217 | { | |
218 | struct net_device_context *net_device_ctx = netdev_priv(ndev); | |
219 | struct hv_device *hdev = net_device_ctx->device_ctx; | |
220 | struct netvsc_device *nvsc_dev = hv_get_drvdata(hdev); | |
221 | u32 hash; | |
222 | u16 q_idx = 0; | |
223 | ||
224 | if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1) | |
225 | return 0; | |
226 | ||
227 | if (netvsc_set_hash(&hash, skb)) | |
228 | q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] % | |
229 | ndev->real_num_tx_queues; | |
230 | ||
231 | return q_idx; | |
232 | } | |
233 | ||
fceaf24a HJ |
234 | static void netvsc_xmit_completion(void *context) |
235 | { | |
4193d4f4 | 236 | struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; |
02fafbc6 | 237 | struct sk_buff *skb = (struct sk_buff *) |
72a2f5bd | 238 | (unsigned long)packet->completion.send.send_completion_tid; |
fceaf24a | 239 | |
fceaf24a HJ |
240 | kfree(packet); |
241 | ||
1d06825b | 242 | if (skb) |
b220f5f9 | 243 | dev_kfree_skb_any(skb); |
fceaf24a HJ |
244 | } |
245 | ||
4c060340 KS |
246 | static u32 fill_pg_buf(struct page *page, u32 offset, u32 len, |
247 | struct hv_page_buffer *pb) | |
248 | { | |
249 | int j = 0; | |
250 | ||
251 | /* Deal with compund pages by ignoring unused part | |
252 | * of the page. | |
253 | */ | |
254 | page += (offset >> PAGE_SHIFT); | |
255 | offset &= ~PAGE_MASK; | |
256 | ||
257 | while (len > 0) { | |
258 | unsigned long bytes; | |
259 | ||
260 | bytes = PAGE_SIZE - offset; | |
261 | if (bytes > len) | |
262 | bytes = len; | |
263 | pb[j].pfn = page_to_pfn(page); | |
264 | pb[j].offset = offset; | |
265 | pb[j].len = bytes; | |
266 | ||
267 | offset += bytes; | |
268 | len -= bytes; | |
269 | ||
270 | if (offset == PAGE_SIZE && len) { | |
271 | page++; | |
272 | offset = 0; | |
273 | j++; | |
274 | } | |
275 | } | |
276 | ||
277 | return j + 1; | |
278 | } | |
279 | ||
d972eb71 KS |
280 | static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, |
281 | struct hv_page_buffer *pb) | |
4c060340 KS |
282 | { |
283 | u32 slots_used = 0; | |
284 | char *data = skb->data; | |
285 | int frags = skb_shinfo(skb)->nr_frags; | |
286 | int i; | |
287 | ||
288 | /* The packet is laid out thus: | |
289 | * 1. hdr | |
290 | * 2. skb linear data | |
291 | * 3. skb fragment data | |
292 | */ | |
293 | if (hdr != NULL) | |
294 | slots_used += fill_pg_buf(virt_to_page(hdr), | |
295 | offset_in_page(hdr), | |
296 | len, &pb[slots_used]); | |
297 | ||
298 | slots_used += fill_pg_buf(virt_to_page(data), | |
299 | offset_in_page(data), | |
300 | skb_headlen(skb), &pb[slots_used]); | |
301 | ||
302 | for (i = 0; i < frags; i++) { | |
303 | skb_frag_t *frag = skb_shinfo(skb)->frags + i; | |
304 | ||
305 | slots_used += fill_pg_buf(skb_frag_page(frag), | |
306 | frag->page_offset, | |
307 | skb_frag_size(frag), &pb[slots_used]); | |
308 | } | |
d972eb71 | 309 | return slots_used; |
4c060340 KS |
310 | } |
311 | ||
312 | static int count_skb_frag_slots(struct sk_buff *skb) | |
313 | { | |
314 | int i, frags = skb_shinfo(skb)->nr_frags; | |
315 | int pages = 0; | |
316 | ||
317 | for (i = 0; i < frags; i++) { | |
318 | skb_frag_t *frag = skb_shinfo(skb)->frags + i; | |
319 | unsigned long size = skb_frag_size(frag); | |
320 | unsigned long offset = frag->page_offset; | |
321 | ||
322 | /* Skip unused frames from start of page */ | |
323 | offset &= ~PAGE_MASK; | |
324 | pages += PFN_UP(offset + size); | |
325 | } | |
326 | return pages; | |
327 | } | |
328 | ||
329 | static int netvsc_get_slots(struct sk_buff *skb) | |
330 | { | |
331 | char *data = skb->data; | |
332 | unsigned int offset = offset_in_page(data); | |
333 | unsigned int len = skb_headlen(skb); | |
334 | int slots; | |
335 | int frag_slots; | |
336 | ||
337 | slots = DIV_ROUND_UP(offset + len, PAGE_SIZE); | |
338 | frag_slots = count_skb_frag_slots(skb); | |
339 | return slots + frag_slots; | |
340 | } | |
341 | ||
d2a0be7f KS |
342 | static u32 get_net_transport_info(struct sk_buff *skb, u32 *trans_off) |
343 | { | |
344 | u32 ret_val = TRANSPORT_INFO_NOT_IP; | |
345 | ||
346 | if ((eth_hdr(skb)->h_proto != htons(ETH_P_IP)) && | |
347 | (eth_hdr(skb)->h_proto != htons(ETH_P_IPV6))) { | |
348 | goto not_ip; | |
349 | } | |
350 | ||
351 | *trans_off = skb_transport_offset(skb); | |
352 | ||
353 | if ((eth_hdr(skb)->h_proto == htons(ETH_P_IP))) { | |
354 | struct iphdr *iphdr = ip_hdr(skb); | |
355 | ||
356 | if (iphdr->protocol == IPPROTO_TCP) | |
357 | ret_val = TRANSPORT_INFO_IPV4_TCP; | |
358 | else if (iphdr->protocol == IPPROTO_UDP) | |
359 | ret_val = TRANSPORT_INFO_IPV4_UDP; | |
360 | } else { | |
361 | if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) | |
362 | ret_val = TRANSPORT_INFO_IPV6_TCP; | |
363 | else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP) | |
364 | ret_val = TRANSPORT_INFO_IPV6_UDP; | |
365 | } | |
366 | ||
367 | not_ip: | |
368 | return ret_val; | |
369 | } | |
370 | ||
02fafbc6 | 371 | static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) |
fceaf24a | 372 | { |
fceaf24a | 373 | struct net_device_context *net_device_ctx = netdev_priv(net); |
4193d4f4 | 374 | struct hv_netvsc_packet *packet; |
02fafbc6 | 375 | int ret; |
60fa7e97 | 376 | u32 skb_length = skb->len; |
d972eb71 KS |
377 | unsigned int num_data_pgs; |
378 | struct rndis_message *rndis_msg; | |
379 | struct rndis_packet *rndis_pkt; | |
380 | u32 rndis_msg_size; | |
381 | bool isvlan; | |
382 | struct rndis_per_packet_info *ppi; | |
d2a0be7f | 383 | struct ndis_tcp_ip_checksum_info *csum_info; |
a4ec4f58 | 384 | struct ndis_tcp_lso_info *lso_info; |
d2a0be7f KS |
385 | int hdr_offset; |
386 | u32 net_trans_info; | |
387 | ||
fceaf24a | 388 | |
4c060340 KS |
389 | /* We will atmost need two pages to describe the rndis |
390 | * header. We can only transmit MAX_PAGE_BUFFER_COUNT number | |
391 | * of pages in a single packet. | |
392 | */ | |
d972eb71 KS |
393 | num_data_pgs = netvsc_get_slots(skb) + 2; |
394 | if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) { | |
4c060340 KS |
395 | netdev_err(net, "Packet too big: %u\n", skb->len); |
396 | dev_kfree_skb(skb); | |
397 | net->stats.tx_dropped++; | |
398 | return NETDEV_TX_OK; | |
399 | } | |
fceaf24a | 400 | |
454f18a9 | 401 | /* Allocate a netvsc packet based on # of frags. */ |
02fafbc6 | 402 | packet = kzalloc(sizeof(struct hv_netvsc_packet) + |
d972eb71 | 403 | (num_data_pgs * sizeof(struct hv_page_buffer)) + |
f3f885fa | 404 | sizeof(struct rndis_message) + |
e0f6906e KS |
405 | NDIS_VLAN_PPI_SIZE + |
406 | NDIS_CSUM_PPI_SIZE + | |
407 | NDIS_LSO_PPI_SIZE, GFP_ATOMIC); | |
02fafbc6 | 408 | if (!packet) { |
bf769375 | 409 | /* out of memory, drop packet */ |
eb335bc4 | 410 | netdev_err(net, "unable to allocate hv_netvsc_packet\n"); |
b220f5f9 SH |
411 | |
412 | dev_kfree_skb(skb); | |
413 | net->stats.tx_dropped++; | |
bb6d5e76 | 414 | return NETDEV_TX_OK; |
fceaf24a HJ |
415 | } |
416 | ||
1f5f3a75 HZ |
417 | packet->vlan_tci = skb->vlan_tci; |
418 | ||
44559a96 HZ |
419 | packet->q_idx = skb_get_queue_mapping(skb); |
420 | ||
d972eb71 | 421 | packet->is_data_pkt = true; |
4d447c9a | 422 | packet->total_data_buflen = skb->len; |
fceaf24a | 423 | |
d972eb71 KS |
424 | packet->rndis_msg = (struct rndis_message *)((unsigned long)packet + |
425 | sizeof(struct hv_netvsc_packet) + | |
426 | (num_data_pgs * sizeof(struct hv_page_buffer))); | |
fceaf24a | 427 | |
454f18a9 | 428 | /* Set the completion routine */ |
72a2f5bd HZ |
429 | packet->completion.send.send_completion = netvsc_xmit_completion; |
430 | packet->completion.send.send_completion_ctx = packet; | |
431 | packet->completion.send.send_completion_tid = (unsigned long)skb; | |
fceaf24a | 432 | |
d972eb71 KS |
433 | isvlan = packet->vlan_tci & VLAN_TAG_PRESENT; |
434 | ||
435 | /* Add the rndis header */ | |
436 | rndis_msg = packet->rndis_msg; | |
437 | rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET; | |
438 | rndis_msg->msg_len = packet->total_data_buflen; | |
439 | rndis_pkt = &rndis_msg->msg.pkt; | |
440 | rndis_pkt->data_offset = sizeof(struct rndis_packet); | |
441 | rndis_pkt->data_len = packet->total_data_buflen; | |
442 | rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet); | |
443 | ||
444 | rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet); | |
445 | ||
446 | if (isvlan) { | |
447 | struct ndis_pkt_8021q_info *vlan; | |
448 | ||
449 | rndis_msg_size += NDIS_VLAN_PPI_SIZE; | |
450 | ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE, | |
451 | IEEE_8021Q_INFO); | |
452 | vlan = (struct ndis_pkt_8021q_info *)((void *)ppi + | |
453 | ppi->ppi_offset); | |
454 | vlan->vlanid = packet->vlan_tci & VLAN_VID_MASK; | |
455 | vlan->pri = (packet->vlan_tci & VLAN_PRIO_MASK) >> | |
456 | VLAN_PRIO_SHIFT; | |
457 | } | |
458 | ||
d2a0be7f KS |
459 | net_trans_info = get_net_transport_info(skb, &hdr_offset); |
460 | if (net_trans_info == TRANSPORT_INFO_NOT_IP) | |
461 | goto do_send; | |
462 | ||
463 | /* | |
464 | * Setup the sendside checksum offload only if this is not a | |
465 | * GSO packet. | |
466 | */ | |
467 | if (skb_is_gso(skb)) | |
a4ec4f58 | 468 | goto do_lso; |
d2a0be7f | 469 | |
54036077 KS |
470 | if ((skb->ip_summed == CHECKSUM_NONE) || |
471 | (skb->ip_summed == CHECKSUM_UNNECESSARY)) | |
472 | goto do_send; | |
473 | ||
d2a0be7f KS |
474 | rndis_msg_size += NDIS_CSUM_PPI_SIZE; |
475 | ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE, | |
476 | TCPIP_CHKSUM_PKTINFO); | |
477 | ||
478 | csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi + | |
479 | ppi->ppi_offset); | |
480 | ||
481 | if (net_trans_info & (INFO_IPV4 << 16)) | |
482 | csum_info->transmit.is_ipv4 = 1; | |
483 | else | |
484 | csum_info->transmit.is_ipv6 = 1; | |
485 | ||
486 | if (net_trans_info & INFO_TCP) { | |
487 | csum_info->transmit.tcp_checksum = 1; | |
488 | csum_info->transmit.tcp_header_offset = hdr_offset; | |
489 | } else if (net_trans_info & INFO_UDP) { | |
b822ee7a KS |
490 | /* UDP checksum offload is not supported on ws2008r2. |
491 | * Furthermore, on ws2012 and ws2012r2, there are some | |
492 | * issues with udp checksum offload from Linux guests. | |
493 | * (these are host issues). | |
494 | * For now compute the checksum here. | |
495 | */ | |
496 | struct udphdr *uh; | |
497 | u16 udp_len; | |
498 | ||
499 | ret = skb_cow_head(skb, 0); | |
500 | if (ret) | |
501 | goto drop; | |
502 | ||
503 | uh = udp_hdr(skb); | |
504 | udp_len = ntohs(uh->len); | |
505 | uh->check = 0; | |
506 | uh->check = csum_tcpudp_magic(ip_hdr(skb)->saddr, | |
507 | ip_hdr(skb)->daddr, | |
508 | udp_len, IPPROTO_UDP, | |
509 | csum_partial(uh, udp_len, 0)); | |
510 | if (uh->check == 0) | |
511 | uh->check = CSUM_MANGLED_0; | |
512 | ||
513 | csum_info->transmit.udp_checksum = 0; | |
d2a0be7f | 514 | } |
a4ec4f58 KS |
515 | goto do_send; |
516 | ||
517 | do_lso: | |
518 | rndis_msg_size += NDIS_LSO_PPI_SIZE; | |
519 | ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, | |
520 | TCP_LARGESEND_PKTINFO); | |
521 | ||
522 | lso_info = (struct ndis_tcp_lso_info *)((void *)ppi + | |
523 | ppi->ppi_offset); | |
524 | ||
525 | lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; | |
526 | if (net_trans_info & (INFO_IPV4 << 16)) { | |
527 | lso_info->lso_v2_transmit.ip_version = | |
528 | NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; | |
529 | ip_hdr(skb)->tot_len = 0; | |
530 | ip_hdr(skb)->check = 0; | |
531 | tcp_hdr(skb)->check = | |
532 | ~csum_tcpudp_magic(ip_hdr(skb)->saddr, | |
533 | ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); | |
534 | } else { | |
535 | lso_info->lso_v2_transmit.ip_version = | |
536 | NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; | |
537 | ipv6_hdr(skb)->payload_len = 0; | |
538 | tcp_hdr(skb)->check = | |
539 | ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, | |
540 | &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); | |
541 | } | |
542 | lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset; | |
543 | lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size; | |
d2a0be7f KS |
544 | |
545 | do_send: | |
d972eb71 KS |
546 | /* Start filling in the page buffers with the rndis hdr */ |
547 | rndis_msg->msg_len += rndis_msg_size; | |
548 | packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, | |
549 | skb, &packet->page_buf[0]); | |
550 | ||
551 | ret = netvsc_send(net_device_ctx->device_ctx, packet); | |
552 | ||
b822ee7a | 553 | drop: |
02fafbc6 | 554 | if (ret == 0) { |
60fa7e97 | 555 | net->stats.tx_bytes += skb_length; |
b852fdce | 556 | net->stats.tx_packets++; |
b220f5f9 | 557 | } else { |
8a5f9edc | 558 | kfree(packet); |
33be96e4 HZ |
559 | if (ret != -EAGAIN) { |
560 | dev_kfree_skb_any(skb); | |
561 | net->stats.tx_dropped++; | |
562 | } | |
fceaf24a HJ |
563 | } |
564 | ||
33be96e4 | 565 | return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK; |
fceaf24a HJ |
566 | } |
567 | ||
3e189519 | 568 | /* |
02fafbc6 GKH |
569 | * netvsc_linkstatus_callback - Link up/down notification |
570 | */ | |
90ef117a | 571 | void netvsc_linkstatus_callback(struct hv_device *device_obj, |
02fafbc6 | 572 | unsigned int status) |
fceaf24a | 573 | { |
2ddd5e5f | 574 | struct net_device *net; |
c996edcf | 575 | struct net_device_context *ndev_ctx; |
2ddd5e5f | 576 | struct netvsc_device *net_device; |
891de74d | 577 | struct rndis_device *rdev; |
2ddd5e5f S |
578 | |
579 | net_device = hv_get_drvdata(device_obj); | |
891de74d HZ |
580 | rdev = net_device->extension; |
581 | ||
582 | rdev->link_state = status != 1; | |
583 | ||
2ddd5e5f | 584 | net = net_device->ndev; |
fceaf24a | 585 | |
891de74d | 586 | if (!net || net->reg_state != NETREG_REGISTERED) |
fceaf24a | 587 | return; |
fceaf24a | 588 | |
891de74d | 589 | ndev_ctx = netdev_priv(net); |
02fafbc6 | 590 | if (status == 1) { |
c4b6a2ea | 591 | schedule_delayed_work(&ndev_ctx->dwork, 0); |
122a5f64 | 592 | schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20)); |
02fafbc6 | 593 | } else { |
891de74d | 594 | schedule_delayed_work(&ndev_ctx->dwork, 0); |
fceaf24a | 595 | } |
fceaf24a HJ |
596 | } |
597 | ||
3e189519 HJ |
598 | /* |
599 | * netvsc_recv_callback - Callback when we receive a packet from the | |
600 | * "wire" on the specified device. | |
02fafbc6 | 601 | */ |
f79adf8f | 602 | int netvsc_recv_callback(struct hv_device *device_obj, |
59e0c70c KS |
603 | struct hv_netvsc_packet *packet, |
604 | struct ndis_tcp_ip_checksum_info *csum_info) | |
fceaf24a | 605 | { |
6f4c4446 | 606 | struct net_device *net; |
fceaf24a | 607 | struct sk_buff *skb; |
fceaf24a | 608 | |
6f4c4446 | 609 | net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev; |
a68f9614 | 610 | if (!net || net->reg_state != NETREG_REGISTERED) { |
63f6921d | 611 | packet->status = NVSP_STAT_FAIL; |
fceaf24a HJ |
612 | return 0; |
613 | } | |
614 | ||
9495c282 | 615 | /* Allocate a skb - TODO direct I/O to pages? */ |
72a2f5bd | 616 | skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen); |
9495c282 SH |
617 | if (unlikely(!skb)) { |
618 | ++net->stats.rx_dropped; | |
63f6921d | 619 | packet->status = NVSP_STAT_FAIL; |
9495c282 SH |
620 | return 0; |
621 | } | |
fceaf24a | 622 | |
02fafbc6 GKH |
623 | /* |
624 | * Copy to skb. This copy is needed here since the memory pointed by | |
625 | * hv_netvsc_packet cannot be deallocated | |
626 | */ | |
45326342 HZ |
627 | memcpy(skb_put(skb, packet->total_data_buflen), packet->data, |
628 | packet->total_data_buflen); | |
fceaf24a HJ |
629 | |
630 | skb->protocol = eth_type_trans(skb, net); | |
59e0c70c KS |
631 | if (csum_info) { |
632 | /* We only look at the IP checksum here. | |
633 | * Should we be dropping the packet if checksum | |
634 | * failed? How do we deal with other checksums - TCP/UDP? | |
635 | */ | |
636 | if (csum_info->receive.ip_checksum_succeeded) | |
637 | skb->ip_summed = CHECKSUM_UNNECESSARY; | |
638 | else | |
639 | skb->ip_summed = CHECKSUM_NONE; | |
640 | } | |
641 | ||
93725cbd HZ |
642 | if (packet->vlan_tci & VLAN_TAG_PRESENT) |
643 | __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), | |
644 | packet->vlan_tci); | |
fceaf24a | 645 | |
d6bf5567 | 646 | skb_record_rx_queue(skb, packet->channel-> |
44559a96 HZ |
647 | offermsg.offer.sub_channel_index % |
648 | net->real_num_rx_queues); | |
649 | ||
9495c282 | 650 | net->stats.rx_packets++; |
48c38839 | 651 | net->stats.rx_bytes += packet->total_data_buflen; |
9495c282 | 652 | |
02fafbc6 GKH |
653 | /* |
654 | * Pass the skb back up. Network stack will deallocate the skb when it | |
9495c282 SH |
655 | * is done. |
656 | * TODO - use NAPI? | |
02fafbc6 | 657 | */ |
9495c282 | 658 | netif_rx(skb); |
fceaf24a | 659 | |
fceaf24a HJ |
660 | return 0; |
661 | } | |
662 | ||
f82f4ad7 SH |
663 | static void netvsc_get_drvinfo(struct net_device *net, |
664 | struct ethtool_drvinfo *info) | |
665 | { | |
7826d43f | 666 | strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); |
7826d43f | 667 | strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); |
f82f4ad7 SH |
668 | } |
669 | ||
4d447c9a HZ |
670 | static int netvsc_change_mtu(struct net_device *ndev, int mtu) |
671 | { | |
672 | struct net_device_context *ndevctx = netdev_priv(ndev); | |
673 | struct hv_device *hdev = ndevctx->device_ctx; | |
674 | struct netvsc_device *nvdev = hv_get_drvdata(hdev); | |
675 | struct netvsc_device_info device_info; | |
676 | int limit = ETH_DATA_LEN; | |
677 | ||
678 | if (nvdev == NULL || nvdev->destroy) | |
679 | return -ENODEV; | |
680 | ||
3c2a271d | 681 | if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) |
4d447c9a HZ |
682 | limit = NETVSC_MTU; |
683 | ||
684 | if (mtu < 68 || mtu > limit) | |
685 | return -EINVAL; | |
686 | ||
687 | nvdev->start_remove = true; | |
792df872 | 688 | cancel_work_sync(&ndevctx->work); |
0a282538 | 689 | netif_tx_disable(ndev); |
4d447c9a HZ |
690 | rndis_filter_device_remove(hdev); |
691 | ||
692 | ndev->mtu = mtu; | |
693 | ||
694 | ndevctx->device_ctx = hdev; | |
695 | hv_set_drvdata(hdev, ndev); | |
696 | device_info.ring_size = ring_size; | |
697 | rndis_filter_device_add(hdev, &device_info); | |
44559a96 | 698 | netif_tx_wake_all_queues(ndev); |
4d447c9a HZ |
699 | |
700 | return 0; | |
701 | } | |
702 | ||
1ce09e89 HZ |
703 | |
704 | static int netvsc_set_mac_addr(struct net_device *ndev, void *p) | |
705 | { | |
706 | struct net_device_context *ndevctx = netdev_priv(ndev); | |
707 | struct hv_device *hdev = ndevctx->device_ctx; | |
708 | struct sockaddr *addr = p; | |
9a4c831e | 709 | char save_adr[ETH_ALEN]; |
1ce09e89 HZ |
710 | unsigned char save_aatype; |
711 | int err; | |
712 | ||
713 | memcpy(save_adr, ndev->dev_addr, ETH_ALEN); | |
714 | save_aatype = ndev->addr_assign_type; | |
715 | ||
716 | err = eth_mac_addr(ndev, p); | |
717 | if (err != 0) | |
718 | return err; | |
719 | ||
720 | err = rndis_filter_set_device_mac(hdev, addr->sa_data); | |
721 | if (err != 0) { | |
722 | /* roll back to saved MAC */ | |
723 | memcpy(ndev->dev_addr, save_adr, ETH_ALEN); | |
724 | ndev->addr_assign_type = save_aatype; | |
725 | } | |
726 | ||
727 | return err; | |
728 | } | |
729 | ||
730 | ||
f82f4ad7 SH |
731 | static const struct ethtool_ops ethtool_ops = { |
732 | .get_drvinfo = netvsc_get_drvinfo, | |
f82f4ad7 SH |
733 | .get_link = ethtool_op_get_link, |
734 | }; | |
735 | ||
df2fff28 GKH |
736 | static const struct net_device_ops device_ops = { |
737 | .ndo_open = netvsc_open, | |
738 | .ndo_stop = netvsc_close, | |
739 | .ndo_start_xmit = netvsc_start_xmit, | |
afc4b13d | 740 | .ndo_set_rx_mode = netvsc_set_multicast_list, |
4d447c9a | 741 | .ndo_change_mtu = netvsc_change_mtu, |
b681b588 | 742 | .ndo_validate_addr = eth_validate_addr, |
1ce09e89 | 743 | .ndo_set_mac_address = netvsc_set_mac_addr, |
44559a96 | 744 | .ndo_select_queue = netvsc_select_queue, |
df2fff28 GKH |
745 | }; |
746 | ||
c996edcf HZ |
747 | /* |
748 | * Send GARP packet to network peers after migrations. | |
749 | * After Quick Migration, the network is not immediately operational in the | |
750 | * current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add | |
122a5f64 | 751 | * another netif_notify_peers() into a delayed work, otherwise GARP packet |
c996edcf | 752 | * will not be sent after quick migration, and cause network disconnection. |
891de74d | 753 | * Also, we update the carrier status here. |
c996edcf | 754 | */ |
891de74d | 755 | static void netvsc_link_change(struct work_struct *w) |
c996edcf HZ |
756 | { |
757 | struct net_device_context *ndev_ctx; | |
758 | struct net_device *net; | |
2ddd5e5f | 759 | struct netvsc_device *net_device; |
891de74d HZ |
760 | struct rndis_device *rdev; |
761 | bool notify; | |
762 | ||
763 | rtnl_lock(); | |
c996edcf | 764 | |
122a5f64 | 765 | ndev_ctx = container_of(w, struct net_device_context, dwork.work); |
2ddd5e5f | 766 | net_device = hv_get_drvdata(ndev_ctx->device_ctx); |
891de74d | 767 | rdev = net_device->extension; |
2ddd5e5f | 768 | net = net_device->ndev; |
891de74d HZ |
769 | |
770 | if (rdev->link_state) { | |
771 | netif_carrier_off(net); | |
772 | notify = false; | |
773 | } else { | |
774 | netif_carrier_on(net); | |
775 | notify = true; | |
776 | } | |
777 | ||
778 | rtnl_unlock(); | |
779 | ||
780 | if (notify) | |
781 | netdev_notify_peers(net); | |
c996edcf HZ |
782 | } |
783 | ||
784 | ||
84946899 S |
785 | static int netvsc_probe(struct hv_device *dev, |
786 | const struct hv_vmbus_device_id *dev_id) | |
df2fff28 | 787 | { |
df2fff28 GKH |
788 | struct net_device *net = NULL; |
789 | struct net_device_context *net_device_ctx; | |
790 | struct netvsc_device_info device_info; | |
44559a96 | 791 | struct netvsc_device *nvdev; |
df2fff28 GKH |
792 | int ret; |
793 | ||
44559a96 HZ |
794 | net = alloc_etherdev_mq(sizeof(struct net_device_context), |
795 | num_online_cpus()); | |
df2fff28 | 796 | if (!net) |
51a805d0 | 797 | return -ENOMEM; |
df2fff28 | 798 | |
1b07da51 HZ |
799 | netif_carrier_off(net); |
800 | ||
df2fff28 | 801 | net_device_ctx = netdev_priv(net); |
9efd21e1 | 802 | net_device_ctx->device_ctx = dev; |
2ddd5e5f | 803 | hv_set_drvdata(dev, net); |
891de74d | 804 | INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change); |
792df872 | 805 | INIT_WORK(&net_device_ctx->work, do_set_multicast); |
df2fff28 | 806 | |
df2fff28 GKH |
807 | net->netdev_ops = &device_ops; |
808 | ||
a4ec4f58 KS |
809 | net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM | |
810 | NETIF_F_TSO; | |
d2a0be7f | 811 | net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM | |
a4ec4f58 | 812 | NETIF_F_IP_CSUM | NETIF_F_TSO; |
6048718d | 813 | |
f82f4ad7 | 814 | SET_ETHTOOL_OPS(net, ðtool_ops); |
9efd21e1 | 815 | SET_NETDEV_DEV(net, &dev->device); |
df2fff28 | 816 | |
692e084e HZ |
817 | /* Notify the netvsc driver of the new device */ |
818 | device_info.ring_size = ring_size; | |
819 | ret = rndis_filter_device_add(dev, &device_info); | |
820 | if (ret != 0) { | |
821 | netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); | |
df2fff28 | 822 | free_netdev(net); |
2ddd5e5f | 823 | hv_set_drvdata(dev, NULL); |
692e084e | 824 | return ret; |
df2fff28 | 825 | } |
692e084e HZ |
826 | memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); |
827 | ||
44559a96 HZ |
828 | nvdev = hv_get_drvdata(dev); |
829 | netif_set_real_num_tx_queues(net, nvdev->num_chn); | |
830 | netif_set_real_num_rx_queues(net, nvdev->num_chn); | |
831 | dev_info(&dev->device, "real num tx,rx queues:%u, %u\n", | |
832 | net->real_num_tx_queues, net->real_num_rx_queues); | |
833 | ||
a68f9614 HZ |
834 | ret = register_netdev(net); |
835 | if (ret != 0) { | |
836 | pr_err("Unable to register netdev.\n"); | |
837 | rndis_filter_device_remove(dev); | |
838 | free_netdev(net); | |
1b07da51 HZ |
839 | } else { |
840 | schedule_delayed_work(&net_device_ctx->dwork, 0); | |
a68f9614 HZ |
841 | } |
842 | ||
df2fff28 GKH |
843 | return ret; |
844 | } | |
845 | ||
415b023a | 846 | static int netvsc_remove(struct hv_device *dev) |
df2fff28 | 847 | { |
2ddd5e5f | 848 | struct net_device *net; |
122a5f64 | 849 | struct net_device_context *ndev_ctx; |
2ddd5e5f S |
850 | struct netvsc_device *net_device; |
851 | ||
852 | net_device = hv_get_drvdata(dev); | |
853 | net = net_device->ndev; | |
df2fff28 | 854 | |
df2fff28 | 855 | if (net == NULL) { |
415b023a | 856 | dev_err(&dev->device, "No net device to remove\n"); |
df2fff28 GKH |
857 | return 0; |
858 | } | |
859 | ||
4d447c9a HZ |
860 | net_device->start_remove = true; |
861 | ||
122a5f64 HZ |
862 | ndev_ctx = netdev_priv(net); |
863 | cancel_delayed_work_sync(&ndev_ctx->dwork); | |
792df872 | 864 | cancel_work_sync(&ndev_ctx->work); |
122a5f64 | 865 | |
df2fff28 | 866 | /* Stop outbound asap */ |
0a282538 | 867 | netif_tx_disable(net); |
df2fff28 GKH |
868 | |
869 | unregister_netdev(net); | |
870 | ||
871 | /* | |
872 | * Call to the vsc driver to let it know that the device is being | |
873 | * removed | |
874 | */ | |
df06bcff | 875 | rndis_filter_device_remove(dev); |
df2fff28 GKH |
876 | |
877 | free_netdev(net); | |
df06bcff | 878 | return 0; |
df2fff28 GKH |
879 | } |
880 | ||
345c4cc3 | 881 | static const struct hv_vmbus_device_id id_table[] = { |
c45cf2d4 | 882 | /* Network guid */ |
8f505944 | 883 | { HV_NIC_GUID, }, |
c45cf2d4 | 884 | { }, |
345c4cc3 S |
885 | }; |
886 | ||
887 | MODULE_DEVICE_TABLE(vmbus, id_table); | |
888 | ||
f1542a66 | 889 | /* The one and only one */ |
fde0ef9b | 890 | static struct hv_driver netvsc_drv = { |
d31b20fc | 891 | .name = KBUILD_MODNAME, |
345c4cc3 | 892 | .id_table = id_table, |
fde0ef9b S |
893 | .probe = netvsc_probe, |
894 | .remove = netvsc_remove, | |
d4890970 | 895 | }; |
f1542a66 | 896 | |
a9869c94 | 897 | static void __exit netvsc_drv_exit(void) |
fceaf24a | 898 | { |
768fa219 | 899 | vmbus_driver_unregister(&netvsc_drv); |
fceaf24a HJ |
900 | } |
901 | ||
1fde28cf | 902 | static int __init netvsc_drv_init(void) |
df2fff28 | 903 | { |
fa85a6c2 HZ |
904 | if (ring_size < RING_SIZE_MIN) { |
905 | ring_size = RING_SIZE_MIN; | |
906 | pr_info("Increased ring_size to %d (min allowed)\n", | |
907 | ring_size); | |
908 | } | |
768fa219 | 909 | return vmbus_driver_register(&netvsc_drv); |
df2fff28 GKH |
910 | } |
911 | ||
26c14cc1 | 912 | MODULE_LICENSE("GPL"); |
7880fc54 | 913 | MODULE_DESCRIPTION("Microsoft Hyper-V network driver"); |
fceaf24a | 914 | |
1fde28cf | 915 | module_init(netvsc_drv_init); |
a9869c94 | 916 | module_exit(netvsc_drv_exit); |